diff --git a/.hgignore b/.hgignore
index 849cb42f..bd95308d 100644
--- a/.hgignore
+++ b/.hgignore
@@ -21,7 +21,6 @@ syntax: regexp
^tests/dstress/
^tests/reference/
^tango/
-^druntime/
^import/
^bin/ldc2?$
^bin/ldc2?\.conf$
diff --git a/druntime/import/core/bitmanip.di b/druntime/import/core/bitmanip.di
new file mode 100644
index 00000000..3cb3ac10
--- /dev/null
+++ b/druntime/import/core/bitmanip.di
@@ -0,0 +1,262 @@
+/**
+ * This module contains a collection of bit-level operations.
+ *
+ * Copyright: Copyright (c) 2005-2008, The D Runtime Project
+ * License: BSD Style, see LICENSE
+ * Authors: Walter Bright, Don Clugston, Sean Kelly
+ */
+module core.bitmanip;
+
+
+version( DDoc )
+{
+ /**
+ * Scans the bits in v starting with bit 0 (the least significant bit),
+ * looking for the first set bit.
+ * Returns:
+ * The bit number of the first bit set.
+ * The return value is undefined if v is zero.
+ */
+ int bsf( uint v );
+
+
+ /**
+ * Scans the bits in v from the most significant bit
+ * to the least significant bit, looking
+ * for the first set bit.
+ * Returns:
+ * The bit number (bit 0 is the least significant bit) of the highest set bit.
+ * The return value is undefined if v is zero.
+ * Example:
+ * ---
+ * import core.bitmanip;
+ *
+ * int main()
+ * {
+ * uint v;
+ * int x;
+ *
+ * v = 0x21;
+ * x = bsf(v);
+ * printf("bsf(x%x) = %d\n", v, x);
+ * x = bsr(v);
+ * printf("bsr(x%x) = %d\n", v, x);
+ * return 0;
+ * }
+ * ---
+ * Output:
+ * bsf(x21) = 0
+ * bsr(x21) = 5
+ */
+ int bsr( uint v );
+
+
+ /**
+ * Tests the bit bitnum of the uint array at p (addressing and result as documented for bts).
+ */
+ int bt( uint* p, uint bitnum );
+
+
+ /**
+ * Tests and complements the bit bitnum of the uint array at p (addressing and result as documented for bts).
+ */
+ int btc( uint* p, uint bitnum );
+
+
+ /**
+ * Tests and resets (sets to 0) the bit bitnum of the uint array at p (addressing and result as documented for bts).
+ */
+ int btr( uint* p, uint bitnum );
+
+
+ /**
+ * Tests and sets the bit.
+ * Params:
+ * p = a non-NULL pointer to an array of uints.
+ * bitnum = a bit number, starting with bit 0 of p[0],
+ * and progressing. It addresses bits like the expression:
+ ---
+ p[bitnum / (uint.sizeof*8)] & (1 << (bitnum & ((uint.sizeof*8) - 1)))
+ ---
+ * Returns:
+ * A non-zero value if the bit was set, and a zero
+ * if it was clear.
+ *
+ * Example:
+ * ---
+ import core.bitmanip;
+
+ int main()
+ {
+ uint array[2];
+
+ array[0] = 2;
+ array[1] = 0x100;
+
+ printf("btc(array, 35) = %d\n", btc(array, 35));
+ printf("array = [0]:x%x, [1]:x%x\n", array[0], array[1]);
+
+ printf("btc(array, 35) = %d\n", btc(array, 35));
+ printf("array = [0]:x%x, [1]:x%x\n", array[0], array[1]);
+
+ printf("bts(array, 35) = %d\n", bts(array, 35));
+ printf("array = [0]:x%x, [1]:x%x\n", array[0], array[1]);
+
+ printf("btr(array, 35) = %d\n", btr(array, 35));
+ printf("array = [0]:x%x, [1]:x%x\n", array[0], array[1]);
+
+ printf("bt(array, 1) = %d\n", bt(array, 1));
+ printf("array = [0]:x%x, [1]:x%x\n", array[0], array[1]);
+
+ return 0;
+ }
+ * ---
+ * Output:
+
+ btc(array, 35) = 0 + array = [0]:x2, [1]:x108 + btc(array, 35) = -1 + array = [0]:x2, [1]:x100 + bts(array, 35) = 0 + array = [0]:x2, [1]:x108 + btr(array, 35) = -1 + array = [0]:x2, [1]:x100 + bt(array, 1) = -1 + array = [0]:x2, [1]:x100 ++ */ + int bts( uint* p, uint bitnum ); + + + /** + * Swaps bytes in a 4 byte uint end-to-end, i.e. byte 0 becomes + * byte 3, byte 1 becomes byte 2, byte 2 becomes byte 1, byte 3 + * becomes byte 0. + */ + uint bswap( uint v ); + + + /** + * Reads I/O port at port_address. + */ + ubyte inp( uint port_address ); + + + /** + * ditto + */ + ushort inpw( uint port_address ); + + + /** + * ditto + */ + uint inpl( uint port_address ); + + + /** + * Writes and returns value to I/O port at port_address. + */ + ubyte outp( uint port_address, ubyte value ); + + + /** + * ditto + */ + ushort outpw( uint port_address, ushort value ); + + + /** + * ditto + */ + uint outpl( uint port_address, uint value ); +} +else +{ + public import std.intrinsic; +} + + +/** + * Calculates the number of set bits in a 32-bit integer. + */ +int popcnt( uint x ) +{ + // Avoid branches, and the potential for cache misses which + // could be incurred with a table lookup. + + // We need to mask alternate bits to prevent the + // sum from overflowing. + // add neighbouring bits. Each bit is 0 or 1. + x = x - ((x>>1) & 0x5555_5555); + // now each two bits of x is a number 00,01 or 10. + // now add neighbouring pairs + x = ((x&0xCCCC_CCCC)>>2) + (x&0x3333_3333); + // now each nibble holds 0000-0100. Adding them won't + // overflow any more, so we don't need to mask any more + + // Now add the nibbles, then the bytes, then the words + // We still need to mask to prevent double-counting. + // Note that if we used a rotate instead of a shift, we + // wouldn't need the masks, and could just divide the sum + // by 8 to account for the double-counting. + // On some CPUs, it may be faster to perform a multiply. 
+ + x += (x>>4); + x &= 0x0F0F_0F0F; + x += (x>>8); + x &= 0x00FF_00FF; + x += (x>>16); + x &= 0xFFFF; + return x; +} + + +/** + * Reverses the order of bits in a 32-bit integer. + */ +uint bitswap( uint x ) +{ + + version( D_InlineAsm_X86 ) + { + asm + { + // Author: Tiago Gasiba. + mov EDX, EAX; + shr EAX, 1; + and EDX, 0x5555_5555; + and EAX, 0x5555_5555; + shl EDX, 1; + or EAX, EDX; + mov EDX, EAX; + shr EAX, 2; + and EDX, 0x3333_3333; + and EAX, 0x3333_3333; + shl EDX, 2; + or EAX, EDX; + mov EDX, EAX; + shr EAX, 4; + and EDX, 0x0f0f_0f0f; + and EAX, 0x0f0f_0f0f; + shl EDX, 4; + or EAX, EDX; + bswap EAX; + } + } + else + { + // swap odd and even bits + x = ((x >> 1) & 0x5555_5555) | ((x & 0x5555_5555) << 1); + // swap consecutive pairs + x = ((x >> 2) & 0x3333_3333) | ((x & 0x3333_3333) << 2); + // swap nibbles + x = ((x >> 4) & 0x0F0F_0F0F) | ((x & 0x0F0F_0F0F) << 4); + // swap bytes + x = ((x >> 8) & 0x00FF_00FF) | ((x & 0x00FF_00FF) << 8); + // swap 2-byte long pairs + x = ( x >> 16 ) | ( x << 16); + return x; + + } +} diff --git a/druntime/import/core/stdc/complex.d b/druntime/import/core/stdc/complex.d new file mode 100644 index 00000000..26c16188 --- /dev/null +++ b/druntime/import/core/stdc/complex.d @@ -0,0 +1,107 @@ +/** + * D header file for C99. + * + * Copyright: Copyright Sean Kelly 2005 - 2009. + * License: Boost License 1.0. + * Authors: Sean Kelly + * Standards: ISO/IEC 9899:1999 (E) + * + * Copyright Sean Kelly 2005 - 2009. + * Distributed under the Boost Software License, Version 1.0. 
+ * (See accompanying file LICENSE_1_0.txt or copy at + * http://www.boost.org/LICENSE_1_0.txt) + */ +module core.stdc.config; + +extern (C): + +version( Windows ) +{ + alias int c_long; + alias uint c_ulong; +} +else +{ + static if( (void*).sizeof > int.sizeof ) + { + alias long c_long; + alias ulong c_ulong; + } + else + { + alias int c_long; + alias uint c_ulong; + } +} diff --git a/druntime/import/core/stdc/ctype.d b/druntime/import/core/stdc/ctype.d new file mode 100644 index 00000000..51b00eb0 --- /dev/null +++ b/druntime/import/core/stdc/ctype.d @@ -0,0 +1,31 @@ +/** + * D header file for C99. + * + * Copyright: Copyright Sean Kelly 2005 - 2009. + * License: Boost License 1.0. + * Authors: Sean Kelly + * Standards: ISO/IEC 9899:1999 (E) + * + * Copyright Sean Kelly 2005 - 2009. + * Distributed under the Boost Software License, Version 1.0. + * (See accompanying file LICENSE_1_0.txt or copy at + * http://www.boost.org/LICENSE_1_0.txt) + */ +module core.stdc.errno; + +extern (C) int getErrno(); // for internal use +extern (C) int setErrno(int); // for internal use + +alias getErrno errno; +alias setErrno errno; + +extern (C): + +version( Windows ) +{ + enum EPERM = 1; // Operation not permitted + enum ENOENT = 2; // No such file or directory + enum ESRCH = 3; // No such process + enum EINTR = 4; // Interrupted system call + enum EIO = 5; // I/O error + enum ENXIO = 6; // No such device or address + enum E2BIG = 7; // Argument list too long + enum ENOEXEC = 8; // Exec format error + enum EBADF = 9; // Bad file number + enum ECHILD = 10; // No child processes + enum EAGAIN = 11; // Try again + enum ENOMEM = 12; // Out of memory + enum EACCES = 13; // Permission denied + enum EFAULT = 14; // Bad address + enum EBUSY = 16; // Device or resource busy + enum EEXIST = 17; // File exists + enum EXDEV = 18; // Cross-device link + enum ENODEV = 19; // No such device + enum ENOTDIR = 20; // Not a directory + enum EISDIR = 21; // Is a directory + enum EINVAL = 22; // 
Invalid argument + enum ENFILE = 23; // File table overflow + enum EMFILE = 24; // Too many open files + enum ENOTTY = 25; // Not a typewriter + enum EFBIG = 27; // File too large + enum ENOSPC = 28; // No space left on device + enum ESPIPE = 29; // Illegal seek + enum EROFS = 30; // Read-only file system + enum EMLINK = 31; // Too many links + enum EPIPE = 32; // Broken pipe + enum EDOM = 33; // Math argument out of domain of func + enum ERANGE = 34; // Math result not representable + enum EDEADLK = 36; // Resource deadlock would occur + enum ENAMETOOLONG = 38; // File name too long + enum ENOLCK = 39; // No record locks available + enum ENOSYS = 40; // Function not implemented + enum ENOTEMPTY = 41; // Directory not empty + enum EILSEQ = 42; // Illegal byte sequence + enum EDEADLOCK = EDEADLK; +} +else version( linux ) +{ + enum EPERM = 1; // Operation not permitted + enum ENOENT = 2; // No such file or directory + enum ESRCH = 3; // No such process + enum EINTR = 4; // Interrupted system call + enum EIO = 5; // I/O error + enum ENXIO = 6; // No such device or address + enum E2BIG = 7; // Argument list too long + enum ENOEXEC = 8; // Exec format error + enum EBADF = 9; // Bad file number + enum ECHILD = 10; // No child processes + enum EAGAIN = 11; // Try again + enum ENOMEM = 12; // Out of memory + enum EACCES = 13; // Permission denied + enum EFAULT = 14; // Bad address + enum ENOTBLK = 15; // Block device required + enum EBUSY = 16; // Device or resource busy + enum EEXIST = 17; // File exists + enum EXDEV = 18; // Cross-device link + enum ENODEV = 19; // No such device + enum ENOTDIR = 20; // Not a directory + enum EISDIR = 21; // Is a directory + enum EINVAL = 22; // Invalid argument + enum ENFILE = 23; // File table overflow + enum EMFILE = 24; // Too many open files + enum ENOTTY = 25; // Not a typewriter + enum ETXTBSY = 26; // Text file busy + enum EFBIG = 27; // File too large + enum ENOSPC = 28; // No space left on device + enum ESPIPE = 29; // Illegal 
seek + enum EROFS = 30; // Read-only file system + enum EMLINK = 31; // Too many links + enum EPIPE = 32; // Broken pipe + enum EDOM = 33; // Math argument out of domain of func + enum ERANGE = 34; // Math result not representable + enum EDEADLK = 35; // Resource deadlock would occur + enum ENAMETOOLONG = 36; // File name too long + enum ENOLCK = 37; // No record locks available + enum ENOSYS = 38; // Function not implemented + enum ENOTEMPTY = 39; // Directory not empty + enum ELOOP = 40; // Too many symbolic links encountered + enum EWOULDBLOCK = EAGAIN; // Operation would block + enum ENOMSG = 42; // No message of desired type + enum EIDRM = 43; // Identifier removed + enum ECHRNG = 44; // Channel number out of range + enum EL2NSYNC = 45; // Level 2 not synchronized + enum EL3HLT = 46; // Level 3 halted + enum EL3RST = 47; // Level 3 reset + enum ELNRNG = 48; // Link number out of range + enum EUNATCH = 49; // Protocol driver not attached + enum ENOCSI = 50; // No CSI structure available + enum EL2HLT = 51; // Level 2 halted + enum EBADE = 52; // Invalid exchange + enum EBADR = 53; // Invalid request descriptor + enum EXFULL = 54; // Exchange full + enum ENOANO = 55; // No anode + enum EBADRQC = 56; // Invalid request code + enum EBADSLT = 57; // Invalid slot + enum EDEADLOCK = EDEADLK; + enum EBFONT = 59; // Bad font file format + enum ENOSTR = 60; // Device not a stream + enum ENODATA = 61; // No data available + enum ETIME = 62; // Timer expired + enum ENOSR = 63; // Out of streams resources + enum ENONET = 64; // Machine is not on the network + enum ENOPKG = 65; // Package not installed + enum EREMOTE = 66; // Object is remote + enum ENOLINK = 67; // Link has been severed + enum EADV = 68; // Advertise error + enum ESRMNT = 69; // Srmount error + enum ECOMM = 70; // Communication error on send + enum EPROTO = 71; // Protocol error + enum EMULTIHOP = 72; // Multihop attempted + enum EDOTDOT = 73; // RFS specific error + enum EBADMSG = 74; // Not a data 
message + enum EOVERFLOW = 75; // Value too large for defined data type + enum ENOTUNIQ = 76; // Name not unique on network + enum EBADFD = 77; // File descriptor in bad state + enum EREMCHG = 78; // Remote address changed + enum ELIBACC = 79; // Can not access a needed shared library + enum ELIBBAD = 80; // Accessing a corrupted shared library + enum ELIBSCN = 81; // .lib section in a.out corrupted + enum ELIBMAX = 82; // Attempting to link in too many shared libraries + enum ELIBEXEC = 83; // Cannot exec a shared library directly + enum EILSEQ = 84; // Illegal byte sequence + enum ERESTART = 85; // Interrupted system call should be restarted + enum ESTRPIPE = 86; // Streams pipe error + enum EUSERS = 87; // Too many users + enum ENOTSOCK = 88; // Socket operation on non-socket + enum EDESTADDRREQ = 89; // Destination address required + enum EMSGSIZE = 90; // Message too long + enum EPROTOTYPE = 91; // Protocol wrong type for socket + enum ENOPROTOOPT = 92; // Protocol not available + enum EPROTONOSUPPORT = 93; // Protocol not supported + enum ESOCKTNOSUPPORT = 94; // Socket type not supported + enum EOPNOTSUPP = 95; // Operation not supported on transport endpoint + enum EPFNOSUPPORT = 96; // Protocol family not supported + enum EAFNOSUPPORT = 97; // Address family not supported by protocol + enum EADDRINUSE = 98; // Address already in use + enum EADDRNOTAVAIL = 99; // Cannot assign requested address + enum ENETDOWN = 100; // Network is down + enum ENETUNREACH = 101; // Network is unreachable + enum ENETRESET = 102; // Network dropped connection because of reset + enum ECONNABORTED = 103; // Software caused connection abort + enum ECONNRESET = 104; // Connection reset by peer + enum ENOBUFS = 105; // No buffer space available + enum EISCONN = 106; // Transport endpoint is already connected + enum ENOTCONN = 107; // Transport endpoint is not connected + enum ESHUTDOWN = 108; // Cannot send after transport endpoint shutdown + enum ETOOMANYREFS = 109; // Too many 
references: cannot splice + enum ETIMEDOUT = 110; // Connection timed out + enum ECONNREFUSED = 111; // Connection refused + enum EHOSTDOWN = 112; // Host is down + enum EHOSTUNREACH = 113; // No route to host + enum EALREADY = 114; // Operation already in progress + enum EINPROGRESS = 115; // Operation now in progress + enum ESTALE = 116; // Stale NFS file handle + enum EUCLEAN = 117; // Structure needs cleaning + enum ENOTNAM = 118; // Not a XENIX named type file + enum ENAVAIL = 119; // No XENIX semaphores available + enum EISNAM = 120; // Is a named type file + enum EREMOTEIO = 121; // Remote I/O error + enum EDQUOT = 122; // Quota exceeded + enum ENOMEDIUM = 123; // No medium found + enum EMEDIUMTYPE = 124; // Wrong medium type + enum ECANCELED = 125; // Operation Canceled + enum ENOKEY = 126; // Required key not available + enum EKEYEXPIRED = 127; // Key has expired + enum EKEYREVOKED = 128; // Key has been revoked + enum EKEYREJECTED = 129; // Key was rejected by service + enum EOWNERDEAD = 130; // Owner died + enum ENOTRECOVERABLE = 131; // State not recoverable +} +else version( OSX ) +{ + enum EPERM = 1; // Operation not permitted + enum ENOENT = 2; // No such file or directory + enum ESRCH = 3; // No such process + enum EINTR = 4; // Interrupted system call + enum EIO = 5; // Input/output error + enum ENXIO = 6; // Device not configured + enum E2BIG = 7; // Argument list too long + enum ENOEXEC = 8; // Exec format error + enum EBADF = 9; // Bad file descriptor + enum ECHILD = 10; // No child processes + enum EDEADLK = 11; // Resource deadlock avoided + enum ENOMEM = 12; // Cannot allocate memory + enum EACCES = 13; // Permission denied + enum EFAULT = 14; // Bad address + enum EBUSY = 16; // Device busy + enum EEXIST = 17; // File exists + enum EXDEV = 18; // Cross-device link + enum ENODEV = 19; // Operation not supported by device + enum ENOTDIR = 20; // Not a directory + enum EISDIR = 21; // Is a directory + enum EINVAL = 22; // Invalid argument + 
enum ENFILE = 23; // Too many open files in system + enum EMFILE = 24; // Too many open files + enum ENOTTY = 25; // Inappropriate ioctl for device + enum ETXTBSY = 26; // Text file busy + enum EFBIG = 27; // File too large + enum ENOSPC = 28; // No space left on device + enum ESPIPE = 29; // Illegal seek + enum EROFS = 30; // Read-only file system + enum EMLINK = 31; // Too many links + enum EPIPE = 32; // Broken pipe + enum EDOM = 33; // Numerical argument out of domain + enum ERANGE = 34; // Result too large + enum EAGAIN = 35; // Resource temporarily unavailable + enum EWOULDBLOCK = EAGAIN; // Operation would block + enum EINPROGRESS = 36; // Operation now in progress + enum EALREADY = 37; // Operation already in progress + enum ENOTSOCK = 38; // Socket operation on non-socket + enum EDESTADDRREQ = 39; // Destination address required + enum EMSGSIZE = 40; // Message too long + enum EPROTOTYPE = 41; // Protocol wrong type for socket + enum ENOPROTOOPT = 42; // Protocol not available + enum EPROTONOSUPPORT = 43; // Protocol not supported + enum ENOTSUP = 45; // Operation not supported + enum EOPNOTSUPP = ENOTSUP; // Operation not supported on socket + enum EAFNOSUPPORT = 47; // Address family not supported by protocol family + enum EADDRINUSE = 48; // Address already in use + enum EADDRNOTAVAIL = 49; // Can't assign requested address + enum ENETDOWN = 50; // Network is down + enum ENETUNREACH = 51; // Network is unreachable + enum ENETRESET = 52; // Network dropped connection on reset + enum ECONNABORTED = 53; // Software caused connection abort + enum ECONNRESET = 54; // Connection reset by peer + enum ENOBUFS = 55; // No buffer space available + enum EISCONN = 56; // Socket is already connected + enum ENOTCONN = 57; // Socket is not connected + enum ETIMEDOUT = 60; // Operation timed out + enum ECONNREFUSED = 61; // Connection refused + enum ELOOP = 62; // Too many levels of symbolic links + enum ENAMETOOLONG = 63; // File name too long + enum EHOSTUNREACH = 
65; // No route to host + enum ENOTEMPTY = 66; // Directory not empty + enum EDQUOT = 69; // Disc quota exceeded + enum ESTALE = 70; // Stale NFS file handle + enum ENOLCK = 77; // No locks available + enum ENOSYS = 78; // Function not implemented + enum EOVERFLOW = 84; // Value too large to be stored in data type + enum ECANCELED = 89; // Operation canceled + enum EIDRM = 90; // Identifier removed + enum ENOMSG = 91; // No message of desired type + enum EILSEQ = 92; // Illegal byte sequence + enum EBADMSG = 94; // Bad message + enum EMULTIHOP = 95; // Reserved + enum ENODATA = 96; // No message available on STREAM + enum ENOLINK = 97; // Reserved + enum ENOSR = 98; // No STREAM resources + enum ENOSTR = 99; // Not a STREAM + enum EPROTO = 100; // Protocol error + enum ETIME = 101; // STREAM ioctl timeout + enum ELAST = 101; // Must be equal largest errno +} +else version( freebsd ) +{ + enum EPERM = 1; // Operation not permitted + enum ENOENT = 2; // No such file or directory + enum ESRCH = 3; // No such process + enum EINTR = 4; // Interrupted system call + enum EIO = 5; // Input/output error + enum ENXIO = 6; // Device not configured + enum E2BIG = 7; // Argument list too long + enum ENOEXEC = 8; // Exec format error + enum EBADF = 9; // Bad file descriptor + enum ECHILD = 10; // No child processes + enum EDEADLK = 11; // Resource deadlock avoided + enum ENOMEM = 12; // Cannot allocate memory + enum EACCES = 13; // Permission denied + enum EFAULT = 14; // Bad address + enum ENOTBLK = 15; // Block device required + enum EBUSY = 16; // Device busy + enum EEXIST = 17; // File exists + enum EXDEV = 18; // Cross-device link + enum ENODEV = 19; // Operation not supported by device + enum ENOTDIR = 20; // Not a directory + enum EISDIR = 21; // Is a directory + enum EINVAL = 22; // Invalid argument + enum ENFILE = 23; // Too many open files in system + enum EMFILE = 24; // Too many open files + enum ENOTTY = 25; // Inappropriate ioctl for device + enum ETXTBSY = 26; // 
Text file busy + enum EFBIG = 27; // File too large + enum ENOSPC = 28; // No space left on device + enum ESPIPE = 29; // Illegal seek + enum EROFS = 30; // Read-only file system + enum EMLINK = 31; // Too many links + enum EPIPE = 32; // Broken pipe + enum EDOM = 33; // Numerical argument out of domain + enum ERANGE = 34; // Result too large + enum EAGAIN = 35; // Resource temporarily unavailable + enum EWOULDBLOCK = EAGAIN; // Operation would block + enum EINPROGRESS = 36; // Operation now in progress + enum EALREADY = 37; // Operation already in progress + enum ENOTSOCK = 38; // Socket operation on non-socket + enum EDESTADDRREQ = 39; // Destination address required + enum EMSGSIZE = 40; // Message too long + enum EPROTOTYPE = 41; // Protocol wrong type for socket + enum ENOPROTOOPT = 42; // Protocol not available + enum EPROTONOSUPPORT = 43; // Protocol not supported + enum ENOTSUP = 45; // Operation not supported + enum EOPNOTSUPP = ENOTSUP; // Operation not supported on socket + enum EAFNOSUPPORT = 47; // Address family not supported by protocol family + enum EADDRINUSE = 48; // Address already in use + enum EADDRNOTAVAIL = 49; // Can't assign requested address + enum ENETDOWN = 50; // Network is down + enum ENETUNREACH = 51; // Network is unreachable + enum ENETRESET = 52; // Network dropped connection on reset + enum ECONNABORTED = 53; // Software caused connection abort + enum ECONNRESET = 54; // Connection reset by peer + enum ENOBUFS = 55; // No buffer space available + enum EISCONN = 56; // Socket is already connected + enum ENOTCONN = 57; // Socket is not connected + enum ESHUTDOWN = 58; // Can't send after socket shutdown + enum ETOOMANYREFS = 59; // Too many references; can't splice + enum ETIMEDOUT = 60; // Operation timed out + enum ECONNREFUSED = 61; // Connection refused + enum ELOOP = 62; // Too many levels of symbolic links + enum ENAMETOOLONG = 63; // File name too long + enum EHOSTUNREACH = 65; // No route to host + enum ENOTEMPTY = 66; // 
Directory not empty + enum EPROCLIM = 67; // Too many processes + enum EUSERS = 68; // Too many users + enum EDQUOT = 69; // Disc quota exceeded + enum ESTALE = 70; // Stale NFS file handle + enum EREMOTE = 71; // Too many levels of remote in path + enum EBADRPC = 72; // RPC struct is bad + enum ERPCMISMATCH = 73; // RPC version wrong + enum EPROGUNAVAIL = 74; // RPC prog. not avail + enum EPROGMISMATCH = 75; // Program version wrong + enum EPROCUNAVAIL = 76; // Bad procedure for program + enum ENOLCK = 77; // No locks available + enum ENOSYS = 78; // Function not implemented + enum EFTYPE = 79; // Inappropriate file type or format + enum EAUTH = 80; // Authentication error + enum ENEEDAUTH = 81; // Need authenticator + enum EIDRM = 82; // Identifier removed + enum ENOMSG = 83; // No message of desired type + enum EOVERFLOW = 84; // Value too large to be stored in data type + enum ECANCELED = 85; // Operation canceled + enum EILSEQ = 86; // Illegal byte sequence + enum ENOATTR = 87; // Attribute not found + enum EDOOFUS = 88; // Programming error + enum EBADMSG = 89; // Bad message + enum EMULTIHOP = 90; // Multihop attempted + enum ENOLINK = 91; // Link has been severed + enum EPROTO = 92; // Protocol error + enum ELAST = 92; // Must be equal largest errno +} diff --git a/druntime/import/core/stdc/fenv.d b/druntime/import/core/stdc/fenv.d new file mode 100644 index 00000000..4c001c01 --- /dev/null +++ b/druntime/import/core/stdc/fenv.d @@ -0,0 +1,142 @@ +/** + * D header file for C99. + * + * Copyright: Copyright Sean Kelly 2005 - 2009. + * License: Boost License 1.0. + * Authors: Sean Kelly + * Standards: ISO/IEC 9899:1999 (E) + * + * Copyright Sean Kelly 2005 - 2009. + * Distributed under the Boost Software License, Version 1.0. 
+ * (See accompanying file LICENSE_1_0.txt or copy at + * http://www.boost.org/LICENSE_1_0.txt) + */ +module core.stdc.float_; + +extern (C): + +enum FLT_ROUNDS = 1; +enum FLT_EVAL_METHOD = 2; +enum FLT_RADIX = 2; + +enum DECIMAL_DIG = real.dig; +enum FLT_DIG = float.dig; +enum DBL_DIG = double.dig; +enum LDBL_DIG = real.dig; + +enum FLT_MANT_DIG = float.mant_dig; +enum DBL_MANT_DIG = double.mant_dig; +enum LDBL_MANT_DIG = real.mant_dig; + +enum FLT_MIN = float.min; +enum DBL_MIN = double.min; +enum LDBL_MIN = real.min; + +enum FLT_MAX = float.max; +enum DBL_MAX = double.max; +enum LDBL_MAX = real.max; + +enum FLT_EPSILON = float.epsilon; +enum DBL_EPSILON = double.epsilon; +enum LDBL_EPSILON = real.epsilon; + +enum FLT_MIN_EXP = float.min_exp; +enum DBL_MIN_EXP = double.min_exp; +enum LDBL_MIN_EXP = real.min_exp; + +enum FLT_MAX_EXP = float.max_exp; +enum DBL_MAX_EXP = double.max_exp; +enum LDBL_MAX_EXP = real.max_exp; + +enum FLT_MIN_10_EXP = float.min_10_exp; +enum DBL_MIN_10_EXP = double.min_10_exp; +enum LDBL_MIN_10_EXP = real.min_10_exp; + +enum FLT_MAX_10_EXP = float.max_10_exp; +enum DBL_MAX_10_EXP = double.max_10_exp; +enum LDBL_MAX_10_EXP = real.max_10_exp; diff --git a/druntime/import/core/stdc/inttypes.d b/druntime/import/core/stdc/inttypes.d new file mode 100644 index 00000000..290e169c --- /dev/null +++ b/druntime/import/core/stdc/inttypes.d @@ -0,0 +1,256 @@ +/** + * D header file for C99. + * + * Copyright: Copyright Sean Kelly 2005 - 2009. + * License: Boost License 1.0. + * Authors: Sean Kelly + * Standards: ISO/IEC 9899:1999 (E) + * + * Copyright Sean Kelly 2005 - 2009. + * Distributed under the Boost Software License, Version 1.0. 
+ * (See accompanying file LICENSE_1_0.txt or copy at + * http://www.boost.org/LICENSE_1_0.txt) + */ +module core.stdc.locale; + +extern (C): + +struct lconv +{ + char* decimal_point; + char* thousands_sep; + char* grouping; + char* int_curr_symbol; + char* currency_symbol; + char* mon_decimal_point; + char* mon_thousands_sep; + char* mon_grouping; + char* positive_sign; + char* negative_sign; + byte int_frac_digits; + byte frac_digits; + byte p_cs_precedes; + byte p_sep_by_space; + byte n_cs_precedes; + byte n_sep_by_space; + byte p_sign_posn; + byte n_sign_posn; + byte int_p_cs_precedes; + byte int_p_sep_by_space; + byte int_n_cs_precedes; + byte int_n_sep_by_space; + byte int_p_sign_posn; + byte int_n_sign_posn; +} + +enum LC_CTYPE = 0; +enum LC_NUMERIC = 1; +enum LC_TIME = 2; +enum LC_COLLATE = 3; +enum LC_MONETARY = 4; +enum LC_ALL = 6; +enum LC_PAPER = 7; // non-standard +enum LC_NAME = 8; // non-standard +enum LC_ADDRESS = 9; // non-standard +enum LC_TELEPHONE = 10; // non-standard +enum LC_MEASUREMENT = 11; // non-standard +enum LC_IDENTIFICATION = 12; // non-standard + +char* setlocale(int category, in char* locale); +lconv* localeconv(); diff --git a/druntime/import/core/stdc/math.d b/druntime/import/core/stdc/math.d new file mode 100644 index 00000000..14474b6b --- /dev/null +++ b/druntime/import/core/stdc/math.d @@ -0,0 +1,933 @@ +/** + * D header file for C99. + * + * Copyright: Copyright Sean Kelly 2005 - 2009. + * License: Boost License 1.0. + * Authors: Sean Kelly + * Standards: ISO/IEC 9899:1999 (E) + * + * Copyright Sean Kelly 2005 - 2009. + * Distributed under the Boost Software License, Version 1.0. 
+ * (See accompanying file LICENSE_1_0.txt or copy at + * http://www.boost.org/LICENSE_1_0.txt) + */ +module core.stdc.signal; + +extern (C): + +// this should be volatile +alias int sig_atomic_t; + +private alias void function(int) sigfn_t; + +version( Posix ) +{ + enum SIG_ERR = cast(sigfn_t) -1; + enum SIG_DFL = cast(sigfn_t) 0; + enum SIG_IGN = cast(sigfn_t) 1; + + // standard C signals + enum SIGABRT = 6; // Abnormal termination + enum SIGFPE = 8; // Floating-point error + enum SIGILL = 4; // Illegal hardware instruction + enum SIGINT = 2; // Terminal interrupt character + enum SIGSEGV = 11; // Invalid memory reference + enum SIGTERM = 15; // Termination +} +else +{ + enum SIG_ERR = cast(sigfn_t) -1; + enum SIG_DFL = cast(sigfn_t) 0; + enum SIG_IGN = cast(sigfn_t) 1; + + // standard C signals + enum SIGABRT = 22; // Abnormal termination + enum SIGFPE = 8; // Floating-point error + enum SIGILL = 4; // Illegal hardware instruction + enum SIGINT = 2; // Terminal interrupt character + enum SIGSEGV = 11; // Invalid memory reference + enum SIGTERM = 15; // Termination +} + +sigfn_t signal(int sig, sigfn_t func); +int raise(int sig); diff --git a/druntime/import/core/stdc/stdarg.d b/druntime/import/core/stdc/stdarg.d new file mode 100644 index 00000000..46a827cc --- /dev/null +++ b/druntime/import/core/stdc/stdarg.d @@ -0,0 +1,45 @@ +/** + * D header file for C99. + * + * Copyright: Copyright Sean Kelly 2005 - 2009. + * License: Boost License 1.0. + * Authors: Sean Kelly + * Standards: ISO/IEC 9899:1999 (E) + * + * Copyright Sean Kelly 2005 - 2009. + * Distributed under the Boost Software License, Version 1.0. 
+ * (See accompanying file LICENSE_1_0.txt or copy at + * http://www.boost.org/LICENSE_1_0.txt) + */ +module core.stdc.stddef; + +extern (C): + +//alias typeof(int.sizeof) size_t; +//alias typeof(cast(void*)0 - cast(void*)0) ptrdiff_t; + +version( Windows ) +{ + alias wchar wchar_t; +} +else +{ + alias dchar wchar_t; +} diff --git a/druntime/import/core/stdc/stdint.d b/druntime/import/core/stdc/stdint.d new file mode 100644 index 00000000..478f1963 --- /dev/null +++ b/druntime/import/core/stdc/stdint.d @@ -0,0 +1,154 @@ +/** + * D header file for C99. + * + * Copyright: Copyright Sean Kelly 2005 - 2009. + * License: Boost License 1.0. + * Authors: Sean Kelly + * Standards: ISO/IEC 9899:1999 (E) + * + * Copyright Sean Kelly 2005 - 2009. + * Distributed under the Boost Software License, Version 1.0. + * (See accompanying file LICENSE_1_0.txt or copy at + * http://www.boost.org/LICENSE_1_0.txt) + */ +module core.stdc.stdio; + +private +{ + import core.stdc.config; + import core.stdc.stddef; // for size_t + import core.stdc.stdarg; // for va_list +} + +extern (C): + +version( Windows ) +{ + enum + { + BUFSIZ = 0x4000, + EOF = -1, + FOPEN_MAX = 20, + FILENAME_MAX = 256, // 255 plus NULL + TMP_MAX = 32767, + SYS_OPEN = 20, // non-standard + } + + enum int _NFILE = 60; // non-standard + enum string _P_tmpdir = "\\"; // non-standard + enum wstring _wP_tmpdir = "\\"; // non-standard + enum int L_tmpnam = _P_tmpdir.length + 12; +} +else version( linux ) +{ + enum + { + BUFSIZ = 8192, + EOF = -1, + FOPEN_MAX = 16, + FILENAME_MAX = 4095, + TMP_MAX = 238328, + L_tmpnam = 20 + } +} +else version( OSX ) +{ + enum + { + BUFSIZ = 1024, + EOF = -1, + FOPEN_MAX = 20, + FILENAME_MAX = 1024, + TMP_MAX = 308915776, + L_tmpnam = 1024, + } + + private + { + struct __sbuf + { + ubyte* _base; + int _size; + } + + struct __sFILEX + { + + } + } +} +else version ( FreeBSD ) +{ + enum + { + EOF = -1, + FOPEN_MAX = 20, + FILENAME_MAX = 1024, + TMP_MAX = 308915776, + L_tmpnam = 1024 + } + + 
private + { + struct __sbuf + { + ubyte *_base; + int _size; + } + struct __sFILEX + { + + } + } +} +else +{ + static assert( false ); +} + +enum +{ + SEEK_SET, + SEEK_CUR, + SEEK_END +} + +struct _iobuf +{ + align (1): + version( Windows ) + { + char* _ptr; + int _cnt; + char* _base; + int _flag; + int _file; + int _charbuf; + int _bufsiz; + int __tmpnum; + } + else version( linux ) + { + char* _read_ptr; + char* _read_end; + char* _read_base; + char* _write_base; + char* _write_ptr; + char* _write_end; + char* _buf_base; + char* _buf_end; + char* _save_base; + char* _backup_base; + char* _save_end; + void* _markers; + _iobuf* _chain; + int _fileno; + int _blksize; + int _old_offset; + ushort _cur_column; + byte _vtable_offset; + char[1] _shortbuf; + void* _lock; + } + else version( OSX ) + { + ubyte* _p; + int _r; + int _w; + short _flags; + short _file; + __sbuf _bf; + int _lbfsize; + + int* function(void*) _close; + int* function(void*, char*, int) _read; + fpos_t* function(void*, fpos_t, int) _seek; + int* function(void*, char *, int) _write; + + __sbuf _ub; + __sFILEX* _extra; + int _ur; + + ubyte[3] _ubuf; + ubyte[1] _nbuf; + + __sbuf _lb; + + int _blksize; + fpos_t _offset; + } + else version( FreeBSD ) + { + ubyte* _p; + int _r; + int _w; + short _flags; + short _file; + __sbuf _bf; + int _lbfsize; + + void* function() _cookie; + int* function(void*) _close; + int* function(void*, char*, int) _read; + fpos_t* function(void*, fpos_t, int) _seek; + int* function(void*, char *, int) _write; + + __sbuf _ub; + __sFILEX* _extra; + int _ur; + + ubyte[3] _ubuf; + ubyte[1] _nbuf; + + __sbuf _lb; + + int _blksize; + fpos_t _offset; + } + else + { + static assert( false ); + } +} + +alias shared(_iobuf) FILE; + +enum +{ + _F_RDWR = 0x0003, // non-standard + _F_READ = 0x0001, // non-standard + _F_WRIT = 0x0002, // non-standard + _F_BUF = 0x0004, // non-standard + _F_LBUF = 0x0008, // non-standard + _F_ERR = 0x0010, // non-standard + _F_EOF = 0x0020, // non-standard + 
_F_BIN = 0x0040, // non-standard + _F_IN = 0x0080, // non-standard + _F_OUT = 0x0100, // non-standard + _F_TERM = 0x0200, // non-standard +} + +version( Windows ) +{ + enum + { + _IOFBF = 0, + _IOLBF = 0x40, + _IONBF = 4, + _IOREAD = 1, // non-standard + _IOWRT = 2, // non-standard + _IOMYBUF = 8, // non-standard + _IOEOF = 0x10, // non-standard + _IOERR = 0x20, // non-standard + _IOSTRG = 0x40, // non-standard + _IORW = 0x80, // non-standard + _IOTRAN = 0x100, // non-standard + _IOAPP = 0x200, // non-standard + } + + extern shared void function() _fcloseallp; + + private extern shared FILE[_NFILE] _iob; + + shared stdin = &_iob[0]; + shared stdout = &_iob[1]; + shared stderr = &_iob[2]; + shared stdaux = &_iob[3]; + shared stdprn = &_iob[4]; +} +else version( linux ) +{ + enum + { + _IOFBF = 0, + _IOLBF = 1, + _IONBF = 2, + } + + extern shared FILE* stdin; + extern shared FILE* stdout; + extern shared FILE* stderr; +} +else version( OSX ) +{ + enum + { + _IOFBF = 0, + _IOLBF = 1, + _IONBF = 2, + } + + private extern shared FILE* __stdinp; + private extern shared FILE* __stdoutp; + private extern shared FILE* __stderrp; + + alias __stdinp stdin; + alias __stdoutp stdout; + alias __stderrp stderr; +} +else version( FreeBSD ) +{ + private extern shared FILE[3] __sF; + + shared stdin = &__sF[0]; + shared stdout = &__sF[1]; + shared stderr = &__sF[2]; +} +else +{ + static assert( false ); +} + +alias int fpos_t; + +int remove(in char* filename); +int rename(in char* from, in char* to); + +FILE* tmpfile(); +char* tmpnam(char* s); + +int fclose(FILE* stream); +int fflush(FILE* stream); +FILE* fopen(in char* filename, in char* mode); +FILE* freopen(in char* filename, in char* mode, FILE* stream); + +void setbuf(FILE* stream, char* buf); +int setvbuf(FILE* stream, char* buf, int mode, size_t size); + +int fprintf(FILE* stream, in char* format, ...); +int fscanf(FILE* stream, in char* format, ...); +int sprintf(char* s, in char* format, ...); +int sscanf(in char* s, in 
char* format, ...); +int vfprintf(FILE* stream, in char* format, va_list arg); +int vfscanf(FILE* stream, in char* format, va_list arg); +int vsprintf(char* s, in char* format, va_list arg); +int vsscanf(in char* s, in char* format, va_list arg); +int vprintf(in char* format, va_list arg); +int vscanf(in char* format, va_list arg); +int printf(in char* format, ...); +int scanf(in char* format, ...); + +int fgetc(FILE* stream); +int fputc(int c, FILE* stream); + +char* fgets(char* s, int n, FILE* stream); +int fputs(in char* s, FILE* stream); +char* gets(char* s); +int puts(in char* s); + +extern (D) +{ + int getchar() { return getc(stdin); } + int putchar(int c) { return putc(c,stdout); } + int getc(FILE* stream) { return fgetc(stream); } + int putc(int c, FILE* stream) { return fputc(c,stream); } +} + +int ungetc(int c, FILE* stream); + +size_t fread(void* ptr, size_t size, size_t nmemb, FILE* stream); +size_t fwrite(in void* ptr, size_t size, size_t nmemb, FILE* stream); + +int fgetpos(FILE* stream, fpos_t * pos); +int fsetpos(FILE* stream, in fpos_t* pos); + +int fseek(FILE* stream, c_long offset, int whence); +c_long ftell(FILE* stream); + +version( Windows ) +{ + extern (D) + { + void rewind(FILE* stream) { fseek(stream,0L,SEEK_SET); stream._flag&=~_IOERR; } + void clearerr(FILE* stream) { stream._flag &= ~(_IOERR|_IOEOF); } + int feof(FILE* stream) { return stream._flag&_IOEOF; } + int ferror(FILE* stream) { return stream._flag&_IOERR; } + } + int _snprintf(char* s, size_t n, in char* fmt, ...); + alias _snprintf snprintf; + + int _vsnprintf(char* s, size_t n, in char* format, va_list arg); + alias _vsnprintf vsnprintf; +} +else version( linux ) +{ + void rewind(FILE* stream); + void clearerr(FILE* stream); + int feof(FILE* stream); + int ferror(FILE* stream); + int fileno(FILE *); + + int snprintf(char* s, size_t n, in char* format, ...); + int vsnprintf(char* s, size_t n, in char* format, va_list arg); +} +else version( OSX ) +{ + void rewind(FILE*); + void 
clearerr(FILE*); + int feof(FILE*); + int ferror(FILE*); + int fileno(FILE*); + + int snprintf(char* s, size_t n, in char* format, ...); + int vsnprintf(char* s, size_t n, in char* format, va_list arg); +} +else version( FreeBSD ) +{ + void rewind(FILE*); + void clearerr(FILE*); + int feof(FILE*); + int ferror(FILE*); + int fileno(FILE*); + + int snprintf(char* s, size_t n, in char* format, ...); + int vsnprintf(char* s, size_t n, in char* format, va_list arg); +} +else +{ + static assert( false ); +} + +void perror(in char* s); diff --git a/druntime/import/core/stdc/stdlib.d b/druntime/import/core/stdc/stdlib.d new file mode 100644 index 00000000..5098d41e --- /dev/null +++ b/druntime/import/core/stdc/stdlib.d @@ -0,0 +1,93 @@ +/** + * D header file for C99. + * + * Copyright: Copyright Sean Kelly 2005 - 2009. + * License: Boost License 1.0. + * Authors: Sean Kelly + * Standards: ISO/IEC 9899:1999 (E) + * + * Copyright Sean Kelly 2005 - 2009. + * Distributed under the Boost Software License, Version 1.0. 
+ * (See accompanying file LICENSE_1_0.txt or copy at + * http://www.boost.org/LICENSE_1_0.txt) + */ +module core.stdc.string; + +private import core.stdc.stddef; // for size_t + +extern (C): + +void* memchr(in void* s, int c, size_t n); +int memcmp(in void* s1, in void* s2, size_t n); +void* memcpy(void* s1, in void* s2, size_t n); +void* memmove(void* s1, in void* s2, size_t n); +void* memset(void* s, int c, size_t n); + +char* strcpy(char* s1, in char* s2); +char* strncpy(char* s1, in char* s2, size_t n); +char* strcat(char* s1, in char* s2); +char* strncat(char* s1, in char* s2, size_t n); +int strcmp(in char* s1, in char* s2); +int strcoll(in char* s1, in char* s2); +int strncmp(in char* s1, in char* s2, size_t n); +size_t strxfrm(char* s1, in char* s2, size_t n); +char* strchr(in char* s, int c); +size_t strcspn(in char* s1, in char* s2); +char* strpbrk(in char* s1, in char* s2); +char* strrchr(in char* s, int c); +size_t strspn(in char* s1, in char* s2); +char* strstr(in char* s1, in char* s2); +char* strtok(char* s1, in char* s2); +char* strerror(int errnum); +size_t strlen(in char* s); diff --git a/druntime/import/core/stdc/tgmath.d b/druntime/import/core/stdc/tgmath.d new file mode 100644 index 00000000..3dfe31a5 --- /dev/null +++ b/druntime/import/core/stdc/tgmath.d @@ -0,0 +1,657 @@ +/** + * D header file for C99. + * + * Copyright: Copyright Sean Kelly 2005 - 2009. + * License: Boost License 1.0. + * Authors: Sean Kelly + * Standards: ISO/IEC 9899:1999 (E) + * + * Copyright Sean Kelly 2005 - 2009. + * Distributed under the Boost Software License, Version 1.0. 
+ * (See accompanying file LICENSE_1_0.txt or copy at + * http://www.boost.org/LICENSE_1_0.txt) + */ +module core.stdc.time; + +private import core.stdc.config; +private import core.stdc.stddef; // for size_t + +extern (C): + +version( Windows ) +{ + struct tm + { + int tm_sec; // seconds after the minute - [0, 60] + int tm_min; // minutes after the hour - [0, 59] + int tm_hour; // hours since midnight - [0, 23] + int tm_mday; // day of the month - [1, 31] + int tm_mon; // months since January - [0, 11] + int tm_year; // years since 1900 + int tm_wday; // days since Sunday - [0, 6] + int tm_yday; // days since January 1 - [0, 365] + int tm_isdst; // Daylight Saving Time flag + } +} +else +{ + struct tm + { + int tm_sec; // seconds after the minute [0-60] + int tm_min; // minutes after the hour [0-59] + int tm_hour; // hours since midnight [0-23] + int tm_mday; // day of the month [1-31] + int tm_mon; // months since January [0-11] + int tm_year; // years since 1900 + int tm_wday; // days since Sunday [0-6] + int tm_yday; // days since January 1 [0-365] + int tm_isdst; // Daylight Savings Time flag + c_long tm_gmtoff; // offset from CUT in seconds + char* tm_zone; // timezone abbreviation + } +} + +alias c_long time_t; +alias c_long clock_t; + +version( Windows ) +{ + clock_t CLOCKS_PER_SEC = 1000; +} +else version( OSX ) +{ + clock_t CLOCKS_PER_SEC = 1000000; // XSI value; 100 is CLK_TCK, not CLOCKS_PER_SEC +} +else version( FreeBSD ) +{ + clock_t CLOCKS_PER_SEC = 128; +} +else +{ + clock_t CLOCKS_PER_SEC = 1000000; +} + +clock_t clock(); +double difftime(time_t time1, time_t time0); +time_t mktime(tm* timeptr); +time_t time(time_t* timer); +char* asctime(in tm* timeptr); +char* ctime(in time_t* timer); +tm* gmtime(in time_t* timer); +tm* localtime(in time_t* timer); +size_t strftime(char* s, size_t maxsize, in char* format, in tm* timeptr); + +version( Windows ) +{ + void tzset(); // non-standard + void _tzset(); // non-standard + char* _strdate(char* s); // non-standard + char* _strtime(char* s); // non-standard +}
+else version( linux ) +{ + void tzset(); // non-standard +} +else version( FreeBSD ) +{ + void tzset(); // non-standard +} diff --git a/druntime/import/core/stdc/wchar_.d b/druntime/import/core/stdc/wchar_.d new file mode 100644 index 00000000..e223d5cd --- /dev/null +++ b/druntime/import/core/stdc/wchar_.d @@ -0,0 +1,108 @@ +/** + * D header file for C99. + * + * Copyright: Copyright Sean Kelly 2005 - 2009. + * License: Boost License 1.0. + * Authors: Sean Kelly + * Standards: ISO/IEC 9899:1999 (E) + * + * Copyright Sean Kelly 2005 - 2009. + * Distributed under the Boost Software License, Version 1.0. + * (See accompanying file LICENSE_1_0.txt or copy at + * http://www.boost.org/LICENSE_1_0.txt) + */ +module core.stdc.wctype; + +public import core.stdc.wchar_; // for wint_t, WEOF + +extern (C): + +alias wchar_t wctrans_t; +alias wchar_t wctype_t; + +int iswalnum(wint_t wc); +int iswalpha(wint_t wc); +int iswblank(wint_t wc); +int iswcntrl(wint_t wc); +int iswdigit(wint_t wc); +int iswgraph(wint_t wc); +int iswlower(wint_t wc); +int iswprint(wint_t wc); +int iswpunct(wint_t wc); +int iswspace(wint_t wc); +int iswupper(wint_t wc); +int iswxdigit(wint_t wc); + +int iswctype(wint_t wc, wctype_t desc); +wctype_t wctype(in char* property); +wint_t towlower(wint_t wc); +wint_t towupper(wint_t wc); +wint_t towctrans(wint_t wc, wctrans_t desc); +wctrans_t wctrans(in char* property); \ No newline at end of file diff --git a/druntime/import/core/sys/osx/mach/kern_return.d b/druntime/import/core/sys/osx/mach/kern_return.d new file mode 100644 index 00000000..926483f1 --- /dev/null +++ b/druntime/import/core/sys/osx/mach/kern_return.d @@ -0,0 +1,71 @@ +/** + * D header file for OSX. + * + * Copyright: Copyright Sean Kelly 2008 - 2009. + * License: Boost License 1.0. + * Authors: Sean Kelly + * + * Copyright Sean Kelly 2008 - 2009. + * Distributed under the Boost Software License, Version 1.0.
+ * (See accompanying file LICENSE_1_0.txt or copy at + * http://www.boost.org/LICENSE_1_0.txt) + */ +module core.sys.osx.mach.port; + +extern (C): + +version( X86 ) + version = i386; +version( X86_64 ) + version = i386; +version( i386 ) +{ + alias uint natural_t; + alias natural_t mach_port_t; +} diff --git a/druntime/import/core/sys/osx/mach/semaphore.d b/druntime/import/core/sys/osx/mach/semaphore.d new file mode 100644 index 00000000..a84c19c6 --- /dev/null +++ b/druntime/import/core/sys/osx/mach/semaphore.d @@ -0,0 +1,54 @@ +/** + * D header file for OSX. + * + * Copyright: Copyright Sean Kelly 2008 - 2009. + * License: Boost License 1.0. + * Authors: Sean Kelly + * + * Copyright Sean Kelly 2008 - 2009. + * Distributed under the Boost Software License, Version 1.0. + * (See accompanying file LICENSE_1_0.txt or copy at + * http://www.boost.org/LICENSE_1_0.txt) + */ +module core.sys.osx.mach.thread_act; + +public import core.sys.osx.mach.kern_return; +public import core.sys.osx.mach.port; + +extern (C): + +version( X86 ) + version = i386; +version( X86_64 ) + version = i386; +version( i386 ) +{ + alias mach_port_t thread_act_t; + alias void thread_state_t; + alias int thread_state_flavor_t; + alias natural_t mach_msg_type_number_t; + + enum + { + x86_THREAD_STATE32 = 1, + x86_FLOAT_STATE32 = 2, + x86_EXCEPTION_STATE32 = 3, + x86_THREAD_STATE64 = 4, + x86_FLOAT_STATE64 = 5, + x86_EXCEPTION_STATE64 = 6, + x86_THREAD_STATE = 7, + x86_FLOAT_STATE = 8, + x86_EXCEPTION_STATE = 9, + x86_DEBUG_STATE32 = 10, + x86_DEBUG_STATE64 = 11, + x86_DEBUG_STATE = 12, + THREAD_STATE_NONE = 13, + } + + struct x86_thread_state32_t + { + uint eax; + uint ebx; + uint ecx; + uint edx; + uint edi; + uint esi; + uint ebp; + uint esp; + uint ss; + uint eflags; + uint eip; + uint cs; + uint ds; + uint es; + uint fs; + uint gs; + } + + struct x86_thread_state64_t + { + ulong rax; + ulong rbx; + ulong rcx; + ulong rdx; + ulong rdi; + ulong rsi; + ulong rbp; + ulong rsp; + ulong r8; + ulong 
r9; + ulong r10; + ulong r11; + ulong r12; + ulong r13; + ulong r14; + ulong r15; + ulong rip; + ulong rflags; + ulong cs; + ulong fs; + ulong gs; + } + + struct x86_state_hdr_t + { + int flavor; + int count; + } + + struct x86_thread_state_t + { + x86_state_hdr_t tsh; + union _uts + { + x86_thread_state32_t ts32; + x86_thread_state64_t ts64; + } + _uts uts; + } + + enum : mach_msg_type_number_t + { + x86_THREAD_STATE32_COUNT = cast(mach_msg_type_number_t)( x86_thread_state32_t.sizeof / int.sizeof ), + x86_THREAD_STATE64_COUNT = cast(mach_msg_type_number_t)( x86_thread_state64_t.sizeof / int.sizeof ), + x86_THREAD_STATE_COUNT = cast(mach_msg_type_number_t)( x86_thread_state_t.sizeof / int.sizeof ), + } + + alias x86_THREAD_STATE MACHINE_THREAD_STATE; + alias x86_THREAD_STATE_COUNT MACHINE_THREAD_STATE_COUNT; + + mach_port_t mach_thread_self(); + kern_return_t thread_suspend(thread_act_t); + kern_return_t thread_resume(thread_act_t); + kern_return_t thread_get_state(thread_act_t, thread_state_flavor_t, thread_state_t*, mach_msg_type_number_t*); +} diff --git a/druntime/import/core/sys/posix/arpa/inet.d b/druntime/import/core/sys/posix/arpa/inet.d new file mode 100644 index 00000000..88cbbe57 --- /dev/null +++ b/druntime/import/core/sys/posix/arpa/inet.d @@ -0,0 +1,132 @@ +/** + * D header file for POSIX. + * + * Copyright: Copyright Sean Kelly 2005 - 2009. + * License: Boost License 1.0. + * Authors: Sean Kelly + * Standards: The Open Group Base Specifications Issue 6, IEEE Std 1003.1, 2004 Edition + * + * Copyright Sean Kelly 2005 - 2009. + * Distributed under the Boost Software License, Version 1.0. 
+ * (See accompanying file LICENSE_1_0.txt or copy at + * http://www.boost.org/LICENSE_1_0.txt) + */ +module core.sys.posix.config; + +public import core.stdc.config; + +extern (C): + +version( linux ) +{ + version( X86_64 ) + { + enum bool __USE_LARGEFILE64 = true; + } + else + { + enum bool __USE_LARGEFILE64 = false; + } + enum bool __USE_FILE_OFFSET64 = __USE_LARGEFILE64; + enum bool __REDIRECT = false; +} diff --git a/druntime/import/core/sys/posix/dirent.d b/druntime/import/core/sys/posix/dirent.d new file mode 100644 index 00000000..75ba73c8 --- /dev/null +++ b/druntime/import/core/sys/posix/dirent.d @@ -0,0 +1,203 @@ +/** + * D header file for POSIX. + * + * Copyright: Copyright Sean Kelly 2005 - 2009. + * License: Boost License 1.0. + * Authors: Sean Kelly + * Standards: The Open Group Base Specifications Issue 6, IEEE Std 1003.1, 2004 Edition + * + * Copyright Sean Kelly 2005 - 2009. + * Distributed under the Boost Software License, Version 1.0. + * (See accompanying file LICENSE_1_0.txt or copy at + * http://www.boost.org/LICENSE_1_0.txt) + */ +module core.sys.posix.dlfcn; + +private import core.sys.posix.config; + +extern (C): + +// +// XOpen (XSI) +// +/* +RTLD_LAZY +RTLD_NOW +RTLD_GLOBAL +RTLD_LOCAL + +int dlclose(void*); +char* dlerror(); +void* dlopen(in char*, int); +void* dlsym(void*, in char*); +*/ + +version( linux ) +{ + enum RTLD_LAZY = 0x00001; + enum RTLD_NOW = 0x00002; + enum RTLD_GLOBAL = 0x00100; + enum RTLD_LOCAL = 0x00000; + + int dlclose(void*); + char* dlerror(); + void* dlopen(in char*, int); + void* dlsym(void*, in char*); +} +else version( OSX ) +{ + enum RTLD_LAZY = 0x00001; + enum RTLD_NOW = 0x00002; + enum RTLD_GLOBAL = 0x00100; + enum RTLD_LOCAL = 0x00000; + + int dlclose(void*); + char* dlerror(); + void* dlopen(in char*, int); + void* dlsym(void*, in char*); +} +else version( FreeBSD ) +{ + enum RTLD_LAZY = 1; + enum RTLD_NOW = 2; + enum RTLD_GLOBAL = 0x100; + enum RTLD_LOCAL = 0; + + int dlclose(void*); + char* dlerror(); +
void* dlopen(in char*, int); + void* dlsym(void*, in char*); +} diff --git a/druntime/import/core/sys/posix/fcntl.d b/druntime/import/core/sys/posix/fcntl.d new file mode 100644 index 00000000..67b6dbb4 --- /dev/null +++ b/druntime/import/core/sys/posix/fcntl.d @@ -0,0 +1,253 @@ +/** + * D header file for POSIX. + * + * Copyright: Copyright Sean Kelly 2005 - 2009. + * License: Boost License 1.0. + * Authors: Sean Kelly + * Standards: The Open Group Base Specifications Issue 6, IEEE Std 1003.1, 2004 Edition + * + * Copyright Sean Kelly 2005 - 2009. + * Distributed under the Boost Software License, Version 1.0. + * (See accompanying file LICENSE_1_0.txt or copy at + * http://www.boost.org/LICENSE_1_0.txt) + */ +module core.sys.posix.inttypes; + +private import core.sys.posix.config; +public import core.stdc.inttypes; + +// +// Required +// +/* +intmax_t imaxabs(intmax_t); +imaxdiv_t imaxdiv(intmax_t, intmax_t); +intmax_t strtoimax(in char*, char**, int); +uintmax_t strtoumax(in char *, char**, int); +intmax_t wcstoimax(in wchar_t*, wchar_t**, int); +uintmax_t wcstoumax(in wchar_t*, wchar_t**, int); +*/ +intmax_t imaxabs(intmax_t); +imaxdiv_t imaxdiv(intmax_t, intmax_t); +intmax_t strtoimax(in char*, char**, int); +uintmax_t strtoumax(in char *, char**, int); +intmax_t wcstoimax(in wchar_t*, wchar_t**, int); +uintmax_t wcstoumax(in wchar_t*, wchar_t**, int); diff --git a/druntime/import/core/sys/posix/net/if_.d b/druntime/import/core/sys/posix/net/if_.d new file mode 100644 index 00000000..00e7dbae --- /dev/null +++ b/druntime/import/core/sys/posix/net/if_.d @@ -0,0 +1,82 @@ +/** + * D header file for POSIX. + * + * Copyright: Copyright Sean Kelly 2005 - 2009. + * License: Boost License 1.0. + * Authors: Sean Kelly + * Standards: The Open Group Base Specifications Issue 6, IEEE Std 1003.1, 2004 Edition + * + * Copyright Sean Kelly 2005 - 2009. + * Distributed under the Boost Software License, Version 1.0. 
+ * (See accompanying file LICENSE_1_0.txt or copy at + * http://www.boost.org/LICENSE_1_0.txt) + */ +module core.sys.posix.netinet.in_; + +private import core.sys.posix.config; +public import core.stdc.inttypes; // for uint32_t, uint16_t, uint8_t +public import core.sys.posix.arpa.inet; +public import core.sys.posix.sys.socket; // for sa_family_t + +extern (C): + +// +// Required +// +/* +NOTE: The following must be defined in core.sys.posix.arpa.inet to break + a circular import: in_port_t, in_addr_t, struct in_addr, INET_ADDRSTRLEN. + +in_port_t +in_addr_t + +sa_family_t // from core.sys.posix.sys.socket +uint8_t // from core.stdc.inttypes +uint32_t // from core.stdc.inttypes + +struct in_addr +{ + in_addr_t s_addr; +} + +struct sockaddr_in +{ + sa_family_t sin_family; + in_port_t sin_port; + in_addr sin_addr; +} + +IPPROTO_IP +IPPROTO_ICMP +IPPROTO_TCP +IPPROTO_UDP + +INADDR_ANY +INADDR_BROADCAST + +INET_ADDRSTRLEN + +htonl() // from core.sys.posix.arpa.inet +htons() // from core.sys.posix.arpa.inet +ntohl() // from core.sys.posix.arpa.inet +ntohs() // from core.sys.posix.arpa.inet +*/ + +version( linux ) +{ + private enum __SOCK_SIZE__ = 16; + + struct sockaddr_in + { + sa_family_t sin_family; + in_port_t sin_port; + in_addr sin_addr; + + /* Pad to size of `struct sockaddr'.
*/ + ubyte[__SOCK_SIZE__ - sa_family_t.sizeof - + in_port_t.sizeof - in_addr.sizeof] __pad; + } + + enum + { + IPPROTO_IP = 0, + IPPROTO_ICMP = 1, + IPPROTO_TCP = 6, + IPPROTO_UDP = 17 + } + + enum uint INADDR_ANY = 0x00000000; + enum uint INADDR_BROADCAST = 0xffffffff; + + enum INET_ADDRSTRLEN = 16; +} +else version( OSX ) +{ + private enum __SOCK_SIZE__ = 16; + + struct sockaddr_in + { + ubyte sin_len; + sa_family_t sin_family; + in_port_t sin_port; + in_addr sin_addr; + ubyte[8] sin_zero; + } + + enum + { + IPPROTO_IP = 0, + IPPROTO_ICMP = 1, + IPPROTO_TCP = 6, + IPPROTO_UDP = 17 + } + + enum uint INADDR_ANY = 0x00000000; + enum uint INADDR_BROADCAST = 0xffffffff; + + enum INET_ADDRSTRLEN = 16; +} +else version( FreeBSD ) +{ + private enum __SOCK_SIZE__ = 16; + + struct sockaddr_in + { + ubyte sin_len; + sa_family_t sin_family; + in_port_t sin_port; + in_addr sin_addr; + ubyte[8] sin_zero; + } + + enum + { + IPPROTO_IP = 0, + IPPROTO_ICMP = 1, + IPPROTO_TCP = 6, + IPPROTO_UDP = 17 + } + + enum uint INADDR_ANY = 0x00000000; + enum uint INADDR_BROADCAST = 0xffffffff; +} + + +// +// IPV6 (IP6) +// +/* +NOTE: The following must be defined in core.sys.posix.arpa.inet to break + a circular import: INET6_ADDRSTRLEN.
+ +struct in6_addr +{ + uint8_t[16] s6_addr; +} + +struct sockaddr_in6 +{ + sa_family_t sin6_family; + in_port_t sin6_port; + uint32_t sin6_flowinfo; + in6_addr sin6_addr; + uint32_t sin6_scope_id; +} + +extern in6_addr in6addr_any; +extern in6_addr in6addr_loopback; + +struct ipv6_mreq +{ + in6_addr ipv6mr_multiaddr; + uint ipv6mr_interface; +} + +IPPROTO_IPV6 + +INET6_ADDRSTRLEN + +IPV6_JOIN_GROUP +IPV6_LEAVE_GROUP +IPV6_MULTICAST_HOPS +IPV6_MULTICAST_IF +IPV6_MULTICAST_LOOP +IPV6_UNICAST_HOPS +IPV6_V6ONLY + +// macros +int IN6_IS_ADDR_UNSPECIFIED(in6_addr*) +int IN6_IS_ADDR_LOOPBACK(in6_addr*) +int IN6_IS_ADDR_MULTICAST(in6_addr*) +int IN6_IS_ADDR_LINKLOCAL(in6_addr*) +int IN6_IS_ADDR_SITELOCAL(in6_addr*) +int IN6_IS_ADDR_V4MAPPED(in6_addr*) +int IN6_IS_ADDR_V4COMPAT(in6_addr*) +int IN6_IS_ADDR_MC_NODELOCAL(in6_addr*) +int IN6_IS_ADDR_MC_LINKLOCAL(in6_addr*) +int IN6_IS_ADDR_MC_SITELOCAL(in6_addr*) +int IN6_IS_ADDR_MC_ORGLOCAL(in6_addr*) +int IN6_IS_ADDR_MC_GLOBAL(in6_addr*) +*/ + +version ( linux ) +{ + struct in6_addr + { + union + { + uint8_t[16] s6_addr; + uint16_t[8] s6_addr16; + uint32_t[4] s6_addr32; + } + } + + struct sockaddr_in6 + { + sa_family_t sin6_family; + in_port_t sin6_port; + uint32_t sin6_flowinfo; + in6_addr sin6_addr; + uint32_t sin6_scope_id; + } + + extern in6_addr in6addr_any; + extern in6_addr in6addr_loopback; + + struct ipv6_mreq + { + in6_addr ipv6mr_multiaddr; + uint ipv6mr_interface; + } + + enum : uint + { + IPPROTO_IPV6 = 41, + + INET6_ADDRSTRLEN = 46, + + IPV6_JOIN_GROUP = 20, + IPV6_LEAVE_GROUP = 21, + IPV6_MULTICAST_HOPS = 18, + IPV6_MULTICAST_IF = 17, + IPV6_MULTICAST_LOOP = 19, + IPV6_UNICAST_HOPS = 16, + IPV6_V6ONLY = 26 + } + + // macros + extern (D) int IN6_IS_ADDR_UNSPECIFIED( in6_addr* addr ) + { + return (cast(uint32_t*) addr)[0] == 0 && + (cast(uint32_t*) addr)[1] == 0 && + (cast(uint32_t*) addr)[2] == 0 && + (cast(uint32_t*) addr)[3] == 0; + } + + extern (D) int IN6_IS_ADDR_LOOPBACK( in6_addr* addr ) + { + return 
(cast(uint32_t*) addr)[0] == 0 && + (cast(uint32_t*) addr)[1] == 0 && + (cast(uint32_t*) addr)[2] == 0 && + (cast(uint32_t*) addr)[3] == htonl( 1 ); + } + + extern (D) int IN6_IS_ADDR_MULTICAST( in6_addr* addr ) + { + return (cast(uint8_t*) addr)[0] == 0xff; + } + + extern (D) int IN6_IS_ADDR_LINKLOCAL( in6_addr* addr ) + { + return ((cast(uint32_t*) addr)[0] & htonl( 0xffc00000 )) == htonl( 0xfe800000 ); + } + + extern (D) int IN6_IS_ADDR_SITELOCAL( in6_addr* addr ) + { + return ((cast(uint32_t*) addr)[0] & htonl( 0xffc00000 )) == htonl( 0xfec00000 ); + } + + extern (D) int IN6_IS_ADDR_V4MAPPED( in6_addr* addr ) + { + return (cast(uint32_t*) addr)[0] == 0 && + (cast(uint32_t*) addr)[1] == 0 && + (cast(uint32_t*) addr)[2] == htonl( 0xffff ); + } + + extern (D) int IN6_IS_ADDR_V4COMPAT( in6_addr* addr ) + { + return (cast(uint32_t*) addr)[0] == 0 && + (cast(uint32_t*) addr)[1] == 0 && + (cast(uint32_t*) addr)[2] == 0 && + ntohl( (cast(uint32_t*) addr)[3] ) > 1; + } + + extern (D) int IN6_IS_ADDR_MC_NODELOCAL( in6_addr* addr ) + { + return IN6_IS_ADDR_MULTICAST( addr ) && + ((cast(uint8_t*) addr)[1] & 0xf) == 0x1; + } + + extern (D) int IN6_IS_ADDR_MC_LINKLOCAL( in6_addr* addr ) + { + return IN6_IS_ADDR_MULTICAST( addr ) && + ((cast(uint8_t*) addr)[1] & 0xf) == 0x2; + } + + extern (D) int IN6_IS_ADDR_MC_SITELOCAL( in6_addr* addr ) + { + return IN6_IS_ADDR_MULTICAST(addr) && + ((cast(uint8_t*) addr)[1] & 0xf) == 0x5; + } + + extern (D) int IN6_IS_ADDR_MC_ORGLOCAL( in6_addr* addr ) + { + return IN6_IS_ADDR_MULTICAST( addr) && + ((cast(uint8_t*) addr)[1] & 0xf) == 0x8; + } + + extern (D) int IN6_IS_ADDR_MC_GLOBAL( in6_addr* addr ) + { + return IN6_IS_ADDR_MULTICAST( addr ) && + ((cast(uint8_t*) addr)[1] & 0xf) == 0xe; + } +} +else version( OSX ) +{ + struct in6_addr + { + union + { + uint8_t[16] s6_addr; + uint16_t[8] s6_addr16; + uint32_t[4] s6_addr32; + } + } + + struct sockaddr_in6 + { + uint8_t sin6_len; + sa_family_t sin6_family; + in_port_t sin6_port; + uint32_t 
sin6_flowinfo; + in6_addr sin6_addr; + uint32_t sin6_scope_id; + } + + extern in6_addr in6addr_any; + extern in6_addr in6addr_loopback; + + struct ipv6_mreq + { + in6_addr ipv6mr_multiaddr; + uint ipv6mr_interface; + } + + enum : uint + { + IPPROTO_IPV6 = 41, + + INET6_ADDRSTRLEN = 46, + + IPV6_JOIN_GROUP = 12, + IPV6_LEAVE_GROUP = 13, + IPV6_MULTICAST_HOPS = 10, + IPV6_MULTICAST_IF = 9, + IPV6_MULTICAST_LOOP = 11, + IPV6_UNICAST_HOPS = 4, + IPV6_V6ONLY = 27 + } + + // macros + extern (D) int IN6_IS_ADDR_UNSPECIFIED( in6_addr* addr ) + { + return (cast(uint32_t*) addr)[0] == 0 && + (cast(uint32_t*) addr)[1] == 0 && + (cast(uint32_t*) addr)[2] == 0 && + (cast(uint32_t*) addr)[3] == 0; + } + + extern (D) int IN6_IS_ADDR_LOOPBACK( in6_addr* addr ) + { + return (cast(uint32_t*) addr)[0] == 0 && + (cast(uint32_t*) addr)[1] == 0 && + (cast(uint32_t*) addr)[2] == 0 && + (cast(uint32_t*) addr)[3] == ntohl( 1 ); + } + + extern (D) int IN6_IS_ADDR_MULTICAST( in6_addr* addr ) + { + return addr.s6_addr[0] == 0xff; + } + + extern (D) int IN6_IS_ADDR_LINKLOCAL( in6_addr* addr ) + { + return addr.s6_addr[0] == 0xfe && (addr.s6_addr[1] & 0xc0) == 0x80; + } + + extern (D) int IN6_IS_ADDR_SITELOCAL( in6_addr* addr ) + { + return addr.s6_addr[0] == 0xfe && (addr.s6_addr[1] & 0xc0) == 0xc0; + } + + extern (D) int IN6_IS_ADDR_V4MAPPED( in6_addr* addr ) + { + return (cast(uint32_t*) addr)[0] == 0 && + (cast(uint32_t*) addr)[1] == 0 && + (cast(uint32_t*) addr)[2] == ntohl( 0x0000ffff ); + } + + extern (D) int IN6_IS_ADDR_V4COMPAT( in6_addr* addr ) + { + return (cast(uint32_t*) addr)[0] == 0 && + (cast(uint32_t*) addr)[1] == 0 && + (cast(uint32_t*) addr)[2] == 0 && + (cast(uint32_t*) addr)[3] != 0 && + (cast(uint32_t*) addr)[3] != ntohl( 1 ); + } + + extern (D) int IN6_IS_ADDR_MC_NODELOCAL( in6_addr* addr ) + { + return IN6_IS_ADDR_MULTICAST( addr ) && + ((cast(uint8_t*) addr)[1] & 0xf) == 0x1; + } + + extern (D) int IN6_IS_ADDR_MC_LINKLOCAL( in6_addr* addr ) + { + return 
IN6_IS_ADDR_MULTICAST( addr ) && + ((cast(uint8_t*) addr)[1] & 0xf) == 0x2; + } + + extern (D) int IN6_IS_ADDR_MC_SITELOCAL( in6_addr* addr ) + { + return IN6_IS_ADDR_MULTICAST(addr) && + ((cast(uint8_t*) addr)[1] & 0xf) == 0x5; + } + + extern (D) int IN6_IS_ADDR_MC_ORGLOCAL( in6_addr* addr ) + { + return IN6_IS_ADDR_MULTICAST( addr) && + ((cast(uint8_t*) addr)[1] & 0xf) == 0x8; + } + + extern (D) int IN6_IS_ADDR_MC_GLOBAL( in6_addr* addr ) + { + return IN6_IS_ADDR_MULTICAST( addr ) && + ((cast(uint8_t*) addr)[1] & 0xf) == 0xe; + } +} + + +// +// Raw Sockets (RS) +// +/* +IPPROTO_RAW +*/ + +version (linux ) +{ + enum uint IPPROTO_RAW = 255; +} +else version( OSX ) +{ + enum uint IPPROTO_RAW = 255; +} diff --git a/druntime/import/core/sys/posix/netinet/tcp.d b/druntime/import/core/sys/posix/netinet/tcp.d new file mode 100644 index 00000000..377c18b7 --- /dev/null +++ b/druntime/import/core/sys/posix/netinet/tcp.d @@ -0,0 +1,38 @@ +/** + * D header file for POSIX. + * + * Copyright: Copyright Sean Kelly 2005 - 2009. + * License: Boost License 1.0. + * Authors: Sean Kelly + * Standards: The Open Group Base Specifications Issue 6, IEEE Std 1003.1, 2004 Edition + * + * Copyright Sean Kelly 2005 - 2009. + * Distributed under the Boost Software License, Version 1.0. 
+ * (See accompanying file LICENSE_1_0.txt or copy at + * http://www.boost.org/LICENSE_1_0.txt) + */ +module core.sys.posix.poll; + +private import core.sys.posix.config; + +extern (C): + +// +// XOpen (XSI) +// +/* +struct pollfd +{ + int fd; + short events; + short revents; +} + +nfds_t + +POLLIN +POLLRDNORM +POLLRDBAND +POLLPRI +POLLOUT +POLLWRNORM +POLLWRBAND +POLLERR +POLLHUP +POLLNVAL + +int poll(pollfd[], nfds_t, int); +*/ + +version( linux ) +{ + struct pollfd + { + int fd; + short events; + short revents; + } + + alias c_ulong nfds_t; + + enum + { + POLLIN = 0x001, + POLLRDNORM = 0x040, + POLLRDBAND = 0x080, + POLLPRI = 0x002, + POLLOUT = 0x004, + POLLWRNORM = 0x100, + POLLWRBAND = 0x200, + POLLERR = 0x008, + POLLHUP = 0x010, + POLLNVAL = 0x020, + } + + int poll(pollfd*, nfds_t, int); +} +else version( OSX ) +{ + struct pollfd + { + int fd; + short events; + short revents; + } + + alias uint nfds_t; + + enum + { + POLLIN = 0x0001, + POLLPRI = 0x0002, + POLLOUT = 0x0004, + POLLRDNORM = 0x0040, + POLLWRNORM = POLLOUT, + POLLRDBAND = 0x0080, + POLLWRBAND = 0x0100, + POLLEXTEND = 0x0200, + POLLATTRIB = 0x0400, + POLLNLINK = 0x0800, + POLLWRITE = 0x1000, + POLLERR = 0x0008, + POLLHUP = 0x0010, + POLLNVAL = 0x0020, + + POLLSTANDARD = (POLLIN|POLLPRI|POLLOUT|POLLRDNORM|POLLRDBAND| + POLLWRBAND|POLLERR|POLLHUP|POLLNVAL) + } + + int poll(pollfd*, nfds_t, int); +} +else version( FreeBSD ) +{ + struct pollfd + { + int fd; + short events; + short revents; + } + + alias uint nfds_t; + + enum + { + POLLIN = 0x0001, + POLLPRI = 0x0002, + POLLOUT = 0x0004, + POLLRDNORM = 0x0040, + POLLWRNORM = POLLOUT, + POLLRDBAND = 0x0080, + POLLWRBAND = 0x0100, + //POLLEXTEND = 0x0200, + //POLLATTRIB = 0x0400, + //POLLNLINK = 0x0800, + //POLLWRITE = 0x1000, + POLLERR = 0x0008, + POLLHUP = 0x0010, + POLLNVAL = 0x0020, + + POLLSTANDARD = (POLLIN|POLLPRI|POLLOUT|POLLRDNORM|POLLRDBAND| + POLLWRBAND|POLLERR|POLLHUP|POLLNVAL) + } + + int poll(pollfd*, nfds_t, int); +} diff --git
a/druntime/import/core/sys/posix/pthread.d b/druntime/import/core/sys/posix/pthread.d new file mode 100644 index 00000000..dc582a11 --- /dev/null +++ b/druntime/import/core/sys/posix/pthread.d @@ -0,0 +1,585 @@ +/** + * D header file for POSIX. + * + * Copyright: Copyright Sean Kelly 2005 - 2009. + * License: Boost License 1.0. + * Authors: Sean Kelly + * Standards: The Open Group Base Specifications Issue 6, IEEE Std 1003.1, 2004 Edition + * + * Copyright Sean Kelly 2005 - 2009. + * Distributed under the Boost Software License, Version 1.0. + * (See accompanying file LICENSE_1_0.txt or copy at + * http://www.boost.org/LICENSE_1_0.txt) + */ +module core.sys.posix.sched; + +private import core.sys.posix.config; +public import core.sys.posix.time; +public import core.sys.posix.sys.types; + +extern (C): + +// +// Required +// +/* +struct sched_param +{ + int sched_priority (THR) + int sched_ss_low_priority (SS|TSP) + struct timespec sched_ss_repl_period (SS|TSP) + struct timespec sched_ss_init_budget (SS|TSP) + int sched_ss_max_repl (SS|TSP) +} + +SCHED_FIFO +SCHED_RR +SCHED_SPORADIC (SS|TSP) +SCHED_OTHER + +int sched_getparam(pid_t, sched_param*); +int sched_getscheduler(pid_t); +int sched_setparam(pid_t, in sched_param*); +int sched_setscheduler(pid_t, int, in sched_param*); +*/ + +version( linux ) +{ + struct sched_param + { + int sched_priority; + } + + enum SCHED_OTHER = 0; + enum SCHED_FIFO = 1; + enum SCHED_RR = 2; + //SCHED_SPORADIC (SS|TSP) +} +else version( OSX ) +{ + enum SCHED_OTHER = 1; + enum SCHED_FIFO = 4; + enum SCHED_RR = 2; + //SCHED_SPORADIC (SS|TSP) + + private enum __SCHED_PARAM_SIZE__ = 4; + + struct sched_param + { + int sched_priority; + byte[__SCHED_PARAM_SIZE__] __opaque; // sized by the constant above, not the pthread mutex size + } +} +else version( FreeBSD ) +{ + struct sched_param + { + int sched_priority; + } + + enum SCHED_FIFO = 1; + enum SCHED_OTHER = 2; + enum SCHED_RR = 3; + //SCHED_SPORADIC (SS|TSP) +} + +int sched_getparam(pid_t, sched_param*); +int sched_getscheduler(pid_t); +int
sched_setparam(pid_t, in sched_param*); +int sched_setscheduler(pid_t, int, in sched_param*); + +// +// Thread (THR) +// +/* +int sched_yield(); +*/ + +version( linux ) +{ + int sched_yield(); +} +else version( OSX ) +{ + int sched_yield(); +} +else version( FreeBSD ) +{ + int sched_yield(); +} + +// +// Scheduling (TPS) +// +/* +int sched_get_priority_max(int); +int sched_get_priority_min(int); +int sched_rr_get_interval(pid_t, timespec*); +*/ + +version( linux ) +{ + int sched_get_priority_max(int); + int sched_get_priority_min(int); + int sched_rr_get_interval(pid_t, timespec*); +} +else version( OSX ) +{ + int sched_get_priority_min(int); + int sched_get_priority_max(int); + //int sched_rr_get_interval(pid_t, timespec*); // FIXME: unavailable? +} +else version( FreeBSD ) +{ + int sched_get_priority_min(int); + int sched_get_priority_max(int); + int sched_rr_get_interval(pid_t, timespec*); +} diff --git a/druntime/import/core/sys/posix/semaphore.d b/druntime/import/core/sys/posix/semaphore.d new file mode 100644 index 00000000..6570962d --- /dev/null +++ b/druntime/import/core/sys/posix/semaphore.d @@ -0,0 +1,102 @@ +/** + * D header file for POSIX. + * + * Copyright: Copyright Sean Kelly 2005 - 2009. + * License: Boost License 1.0. + * Authors: Sean Kelly + * Standards: The Open Group Base Specifications Issue 6, IEEE Std 1003.1, 2004 Edition + * + * Copyright Sean Kelly 2005 - 2009. + * Distributed under the Boost Software License, Version 1.0.
+ * (See accompanying file LICENSE_1_0.txt or copy at + * http://www.boost.org/LICENSE_1_0.txt) + */ +module core.sys.posix.setjmp; + +private import core.sys.posix.config; +private import core.sys.posix.signal; // for sigset_t + +extern (C): + +// +// Required +// +/* +jmp_buf + +int setjmp(jmp_buf); +void longjmp(jmp_buf, int); +*/ + +version( linux ) +{ + version( X86_64 ) + { + //enum JB_BX = 0; + //enum JB_BP = 1; + //enum JB_12 = 2; + //enum JB_13 = 3; + //enum JB_14 = 4; + //enum JB_15 = 5; + //enum JB_SP = 6; + //enum JB_PC = 7; + //enum JB_SIZE = 64; + + alias long[8] __jmp_buf; + } + else version( X86 ) + { + //enum JB_BX = 0; + //enum JB_SI = 1; + //enum JB_DI = 2; + //enum JB_BP = 3; + //enum JB_SP = 4; + //enum JB_PC = 5; + //enum JB_SIZE = 24; + + alias int[6] __jmp_buf; + } + else version ( SPARC ) + { + alias int[3] __jmp_buf; + } + + struct __jmp_buf_tag + { + __jmp_buf __jmpbuf; + int __mask_was_saved; + sigset_t __saved_mask; + } + + alias __jmp_buf_tag[1] jmp_buf; + + alias _setjmp setjmp; // see XOpen block + void longjmp(jmp_buf, int); +} + +// +// C Extension (CX) +// +/* +sigjmp_buf + +int sigsetjmp(sigjmp_buf, int); +void siglongjmp(sigjmp_buf, int); +*/ + +version( linux ) +{ + alias jmp_buf sigjmp_buf; + + int __sigsetjmp(sigjmp_buf, int); + alias __sigsetjmp sigsetjmp; + void siglongjmp(sigjmp_buf, int); +} + +// +// XOpen (XSI) +// +/* +int _setjmp(jmp_buf); +void _longjmp(jmp_buf, int); +*/ + +version( linux ) +{ + int _setjmp(jmp_buf); + void _longjmp(jmp_buf, int); +} diff --git a/druntime/import/core/sys/posix/signal.d b/druntime/import/core/sys/posix/signal.d new file mode 100644 index 00000000..d5505711 --- /dev/null +++ b/druntime/import/core/sys/posix/signal.d @@ -0,0 +1,843 @@ +/** + * D header file for POSIX. + * + * Copyright: Copyright Sean Kelly 2005 - 2009. + * License: Boost License 1.0. 
+ * Authors: Sean Kelly + * Standards: The Open Group Base Specifications Issue 6, IEEE Std 1003.1, 2004 Edition + * + * Copyright Sean Kelly 2005 - 2009. + * Distributed under the Boost Software License, Version 1.0. + * (See accompanying file LICENSE_1_0.txt or copy at + * http://www.boost.org/LICENSE_1_0.txt) + */ +module core.sys.posix.stdio; + +private import core.sys.posix.config; +public import core.stdc.stdio; +public import core.sys.posix.sys.types; // for off_t + +extern (C): + +// +// Required (defined in core.stdc.stdio) +// +/* +BUFSIZ +_IOFBF +_IOLBF +_IONBF +L_tmpnam +SEEK_CUR +SEEK_END +SEEK_SET +FILENAME_MAX +FOPEN_MAX +TMP_MAX +EOF +NULL +stderr +stdin +stdout +FILE +fpos_t +size_t + +void clearerr(FILE*); +int fclose(FILE*); +int feof(FILE*); +int ferror(FILE*); +int fflush(FILE*); +int fgetc(FILE*); +int fgetpos(FILE*, fpos_t *); +char* fgets(char*, int, FILE*); +FILE* fopen(in char*, in char*); +int fprintf(FILE*, in char*, ...); +int fputc(int, FILE*); +int fputs(in char*, FILE*); +size_t fread(void *, size_t, size_t, FILE*); +FILE* freopen(in char*, in char*, FILE*); +int fscanf(FILE*, in char*, ...); +int fseek(FILE*, c_long, int); +int fsetpos(FILE*, in fpos_t*); +c_long ftell(FILE*); +size_t fwrite(in void *, size_t, size_t, FILE*); +int getc(FILE*); +int getchar(); +char* gets(char*); +void perror(in char*); +int printf(in char*, ...); +int putc(int, FILE*); +int putchar(int); +int puts(in char*); +int remove(in char*); +int rename(in char*, in char*); +void rewind(FILE*); +int scanf(in char*, ...); +void setbuf(FILE*, char*); +int setvbuf(FILE*, char*, int, size_t); +int snprintf(char*, size_t, in char*, ...); +int sprintf(char*, in char*, ...); +int sscanf(in char*, in char*, int ...); +FILE* tmpfile(); +char* tmpnam(char*); +int ungetc(int, FILE*); +int vfprintf(FILE*, in char*, va_list); +int vfscanf(FILE*, in char*, va_list); +int vprintf(in char*, va_list); +int vscanf(in char*, va_list); +int vsnprintf(char*, size_t, in char*, 
va_list); +int vsprintf(char*, in char*, va_list); +int vsscanf(in char*, in char*, va_list arg); +*/ + +version( linux ) +{ + static if( __USE_LARGEFILE64 ) + { + int fgetpos64(FILE*, fpos_t *); + alias fgetpos64 fgetpos; + + FILE* fopen64(in char*, in char*); + alias fopen64 fopen; + + FILE* freopen64(in char*, in char*, FILE*); + alias freopen64 freopen; + + int fseek(FILE*, c_long, int); + // no alias: fseek takes a c_long offset and the C library has no fseek64 (use fseeko for off_t) + + int fsetpos64(FILE*, in fpos_t*); + alias fsetpos64 fsetpos; + + FILE* tmpfile64(); + alias tmpfile64 tmpfile; + } + else + { + int fgetpos(FILE*, fpos_t *); + FILE* fopen(in char*, in char*); + FILE* freopen(in char*, in char*, FILE*); + int fseek(FILE*, c_long, int); + int fsetpos(FILE*, in fpos_t*); + FILE* tmpfile(); + } +} + +// +// C Extension (CX) +// +/* +L_ctermid + +char* ctermid(char*); +FILE* fdopen(int, in char*); +int fileno(FILE*); +int fseeko(FILE*, off_t, int); +off_t ftello(FILE*); +char* gets(char*); +FILE* popen(in char*, in char*); +*/ + +version( linux ) +{ + enum L_ctermid = 9; + + static if( __USE_FILE_OFFSET64 ) + { + int fseeko64(FILE*, off_t, int); + alias fseeko64 fseeko; + } + else + { + int fseeko(FILE*, off_t, int); + } + + static if( __USE_LARGEFILE64 ) + { + off_t ftello64(FILE*); + alias ftello64 ftello; + } + else + { + off_t ftello(FILE*); + } +} +else +{ + int fseeko(FILE*, off_t, int); + off_t ftello(FILE*); +} + +char* ctermid(char*); +FILE* fdopen(int, in char*); +int fileno(FILE*); +//int fseeko(FILE*, off_t, int); +//off_t ftello(FILE*); +char* gets(char*); +FILE* popen(in char*, in char*); + +// +// Thread-Safe Functions (TSF) +// +/* +void flockfile(FILE*); +int ftrylockfile(FILE*); +void funlockfile(FILE*); +int getc_unlocked(FILE*); +int getchar_unlocked(); +int putc_unlocked(int, FILE*); +int putchar_unlocked(int); +*/ + +version( linux ) +{ + void flockfile(FILE*); + int ftrylockfile(FILE*); + void funlockfile(FILE*); + int getc_unlocked(FILE*); + int getchar_unlocked(); + int putc_unlocked(int,
FILE*); + int putchar_unlocked(int); +} + +// +// XOpen (XSI) +// +/* +P_tmpdir +va_list (defined in core.stdc.stdarg) + +char* tempnam(in char*, in char*); +*/ + +version( linux ) +{ + enum P_tmpdir = "/tmp"; + + char* tempnam(in char*, in char*); +} diff --git a/druntime/import/core/sys/posix/stdlib.d b/druntime/import/core/sys/posix/stdlib.d new file mode 100644 index 00000000..00d9916b --- /dev/null +++ b/druntime/import/core/sys/posix/stdlib.d @@ -0,0 +1,310 @@ +/** + * D header file for POSIX. + * + * Copyright: Copyright Sean Kelly 2005 - 2009. + * License: Boost License 1.0. + * Authors: Sean Kelly + * Standards: The Open Group Base Specifications Issue 6, IEEE Std 1003.1, 2004 Edition + * + * Copyright Sean Kelly 2005 - 2009. + * Distributed under the Boost Software License, Version 1.0. + * (See accompanying file LICENSE_1_0.txt or copy at + * http://www.boost.org/LICENSE_1_0.txt) + */ +module core.sys.posix.sys.ipc; + +private import core.sys.posix.config; +public import core.sys.posix.sys.types; // for uid_t, gid_t, mode_t, key_t + +extern (C): + +// +// XOpen (XSI) +// +/* +struct ipc_perm +{ + uid_t uid; + gid_t gid; + uid_t cuid; + gid_t cgid; + mode_t mode; +} + +IPC_CREAT +IPC_EXCL +IPC_NOWAIT + +IPC_PRIVATE + +IPC_RMID +IPC_SET +IPC_STAT + +key_t ftok(in char*, int); +*/ + +version( linux ) +{ + struct ipc_perm + { + key_t __key; + uid_t uid; + gid_t gid; + uid_t cuid; + gid_t cgid; + ushort mode; + ushort __pad1; + ushort __seq; + ushort __pad2; + c_ulong __unused1; + c_ulong __unused2; + } + + enum IPC_CREAT = 01000; + enum IPC_EXCL = 02000; + enum IPC_NOWAIT = 04000; + + enum key_t IPC_PRIVATE = 0; + + enum IPC_RMID = 0; + enum IPC_SET = 1; + enum IPC_STAT = 2; + + key_t ftok(in char*, int); +} +else version( OSX ) +{ + +} +else version( freebsd ) +{ + struct ipc_perm + { + ushort cuid; + ushort cguid; + ushort uid; + ushort gid; + ushort mode; + ushort seq; + key_t key; + } + + enum IPC_CREAT = 01000; + enum IPC_EXCL = 02000; + enum IPC_NOWAIT 
= 04000; + + enum key_t IPC_PRIVATE = 0; + + enum IPC_RMID = 0; + enum IPC_SET = 1; + enum IPC_STAT = 2; + + key_t ftok(in char*, int); +} diff --git a/druntime/import/core/sys/posix/sys/mman.d b/druntime/import/core/sys/posix/sys/mman.d new file mode 100644 index 00000000..bacfb23a --- /dev/null +++ b/druntime/import/core/sys/posix/sys/mman.d @@ -0,0 +1,313 @@ +/** + * D header file for POSIX. + * + * Copyright: Copyright Sean Kelly 2005 - 2009. + * License: Boost License 1.0. + * Authors: Sean Kelly + * Standards: The Open Group Base Specifications Issue 6, IEEE Std 1003.1, 2004 Edition + * + * Copyright Sean Kelly 2005 - 2009. + * Distributed under the Boost Software License, Version 1.0. + * (See accompanying file LICENSE_1_0.txt or copy at + * http://www.boost.org/LICENSE_1_0.txt) + */ +module core.sys.posix.sys.select; + +private import core.sys.posix.config; +public import core.stdc.time; // for timespec +public import core.sys.posix.sys.time; // for timeval +public import core.sys.posix.sys.types; // for time_t +public import core.sys.posix.signal; // for sigset_t + +extern (C): + +// +// Required +// +/* +NOTE: This module requires timeval from core.sys.posix.sys.time, but timeval + is supposedly an XOpen extension. As a result, this header will not + compile on platforms that are not XSI-compliant. This must be resolved + on a per-platform basis. 
+ +fd_set + +void FD_CLR(int fd, fd_set* fdset); +int FD_ISSET(int fd, fd_set* fdset); +void FD_SET(int fd, fd_set* fdset); +void FD_ZERO(fd_set* fdset); + +FD_SETSIZE + +int pselect(int, fd_set*, fd_set*, fd_set*, in timespec*, in sigset_t*); +int select(int, fd_set*, fd_set*, fd_set*, timeval*); +*/ + +version( linux ) +{ + private + { + alias c_long __fd_mask; + enum __NFDBITS = 8 * __fd_mask.sizeof; + + extern (D) size_t __FDELT( int d ) // index of the fds_bits word holding descriptor d + { + return d / __NFDBITS; + } + + extern (D) __fd_mask __FDMASK( int d ) // full-width mask: an int return would drop bits 32..63 when c_long is 64-bit + { + return cast(__fd_mask) 1 << ( d % __NFDBITS ); + } + } + + enum FD_SETSIZE = 1024; + + struct fd_set + { + __fd_mask[FD_SETSIZE / __NFDBITS] fds_bits; + } + + extern (D) void FD_CLR( int fd, fd_set* fdset ) + { + fdset.fds_bits[__FDELT( fd )] &= ~__FDMASK( fd ); + } + + extern (D) int FD_ISSET( int fd, fd_set* fdset ) // returns 1 if fd is in the set, 0 otherwise + { + return ( fdset.fds_bits[__FDELT( fd )] & __FDMASK( fd ) ) != 0; + } + + extern (D) void FD_SET( int fd, fd_set* fdset ) + { + fdset.fds_bits[__FDELT( fd )] |= __FDMASK( fd ); + } + + extern (D) void FD_ZERO( fd_set* fdset ) + { + fdset.fds_bits[0 ..
$] = 0; + } + + /+ + + GNU ASM Implementation + + + # define __FD_ZERO(fdsp) \ + do { \ + int __d0, __d1; \ + __asm__ __volatile__ ("cld; rep; stosl" \ + : "=c" (__d0), "=D" (__d1) \ + : "a" (0), "0" (sizeof (fd_set) \ + / sizeof (__fd_mask)), \ + "1" (&__FDS_BITS (fdsp)[0]) \ + : "memory"); \ + } while (0) + + # define __FD_SET(fd, fdsp) \ + __asm__ __volatile__ ("btsl %1,%0" \ + : "=m" (__FDS_BITS (fdsp)[__FDELT (fd)]) \ + : "r" (((int) (fd)) % __NFDBITS) \ + : "cc","memory") + # define __FD_CLR(fd, fdsp) \ + __asm__ __volatile__ ("btrl %1,%0" \ + : "=m" (__FDS_BITS (fdsp)[__FDELT (fd)]) \ + : "r" (((int) (fd)) % __NFDBITS) \ + : "cc","memory") + # define __FD_ISSET(fd, fdsp) \ + (__extension__ \ + ({register char __result; \ + __asm__ __volatile__ ("btl %1,%2 ; setcb %b0" \ + : "=q" (__result) \ + : "r" (((int) (fd)) % __NFDBITS), \ + "m" (__FDS_BITS (fdsp)[__FDELT (fd)]) \ + : "cc"); \ + __result; })) + +/ + + int pselect(int, fd_set*, fd_set*, fd_set*, in timespec*, in sigset_t*); + int select(int, fd_set*, fd_set*, fd_set*, timeval*); +} +else version( OSX ) +{ + private + { + enum uint __DARWIN_NBBY = 8; /* bits in a byte */ + enum uint __DARWIN_NFDBITS = (int.sizeof * __DARWIN_NBBY); /* bits per mask */ + } + + enum FD_SETSIZE = 1024; + + struct fd_set + { + int[(FD_SETSIZE + (__DARWIN_NFDBITS - 1)) / __DARWIN_NFDBITS] fds_bits; + } + + extern (D) void FD_CLR( int fd, fd_set* fdset ) + { + fdset.fds_bits[fd / __DARWIN_NFDBITS] &= ~(1 << (fd % __DARWIN_NFDBITS)); + } + + extern (D) int FD_ISSET( int fd, fd_set* fdset ) + { + return fdset.fds_bits[fd / __DARWIN_NFDBITS] & (1 << (fd % __DARWIN_NFDBITS)); + } + + extern (D) void FD_SET( int fd, fd_set* fdset ) + { + fdset.fds_bits[fd / __DARWIN_NFDBITS] |= 1 << (fd % __DARWIN_NFDBITS); + } + + extern (D) void FD_ZERO( fd_set* fdset ) + { + fdset.fds_bits[0 .. 
$] = 0; + } + + int pselect(int, fd_set*, fd_set*, fd_set*, in timespec*, in sigset_t*); + int select(int, fd_set*, fd_set*, fd_set*, timeval*); +} +else version( freebsd ) +{ + private + { + enum uint _NFDBITS = c_ulong.sizeof * 8; + } + + enum uint FD_SETSIZE = 1024; + + struct fd_set + { + c_ulong fds_bits[(FD_SETSIZE + (_NFDBITS - 1)) / _NFDBITS]; + } +} diff --git a/druntime/import/core/sys/posix/sys/shm.d b/druntime/import/core/sys/posix/sys/shm.d new file mode 100644 index 00000000..afe874df --- /dev/null +++ b/druntime/import/core/sys/posix/sys/shm.d @@ -0,0 +1,116 @@ +/** + * D header file for POSIX. + * + * Copyright: Copyright Sean Kelly 2005 - 2009. + * License: Boost License 1.0. + * Authors: Sean Kelly + * Standards: The Open Group Base Specifications Issue 6, IEEE Std 1003.1, 2004 Edition + * + * Copyright Sean Kelly 2005 - 2009. + * Distributed under the Boost Software License, Version 1.0. + * (See accompanying file LICENSE_1_0.txt or copy at + * http://www.boost.org/LICENSE_1_0.txt) + */ +module core.sys.posix.sys.socket; + +private import core.sys.posix.config; +public import core.sys.posix.sys.types; // for ssize_t, size_t +public import core.sys.posix.sys.uio; // for iovec + +extern (C): + +// +// Required +// +/* +socklen_t +sa_family_t + +struct sockaddr +{ + sa_family_t sa_family; + char sa_data[]; +} + +struct sockaddr_storage +{ + sa_family_t ss_family; +} + +struct msghdr +{ + void* msg_name; + socklen_t msg_namelen; + struct iovec* msg_iov; + int msg_iovlen; + void* msg_control; + socklen_t msg_controllen; + int msg_flags; +} + +struct iovec {} // from core.sys.posix.sys.uio + +struct cmsghdr +{ + socklen_t cmsg_len; + int cmsg_level; + int cmsg_type; +} + +SCM_RIGHTS + +CMSG_DATA(cmsg) +CMSG_NXTHDR(mhdr,cmsg) +CMSG_FIRSTHDR(mhdr) + +struct linger +{ + int l_onoff; + int l_linger; +} + +SOCK_DGRAM +SOCK_SEQPACKET +SOCK_STREAM + +SOL_SOCKET + +SO_ACCEPTCONN +SO_BROADCAST +SO_DEBUG +SO_DONTROUTE +SO_ERROR +SO_KEEPALIVE +SO_LINGER 
+SO_OOBINLINE +SO_RCVBUF +SO_RCVLOWAT +SO_RCVTIMEO +SO_REUSEADDR +SO_SNDBUF +SO_SNDLOWAT +SO_SNDTIMEO +SO_TYPE + +SOMAXCONN + +MSG_CTRUNC +MSG_DONTROUTE +MSG_EOR +MSG_OOB +MSG_PEEK +MSG_TRUNC +MSG_WAITALL + +AF_INET +AF_UNIX +AF_UNSPEC + +SHUT_RD +SHUT_RDWR +SHUT_WR + +int accept(int, sockaddr*, socklen_t*); +int bind(int, in sockaddr*, socklen_t); +int connect(int, in sockaddr*, socklen_t); +int getpeername(int, sockaddr*, socklen_t*); +int getsockname(int, sockaddr*, socklen_t*); +int getsockopt(int, int, int, void*, socklen_t*); +int listen(int, int); +ssize_t recv(int, void*, size_t, int); +ssize_t recvfrom(int, void*, size_t, int, sockaddr*, socklen_t*); +ssize_t recvmsg(int, msghdr*, int); +ssize_t send(int, in void*, size_t, int); +ssize_t sendmsg(int, in msghdr*, int); +ssize_t sendto(int, in void*, size_t, int, in sockaddr*, socklen_t); +int setsockopt(int, int, int, in void*, socklen_t); +int shutdown(int, int); +int socket(int, int, int); +int sockatmark(int); +int socketpair(int, int, int, int[2]); +*/ + +version( linux ) +{ + alias uint socklen_t; + alias ushort sa_family_t; + + struct sockaddr + { + sa_family_t sa_family; + byte[14] sa_data; + } + + private enum : size_t + { + _SS_SIZE = 128, + _SS_PADSIZE = _SS_SIZE - (c_ulong.sizeof * 2) + } + + struct sockaddr_storage + { + sa_family_t ss_family; + c_ulong __ss_align; + byte[_SS_PADSIZE] __ss_padding; + } + + struct msghdr + { + void* msg_name; + socklen_t msg_namelen; + iovec* msg_iov; + size_t msg_iovlen; + void* msg_control; + size_t msg_controllen; + int msg_flags; + } + + struct cmsghdr + { + size_t cmsg_len; + int cmsg_level; + int cmsg_type; + static if( false /* (!is( __STRICT_ANSI__ ) && __GNUC__ >= 2) || __STDC_VERSION__ >= 199901L */ ) + { + ubyte[1] __cmsg_data; + } + } + + enum : uint + { + SCM_RIGHTS = 0x01 + } + + static if( false /* (!is( __STRICT_ANSI__ ) && __GNUC__ >= 2) || __STDC_VERSION__ >= 199901L */ ) + { + extern (D) ubyte[1] CMSG_DATA( cmsghdr* cmsg ) { return 
cmsg.__cmsg_data; } + } + else + { + extern (D) ubyte* CMSG_DATA( cmsghdr* cmsg ) { return cast(ubyte*)( cmsg + 1 ); } + } + + private cmsghdr* __cmsg_nxthdr(msghdr*, cmsghdr*); + alias __cmsg_nxthdr CMSG_NXTHDR; + + extern (D) cmsghdr* CMSG_FIRSTHDR( msghdr* mhdr ) // first control header, or null if the control buffer cannot hold one + { + return ( cast(size_t) mhdr.msg_controllen >= cmsghdr.sizeof + ? cast(cmsghdr*) mhdr.msg_control + : cast(cmsghdr*) null ); + } + + struct linger + { + int l_onoff; + int l_linger; + } + + enum + { + SOCK_DGRAM = 2, + SOCK_SEQPACKET = 5, + SOCK_STREAM = 1 + } + + enum + { + SOL_SOCKET = 1 + } + + enum + { + SO_ACCEPTCONN = 30, + SO_BROADCAST = 6, + SO_DEBUG = 1, + SO_DONTROUTE = 5, + SO_ERROR = 4, + SO_KEEPALIVE = 9, + SO_LINGER = 13, + SO_OOBINLINE = 10, + SO_RCVBUF = 8, + SO_RCVLOWAT = 18, + SO_RCVTIMEO = 20, + SO_REUSEADDR = 2, + SO_SNDBUF = 7, + SO_SNDLOWAT = 19, + SO_SNDTIMEO = 21, + SO_TYPE = 3 + } + + enum + { + SOMAXCONN = 128 + } + + enum : uint + { + MSG_CTRUNC = 0x08, + MSG_DONTROUTE = 0x04, + MSG_EOR = 0x80, + MSG_OOB = 0x01, + MSG_PEEK = 0x02, + MSG_TRUNC = 0x20, + MSG_WAITALL = 0x100 + } + + enum + { + AF_INET = 2, + AF_UNIX = 1, + AF_UNSPEC = 0 + } + + enum + { + SHUT_RD, + SHUT_WR, + SHUT_RDWR + } + + int accept(int, sockaddr*, socklen_t*); + int bind(int, in sockaddr*, socklen_t); + int connect(int, in sockaddr*, socklen_t); + int getpeername(int, sockaddr*, socklen_t*); + int getsockname(int, sockaddr*, socklen_t*); + int getsockopt(int, int, int, void*, socklen_t*); + int listen(int, int); + ssize_t recv(int, void*, size_t, int); + ssize_t recvfrom(int, void*, size_t, int, sockaddr*, socklen_t*); + ssize_t recvmsg(int, msghdr*, int); + ssize_t send(int, in void*, size_t, int); + ssize_t sendmsg(int, in msghdr*, int); + ssize_t sendto(int, in void*, size_t, int, in sockaddr*, socklen_t); + int setsockopt(int, int, int, in void*, socklen_t); + int shutdown(int, int); + int socket(int, int, int); + int sockatmark(int); + int socketpair(int, int, int, int[2]); +} +else version( OSX ) +{
+ alias uint socklen_t; + alias ubyte sa_family_t; + + struct sockaddr + { + ubyte sa_len; + sa_family_t sa_family; + byte[14] sa_data; + } + + private enum : size_t + { + _SS_PAD1 = long.sizeof - ubyte.sizeof - sa_family_t.sizeof, + _SS_PAD2 = 128 - ubyte.sizeof - sa_family_t.sizeof - _SS_PAD1 - long.sizeof + } + + struct sockaddr_storage + { + ubyte ss_len; + sa_family_t ss_family; + byte[_SS_PAD1] __ss_pad1; + long __ss_align; + byte[_SS_PAD2] __ss_pad2; + } + + struct msghdr + { + void* msg_name; + socklen_t msg_namelen; + iovec* msg_iov; + int msg_iovlen; + void* msg_control; + socklen_t msg_controllen; + int msg_flags; + } + + struct cmsghdr + { + socklen_t cmsg_len; + int cmsg_level; + int cmsg_type; + } + + enum : uint + { + SCM_RIGHTS = 0x01 + } + + /+ + CMSG_DATA(cmsg) ((unsigned char *)(cmsg) + \ + ALIGN(sizeof(struct cmsghdr))) + CMSG_NXTHDR(mhdr, cmsg) \ + (((unsigned char *)(cmsg) + ALIGN((cmsg)->cmsg_len) + \ + ALIGN(sizeof(struct cmsghdr)) > \ + (unsigned char *)(mhdr)->msg_control +(mhdr)->msg_controllen) ? 
\ + (struct cmsghdr *)0 /* NULL */ : \ + (struct cmsghdr *)((unsigned char *)(cmsg) + ALIGN((cmsg)->cmsg_len))) + CMSG_FIRSTHDR(mhdr) ((struct cmsghdr *)(mhdr)->msg_control) + +/ + + struct linger + { + int l_onoff; + int l_linger; + } + + enum + { + SOCK_DGRAM = 2, + SOCK_SEQPACKET = 5, + SOCK_STREAM = 1 + } + + enum : uint + { + SOL_SOCKET = 0xffff + } + + enum : uint + { + SO_ACCEPTCONN = 0x0002, + SO_BROADCAST = 0x0020, + SO_DEBUG = 0x0001, + SO_DONTROUTE = 0x0010, + SO_ERROR = 0x1007, + SO_KEEPALIVE = 0x0008, + SO_LINGER = 0x1080, + SO_OOBINLINE = 0x0100, + SO_RCVBUF = 0x1002, + SO_RCVLOWAT = 0x1004, + SO_RCVTIMEO = 0x1006, + SO_REUSEADDR = 0x0004, + SO_SNDBUF = 0x1001, + SO_SNDLOWAT = 0x1003, + SO_SNDTIMEO = 0x1005, + SO_TYPE = 0x1008 + } + + enum + { + SOMAXCONN = 128 + } + + enum : uint + { + MSG_CTRUNC = 0x20, + MSG_DONTROUTE = 0x4, + MSG_EOR = 0x8, + MSG_OOB = 0x1, + MSG_PEEK = 0x2, + MSG_TRUNC = 0x10, + MSG_WAITALL = 0x40 + } + + enum + { + AF_INET = 2, + AF_UNIX = 1, + AF_UNSPEC = 0 + } + + enum + { + SHUT_RD, + SHUT_WR, + SHUT_RDWR + } + + int accept(int, sockaddr*, socklen_t*); + int bind(int, in sockaddr*, socklen_t); + int connect(int, in sockaddr*, socklen_t); + int getpeername(int, sockaddr*, socklen_t*); + int getsockname(int, sockaddr*, socklen_t*); + int getsockopt(int, int, int, void*, socklen_t*); + int listen(int, int); + ssize_t recv(int, void*, size_t, int); + ssize_t recvfrom(int, void*, size_t, int, sockaddr*, socklen_t*); + ssize_t recvmsg(int, msghdr*, int); + ssize_t send(int, in void*, size_t, int); + ssize_t sendmsg(int, in msghdr*, int); + ssize_t sendto(int, in void*, size_t, int, in sockaddr*, socklen_t); + int setsockopt(int, int, int, in void*, socklen_t); + int shutdown(int, int); + int socket(int, int, int); + int sockatmark(int); + int socketpair(int, int, int, int[2]); +} +else version( freebsd ) +{ + alias uint socklen_t; + alias ubyte sa_family_t; + + struct sockaddr + { + ubyte sa_len; + sa_family_t sa_family; + byte[14] 
sa_data; + } + + private + { + enum _SS_ALIGNSIZE = long.sizeof; + enum _SS_MAXSIZE = 128; + enum _SS_PAD1SIZE = _SS_ALIGNSIZE - ubyte.sizeof - sa_family_t.sizeof; + enum _SS_PAD2SIZE = _SS_MAXSIZE - ubyte.sizeof - sa_family_t.sizeof - _SS_PAD1SIZE - _SS_ALIGNSIZE; + } + + struct sockaddr_storage + { + ubyte ss_len; + sa_family_t ss_family; + byte[_SS_PAD1SIZE] __ss_pad1; + long __ss_align; + byte[_SS_PAD2SIZE] __ss_pad2; + } + + struct msghdr + { + void* msg_name; + socklen_t msg_namelen; + iovec* msg_iov; + int msg_iovlen; + void* msg_control; + socklen_t msg_controllen; + int msg_flags; + } + + struct cmsghdr + { + socklen_t cmsg_len; + int cmsg_level; + int cmsg_type; + } + + enum : uint + { + SCM_RIGHTS = 0x01 + } + + /+ + CMSG_DATA(cmsg) ((unsigned char *)(cmsg) + \ + ALIGN(sizeof(struct cmsghdr))) + CMSG_NXTHDR(mhdr, cmsg) \ + (((unsigned char *)(cmsg) + ALIGN((cmsg)->cmsg_len) + \ + ALIGN(sizeof(struct cmsghdr)) > \ + (unsigned char *)(mhdr)->msg_control +(mhdr)->msg_controllen) ? 
\ + (struct cmsghdr *)0 /* NULL */ : \ + (struct cmsghdr *)((unsigned char *)(cmsg) + ALIGN((cmsg)->cmsg_len))) + CMSG_FIRSTHDR(mhdr) ((struct cmsghdr *)(mhdr)->msg_control) + +/ + + struct linger + { + int l_onoff; + int l_linger; + } + + enum + { + SOCK_DGRAM = 2, + SOCK_SEQPACKET = 5, + SOCK_STREAM = 1 + } + + enum : uint + { + SOL_SOCKET = 0xffff + } + + enum : uint + { + SO_ACCEPTCONN = 0x0002, + SO_BROADCAST = 0x0020, + SO_DEBUG = 0x0001, + SO_DONTROUTE = 0x0010, + SO_ERROR = 0x1007, + SO_KEEPALIVE = 0x0008, + SO_LINGER = 0x1080, + SO_OOBINLINE = 0x0100, + SO_RCVBUF = 0x1002, + SO_RCVLOWAT = 0x1004, + SO_RCVTIMEO = 0x1006, + SO_REUSEADDR = 0x0004, + SO_SNDBUF = 0x1001, + SO_SNDLOWAT = 0x1003, + SO_SNDTIMEO = 0x1005, + SO_TYPE = 0x1008 + } + + enum + { + SOMAXCONN = 128 + } + + enum : uint + { + MSG_CTRUNC = 0x20, + MSG_DONTROUTE = 0x4, + MSG_EOR = 0x8, + MSG_OOB = 0x1, + MSG_PEEK = 0x2, + MSG_TRUNC = 0x10, + MSG_WAITALL = 0x40 + } + + enum + { + AF_INET = 2, + AF_UNIX = 1, + AF_UNSPEC = 0 + } + + enum + { + SHUT_RD = 0, + SHUT_WR = 1, + SHUT_RDWR = 2 + } + + int accept(int, sockaddr*, socklen_t*); + int bind(int, in sockaddr*, socklen_t); + int connect(int, in sockaddr*, socklen_t); + int getpeername(int, sockaddr*, socklen_t*); + int getsockname(int, sockaddr*, socklen_t*); + int getsockopt(int, int, int, void*, socklen_t*); + int listen(int, int); + ssize_t recv(int, void*, size_t, int); + ssize_t recvfrom(int, void*, size_t, int, sockaddr*, socklen_t*); + ssize_t recvmsg(int, msghdr*, int); + ssize_t send(int, in void*, size_t, int); + ssize_t sendmsg(int, in msghdr*, int); + ssize_t sendto(int, in void*, size_t, int, in sockaddr*, socklen_t); + int setsockopt(int, int, int, in void*, socklen_t); + int shutdown(int, int); + int socket(int, int, int); + int sockatmark(int); + int socketpair(int, int, int, int[2]); +} + +// +// IPV6 (IP6) +// +/* +AF_INET6 +*/ + +version( linux ) +{ + enum + { + AF_INET6 = 10 + } +} +else version( OSX ) +{ + enum + { + 
AF_INET6 = 30 + } +} +else version( freebsd ) +{ + enum + { + AF_INET6 = 28 + } +} + +// +// Raw Sockets (RS) +// +/* +SOCK_RAW +*/ + +version( linux ) +{ + enum + { + SOCK_RAW = 3 + } +} +else version( OSX ) +{ + enum + { + SOCK_RAW = 3 + } +} +else version( freebsd ) +{ + enum + { + SOCK_RAW = 3 + } +} diff --git a/druntime/import/core/sys/posix/sys/stat.d b/druntime/import/core/sys/posix/sys/stat.d new file mode 100644 index 00000000..2ab97b8b --- /dev/null +++ b/druntime/import/core/sys/posix/sys/stat.d @@ -0,0 +1,425 @@ +/** + * D header file for POSIX. + * + * Copyright: Copyright Sean Kelly 2005 - 2009. + * License: Boost License 1.0. + * Authors: Sean Kelly + * Standards: The Open Group Base Specifications Issue 6, IEEE Std 1003.1, 2004 Edition + * + * Copyright Sean Kelly 2005 - 2009. + * Distributed under the Boost Software License, Version 1.0. + * (See accompanying file LICENSE_1_0.txt or copy at + * http://www.boost.org/LICENSE_1_0.txt) + */ +module core.sys.posix.sys.time; + +private import core.sys.posix.config; +public import core.sys.posix.sys.types; // for time_t, suseconds_t +public import core.sys.posix.sys.select; // for fd_set, FD_CLR() FD_ISSET() FD_SET() FD_ZERO() FD_SETSIZE, select() + +extern (C): + +// +// XOpen (XSI) +// +/* +struct timeval +{ + time_t tv_sec; + suseconds_t tv_usec; +} + +struct itimerval +{ + timeval it_interval; + timeval it_value; +} + +ITIMER_REAL +ITIMER_VIRTUAL +ITIMER_PROF + +int getitimer(int, itimerval*); +int gettimeofday(timeval*, void*); +int select(int, fd_set*, fd_set*, fd_set*, timeval*); (defined in core.sys.posix.sys.select) +int setitimer(int, in itimerval*, itimerval*); +int utimes(in char*, in timeval[2]); // LEGACY +*/ + +version( linux ) +{ + struct timeval + { + time_t tv_sec; + suseconds_t tv_usec; + } + + struct itimerval + { + timeval it_interval; + timeval it_value; + } + + enum ITIMER_REAL = 0; + enum ITIMER_VIRTUAL = 1; + enum ITIMER_PROF = 2; + + int getitimer(int, itimerval*); + int
gettimeofday(timeval*, void*); + int setitimer(int, in itimerval*, itimerval*); + int utimes(in char*, in timeval[2]); // LEGACY +} +else version( OSX ) +{ + struct timeval + { + time_t tv_sec; + suseconds_t tv_usec; + } + + struct itimerval + { + timeval it_interval; + timeval it_value; + } + + // non-standard + struct timezone_t + { + int tz_minuteswest; + int tz_dsttime; + } + + int getitimer(int, itimerval*); + int gettimeofday(timeval*, timezone_t*); // timezone_t* is normally void* + int setitimer(int, in itimerval*, itimerval*); + int utimes(in char*, in timeval[2]); +} +else version( freebsd ) +{ + struct timeval + { + time_t tv_sec; + suseconds_t tv_usec; + } + + struct itimerval + { + timeval it_interval; + timeval it_value; + } + + // non-standard + struct timezone_t + { + int tz_minuteswest; + int tz_dsttime; + } + + int getitimer(int, itimerval*); + int gettimeofday(timeval*, timezone_t*); // timezone_t* is normally void* + int setitimer(int, in itimerval*, itimerval*); + int utimes(in char*, in timeval[2]); +} diff --git a/druntime/import/core/sys/posix/sys/types.d b/druntime/import/core/sys/posix/sys/types.d new file mode 100644 index 00000000..6a58fc66 --- /dev/null +++ b/druntime/import/core/sys/posix/sys/types.d @@ -0,0 +1,429 @@ +/** + * D header file for POSIX. + * + * Copyright: Copyright Sean Kelly 2005 - 2009. + * License: Boost License 1.0. + * Authors: Sean Kelly + * Standards: The Open Group Base Specifications Issue 6, IEEE Std 1003.1, 2004 Edition + * + * Copyright Sean Kelly 2005 - 2009. + * Distributed under the Boost Software License, Version 1.0. 
+ * (See accompanying file LICENSE_1_0.txt or copy at + * http://www.boost.org/LICENSE_1_0.txt) + */ +module core.sys.posix.sys.uio; + +private import core.sys.posix.config; +public import core.sys.posix.sys.types; // for ssize_t, size_t + +extern (C): + +// +// Required +// +/* +struct iovec +{ + void* iov_base; + size_t iov_len; +} + +ssize_t // from core.sys.posix.sys.types +size_t // from core.sys.posix.sys.types + +ssize_t readv(int, in iovec*, int); +ssize_t writev(int, in iovec*, int); +*/ + +version( linux ) +{ + struct iovec + { + void* iov_base; + size_t iov_len; + } + + ssize_t readv(int, in iovec*, int); + ssize_t writev(int, in iovec*, int); +} +else version( OSX ) +{ + struct iovec + { + void* iov_base; + size_t iov_len; + } + + ssize_t readv(int, in iovec*, int); + ssize_t writev(int, in iovec*, int); +} +else version( freebsd ) +{ + struct iovec + { + void* iov_base; + size_t iov_len; + } + + ssize_t readv(int, in iovec*, int); + ssize_t writev(int, in iovec*, int); +} diff --git a/druntime/import/core/sys/posix/sys/wait.d b/druntime/import/core/sys/posix/sys/wait.d new file mode 100644 index 00000000..e97688cf --- /dev/null +++ b/druntime/import/core/sys/posix/sys/wait.d @@ -0,0 +1,141 @@ +/** + * D header file for POSIX. + * + * Copyright: Copyright Sean Kelly 2005 - 2009. + * License: Boost License 1.0. + * Authors: Sean Kelly + * Standards: The Open Group Base Specifications Issue 6, IEEE Std 1003.1, 2004 Edition + * + * Copyright Sean Kelly 2005 - 2009. + * Distributed under the Boost Software License, Version 1.0. 
+ * (See accompanying file LICENSE_1_0.txt or copy at + * http://www.boost.org/LICENSE_1_0.txt) + */ +module core.sys.posix.termios; + +private import core.sys.posix.config; +public import core.sys.posix.sys.types; // for pid_t + +extern (C): + +// +// Required +// +/* +cc_t +speed_t +tcflag_t + +NCCS + +struct termios +{ + tcflag_t c_iflag; + tcflag_t c_oflag; + tcflag_t c_cflag; + tcflag_t c_lflag; + cc_t[NCCS] c_cc; +} + +VEOF +VEOL +VERASE +VINTR +VKILL +VMIN +VQUIT +VSTART +VSTOP +VSUSP +VTIME + +BRKINT +ICRNL +IGNBRK +IGNCR +IGNPAR +INLCR +INPCK +ISTRIP +IXOFF +IXON +PARMRK + +OPOST + +B0 +B50 +B75 +B110 +B134 +B150 +B200 +B300 +B600 +B1200 +B1800 +B2400 +B4800 +B9600 +B19200 +B38400 + +CSIZE + CS5 + CS6 + CS7 + CS8 +CSTOPB +CREAD +PARENB +PARODD +HUPCL +CLOCAL + +ECHO +ECHOE +ECHOK +ECHONL +ICANON +IEXTEN +ISIG +NOFLSH +TOSTOP + +TCSANOW +TCSADRAIN +TCSAFLUSH + +TCIFLUSH +TCIOFLUSH +TCOFLUSH + +TCIOFF +TCION +TCOOFF +TCOON + +speed_t cfgetispeed(in termios*); +speed_t cfgetospeed(in termios*); +int cfsetispeed(termios*, speed_t); +int cfsetospeed(termios*, speed_t); +int tcdrain(int); +int tcflow(int, int); +int tcflush(int, int); +int tcgetattr(int, termios*); +int tcsendbreak(int, int); +int tcsetattr(int, int, in termios*); +*/ + +version( OSX ) +{ + alias ubyte cc_t; + alias uint speed_t; + alias uint tcflag_t; + + enum NCCS = 20; + + struct termios + { + tcflag_t c_iflag; + tcflag_t c_oflag; + tcflag_t c_cflag; + tcflag_t c_lflag; + cc_t[NCCS] c_cc; + speed_t c_ispeed; + speed_t c_ospeed; + } + + enum VEOF = 0; + enum VEOL = 1; + enum VERASE = 3; + enum VINTR = 8; + enum VKILL = 5; + enum VMIN = 16; + enum VQUIT = 9; + enum VSTART = 12; + enum VSTOP = 13; + enum VSUSP = 10; + enum VTIME = 17; + + enum BRKINT = 0x0000002; + enum ICRNL = 0x0000100; + enum IGNBRK = 0x0000001; + enum IGNCR = 0x0000080; + enum IGNPAR = 0x0000004; + enum INLCR = 0x0000040; + enum INPCK = 0x0000010; + enum ISTRIP = 0x0000020; + enum IXOFF = 0x0000400; + enum IXON = 0x0000200; 
+ enum PARMRK = 0x0000008; + + enum OPOST = 0x0000001; + + enum B0 = 0; + enum B50 = 50; + enum B75 = 75; + enum B110 = 110; + enum B134 = 134; + enum B150 = 150; + enum B200 = 200; + enum B300 = 300; + enum B600 = 600; + enum B1200 = 1200; + enum B1800 = 1800; + enum B2400 = 2400; + enum B4800 = 4800; + enum B9600 = 9600; + enum B19200 = 19200; + enum B38400 = 38400; + + enum CSIZE = 0x0000300; + enum CS5 = 0x0000000; + enum CS6 = 0x0000100; + enum CS7 = 0x0000200; + enum CS8 = 0x0000300; + enum CSTOPB = 0x0000400; + enum CREAD = 0x0000800; + enum PARENB = 0x0001000; + enum PARODD = 0x0002000; + enum HUPCL = 0x0004000; + enum CLOCAL = 0x0008000; + + enum ECHO = 0x00000008; + enum ECHOE = 0x00000002; + enum ECHOK = 0x00000004; + enum ECHONL = 0x00000010; + enum ICANON = 0x00000100; + enum IEXTEN = 0x00000400; + enum ISIG = 0x00000080; + enum NOFLSH = 0x80000000; + enum TOSTOP = 0x00400000; + + enum TCSANOW = 0; + enum TCSADRAIN = 1; + enum TCSAFLUSH = 2; + + enum TCIFLUSH = 1; + enum TCOFLUSH = 2; + enum TCIOFLUSH = 3; + + enum TCIOFF = 3; + enum TCION = 4; + enum TCOOFF = 1; + enum TCOON = 2; + + speed_t cfgetispeed(in termios*); + speed_t cfgetospeed(in termios*); + int cfsetispeed(termios*, speed_t); + int cfsetospeed(termios*, speed_t); + int tcdrain(int); + int tcflow(int, int); + int tcflush(int, int); + int tcgetattr(int, termios*); + int tcsendbreak(int, int); + int tcsetattr(int, int, in termios*); + +} +else version( linux ) +{ + alias ubyte cc_t; + alias uint speed_t; + alias uint tcflag_t; + + enum NCCS = 32; + + struct termios + { + tcflag_t c_iflag; + tcflag_t c_oflag; + tcflag_t c_cflag; + tcflag_t c_lflag; + cc_t c_line; + cc_t[NCCS] c_cc; + speed_t c_ispeed; + speed_t c_ospeed; + } + + enum VEOF = 4; + enum VEOL = 11; + enum VERASE = 2; + enum VINTR = 0; + enum VKILL = 3; + enum VMIN = 6; + enum VQUIT = 1; + enum VSTART = 8; + enum VSTOP = 9; + enum VSUSP = 10; + enum VTIME = 5; + + enum BRKINT = 0000002; + enum ICRNL = 0000400; + enum IGNBRK = 
0000001; + enum IGNCR = 0000200; + enum IGNPAR = 0000004; + enum INLCR = 0000100; + enum INPCK = 0000020; + enum ISTRIP = 0000040; + enum IXOFF = 0010000; + enum IXON = 0002000; + enum PARMRK = 0000010; + + enum OPOST = 0000001; + + enum B0 = 0000000; + enum B50 = 0000001; + enum B75 = 0000002; + enum B110 = 0000003; + enum B134 = 0000004; + enum B150 = 0000005; + enum B200 = 0000006; + enum B300 = 0000007; + enum B600 = 0000010; + enum B1200 = 0000011; + enum B1800 = 0000012; + enum B2400 = 0000013; + enum B4800 = 0000014; + enum B9600 = 0000015; + enum B19200 = 0000016; + enum B38400 = 0000017; + + enum CSIZE = 0000060; + enum CS5 = 0000000; + enum CS6 = 0000020; + enum CS7 = 0000040; + enum CS8 = 0000060; + enum CSTOPB = 0000100; + enum CREAD = 0000200; + enum PARENB = 0000400; + enum PARODD = 0001000; + enum HUPCL = 0002000; + enum CLOCAL = 0004000; + + enum ECHO = 0000010; + enum ECHOE = 0000020; + enum ECHOK = 0000040; + enum ECHONL = 0000100; + enum ICANON = 0000002; + enum IEXTEN = 0100000; + enum ISIG = 0000001; + enum NOFLSH = 0000200; + enum TOSTOP = 0000400; + + enum TCSANOW = 0; + enum TCSADRAIN = 1; + enum TCSAFLUSH = 2; + + enum TCIFLUSH = 0; + enum TCOFLUSH = 1; + enum TCIOFLUSH = 2; + + enum TCIOFF = 2; + enum TCION = 3; + enum TCOOFF = 0; + enum TCOON = 1; + + speed_t cfgetispeed(in termios*); + speed_t cfgetospeed(in termios*); + int cfsetispeed(termios*, speed_t); + int cfsetospeed(termios*, speed_t); + int tcdrain(int); + int tcflow(int, int); + int tcflush(int, int); + int tcgetattr(int, termios*); + int tcsendbreak(int, int); + int tcsetattr(int, int, in termios*); +} +else version ( freebsd ) +{ + alias ubyte cc_t; + alias uint speed_t; + alias uint tcflag_t; + + enum NCCS = 20; + + struct termios + { + tcflag_t c_iflag; + tcflag_t c_oflag; + tcflag_t c_cflag; + tcflag_t c_lflag; + cc_t[NCCS] c_cc; + speed_t c_ispeed; + speed_t c_ospeed; + } + + enum VEOF = 0; + enum VEOL = 1; + enum VERASE = 3; + enum VINTR = 8; + enum VKILL = 5; + enum 
VMIN = 16; + enum VQUIT = 9; + enum VSTART = 12; + enum VSTOP = 13; + enum VSUSP = 10; + enum VTIME = 17; + + enum BRKINT = 0x0000002; + enum ICRNL = 0x0000100; + enum IGNBRK = 0x0000001; + enum IGNCR = 0x0000080; + enum IGNPAR = 0x0000004; + enum INLCR = 0x0000040; + enum INPCK = 0x0000010; + enum ISTRIP = 0x0000020; + enum IXOFF = 0x0000400; + enum IXON = 0x0000200; + enum PARMRK = 0x0000008; + + enum OPOST = 0x0000001; + + enum B0 = 0; + enum B50 = 50; + enum B75 = 75; + enum B110 = 110; + enum B134 = 134; + enum B150 = 150; + enum B200 = 200; + enum B300 = 300; + enum B600 = 600; + enum B1200 = 1200; + enum B1800 = 1800; + enum B2400 = 2400; + enum B4800 = 4800; + enum B9600 = 9600; + enum B19200 = 19200; + enum B38400 = 38400; + + enum CSIZE = 0x0000300; + enum CS5 = 0x0000000; + enum CS6 = 0x0000100; + enum CS7 = 0x0000200; + enum CS8 = 0x0000300; + enum CSTOPB = 0x0000400; + enum CREAD = 0x0000800; + enum PARENB = 0x0001000; + enum PARODD = 0x0002000; + enum HUPCL = 0x0004000; + enum CLOCAL = 0x0008000; + + enum ECHO = 0x00000008; + enum ECHOE = 0x00000002; + enum ECHOK = 0x00000004; + enum ECHONL = 0x00000010; + enum ICANON = 0x00000100; + enum IEXTEN = 0x00000400; + enum ISIG = 0x00000080; + enum NOFLSH = 0x80000000; + enum TOSTOP = 0x00400000; + + enum TCSANOW = 0; + enum TCSADRAIN = 1; + enum TCSAFLUSH = 2; + + enum TCIFLUSH = 1; + enum TCOFLUSH = 2; + enum TCIOFLUSH = 3; + + enum TCIOFF = 3; + enum TCION = 4; + enum TCOOFF = 1; + enum TCOON = 2; + + speed_t cfgetispeed(in termios*); + speed_t cfgetospeed(in termios*); + int cfsetispeed(termios*, speed_t); + int cfsetospeed(termios*, speed_t); + int tcdrain(int); + int tcflow(int, int); + int tcflush(int, int); + int tcgetattr(int, termios*); + int tcsendbreak(int, int); + int tcsetattr(int, int, in termios*); + +} + +// +// XOpen (XSI) +// +/* +IXANY + +ONLCR +OCRNL +ONOCR +ONLRET +OFILL +NLDLY + NL0 + NL1 +CRDLY + CR0 + CR1 + CR2 + CR3 +TABDLY + TAB0 + TAB1 + TAB2 + TAB3 +BSDLY + BS0 + BS1 +VTDLY + VT0 
+ VT1 +FFDLY + FF0 + FF1 + +pid_t tcgetsid(int); +*/ + +version( linux ) +{ + enum IXANY = 0004000; + + enum ONLCR = 0000004; + enum OCRNL = 0000010; + enum ONOCR = 0000020; + enum ONLRET = 0000040; + enum OFILL = 0000100; + enum NLDLY = 0000400; + enum NL0 = 0000000; + enum NL1 = 0000400; + enum CRDLY = 0003000; + enum CR0 = 0000000; + enum CR1 = 0001000; + enum CR2 = 0002000; + enum CR3 = 0003000; + enum TABDLY = 0014000; + enum TAB0 = 0000000; + enum TAB1 = 0004000; + enum TAB2 = 0010000; + enum TAB3 = 0014000; + enum BSDLY = 0020000; + enum BS0 = 0000000; + enum BS1 = 0020000; + enum VTDLY = 0040000; + enum VT0 = 0000000; + enum VT1 = 0040000; + enum FFDLY = 0100000; + enum FF0 = 0000000; + enum FF1 = 0100000; + + pid_t tcgetsid(int); +} diff --git a/druntime/import/core/sys/posix/time.d b/druntime/import/core/sys/posix/time.d new file mode 100644 index 00000000..1cbed254 --- /dev/null +++ b/druntime/import/core/sys/posix/time.d @@ -0,0 +1,270 @@ +/** + * D header file for POSIX. + * + * Copyright: Copyright Sean Kelly 2005 - 2009. + * License: Boost License 1.0. + * Authors: Sean Kelly + * Standards: The Open Group Base Specifications Issue 6, IEEE Std 1003.1, 2004 Edition + * + * Copyright Sean Kelly 2005 - 2009. + * Distributed under the Boost Software License, Version 1.0. 
+ * (See accompanying file LICENSE_1_0.txt or copy at + * http://www.boost.org/LICENSE_1_0.txt) + */ +module core.sys.posix.ucontext; + +private import core.sys.posix.config; +public import core.sys.posix.signal; // for sigset_t, stack_t + +extern (C): + +// +// XOpen (XSI) +// +/* +mcontext_t + +struct ucontext_t +{ + ucontext_t* uc_link; + sigset_t uc_sigmask; + stack_t uc_stack; + mcontext_t uc_mcontext; +} +*/ + +version( linux ) +{ + + version( X86_64 ) + { + private + { + struct _libc_fpxreg + { + ushort[4] significand; + ushort exponent; + ushort[3] padding; + } + + struct _libc_xmmreg + { + uint[4] element; + } + + struct _libc_fpstate + { + ushort cwd; + ushort swd; + ushort ftw; + ushort fop; + ulong rip; + ulong rdp; + uint mxcsr; + uint mxcr_mask; + _libc_fpxreg[8] _st; + _libc_xmmreg[16] _xmm; + uint[24] padding; + } + + enum NGREG = 23; + + alias c_long greg_t; + alias greg_t[NGREG] gregset_t; + alias _libc_fpstate* fpregset_t; + } + + struct mcontext_t + { + gregset_t gregs; + fpregset_t fpregs; + c_ulong[8] __reserved1; + } + + struct ucontext_t + { + c_ulong uc_flags; + ucontext_t* uc_link; + stack_t uc_stack; + mcontext_t uc_mcontext; + sigset_t uc_sigmask; + _libc_fpstate __fpregs_mem; + } + } + else version( X86 ) + { + private + { + struct _libc_fpreg + { + ushort[4] significand; + ushort exponent; + } + + struct _libc_fpstate + { + c_ulong cw; + c_ulong sw; + c_ulong tag; + c_ulong ipoff; + c_ulong cssel; + c_ulong dataoff; + c_ulong datasel; + _libc_fpreg[8] _st; + c_ulong status; + } + + enum NGREG = 19; + + alias int greg_t; + alias greg_t[NGREG] gregset_t; + alias _libc_fpstate* fpregset_t; + } + + struct mcontext_t + { + gregset_t gregs; + fpregset_t fpregs; + c_ulong oldmask; + c_ulong cr2; + } + + struct ucontext_t + { + c_ulong uc_flags; + ucontext_t* uc_link; + stack_t uc_stack; + mcontext_t uc_mcontext; + sigset_t uc_sigmask; + _libc_fpstate __fpregs_mem; + } + } +} + +// +// Obsolescent (OB) +// +/* +int getcontext(ucontext_t*); 
+void makecontext(ucontext_t*, void function(), int, ...); +int setcontext(in ucontext_t*); +int swapcontext(ucontext_t*, in ucontext_t*); +*/ + +static if( is( ucontext_t ) ) +{ + int getcontext(ucontext_t*); + void makecontext(ucontext_t*, void function(), int, ...); + int setcontext(in ucontext_t*); + int swapcontext(ucontext_t*, in ucontext_t*); +} diff --git a/druntime/import/core/sys/posix/unistd.d b/druntime/import/core/sys/posix/unistd.d new file mode 100644 index 00000000..e8d85125 --- /dev/null +++ b/druntime/import/core/sys/posix/unistd.d @@ -0,0 +1,599 @@ +/** + * D header file for POSIX. + * + * Copyright: Copyright Sean Kelly 2005 - 2009. + * License: Boost License 1.0. + * Authors: Sean Kelly + * Standards: The Open Group Base Specifications Issue 6, IEEE Std 1003.1, 2004 Edition + * + * Copyright Sean Kelly 2005 - 2009. + * Distributed under the Boost Software License, Version 1.0. + * (See accompanying file LICENSE_1_0.txt or copy at + * http://www.boost.org/LICENSE_1_0.txt) + */ +module core.sys.posix.utime; + +private import core.sys.posix.config; +public import core.sys.posix.sys.types; // for time_t + +extern (C): + +// +// Required +// +/* +struct utimbuf +{ + time_t actime; + time_t modtime; +} + +int utime(in char*, in utimbuf*); +*/ + +version( linux ) +{ + struct utimbuf + { + time_t actime; + time_t modtime; + } + + int utime(in char*, in utimbuf*); +} +else version( OSX ) +{ + struct utimbuf + { + time_t actime; + time_t modtime; + } + + int utime(in char*, in utimbuf*); +} +else version( freebsd ) +{ + struct utimbuf + { + time_t actime; + time_t modtime; + } + + int utime(in char*, in utimbuf*); +} diff --git a/druntime/import/core/sys/windows/windows.d b/druntime/import/core/sys/windows/windows.d new file mode 100644 index 00000000..65132240 --- /dev/null +++ b/druntime/import/core/sys/windows/windows.d @@ -0,0 +1,3181 @@ +/** + * Windows is a registered trademark of Microsoft Corporation in the United + * States and other countries. 
+ * + * Copyright: Copyright Digital Mars 2000 - 2009. + * License: + * bsr(x21) = 5 + */ +pure nothrow int bsr( uint v ); + + +/** + * Tests the bit. + */ +pure nothrow int bt( in uint* p, uint bitnum ); + + +/** + * Tests and complements the bit. + */ +nothrow int btc( uint* p, uint bitnum ); + + +/** + * Tests and resets (sets to 0) the bit. + */ +nothrow int btr( uint* p, uint bitnum ); + + +/** + * Tests and sets the bit. + * Params: + * p = a non-NULL pointer to an array of uints. + * index = a bit number, starting with bit 0 of p[0], + * and progressing. It addresses bits like the expression: +--- +p[index / (uint.sizeof*8)] & (1 << (index & ((uint.sizeof*8) - 1))) +--- + * Returns: + * A non-zero value if the bit was set, and a zero + * if it was clear. + * + * Example: + * --- +import std.intrinsic; + +int main() +{ + uint array[2]; + + array[0] = 2; + array[1] = 0x100; + + printf("btc(array, 35) = %d\n", btc(array, 35)); + printf("array = [0]:x%x, [1]:x%x\n", array[0], array[1]); + + printf("btc(array, 35) = %d\n", btc(array, 35)); + printf("array = [0]:x%x, [1]:x%x\n", array[0], array[1]); + + printf("bts(array, 35) = %d\n", bts(array, 35)); + printf("array = [0]:x%x, [1]:x%x\n", array[0], array[1]); + + printf("btr(array, 35) = %d\n", btr(array, 35)); + printf("array = [0]:x%x, [1]:x%x\n", array[0], array[1]); + + printf("bt(array, 1) = %d\n", bt(array, 1)); + printf("array = [0]:x%x, [1]:x%x\n", array[0], array[1]); + + return 0; +} + * --- + * Output: +
+btc(array, 35) = 0 +array = [0]:x2, [1]:x108 +btc(array, 35) = -1 +array = [0]:x2, [1]:x100 +bts(array, 35) = 0 +array = [0]:x2, [1]:x108 +btr(array, 35) = -1 +array = [0]:x2, [1]:x100 +bt(array, 1) = -1 +array = [0]:x2, [1]:x100 ++ */ +nothrow int bts( uint* p, uint bitnum ); + + +/** + * Swaps bytes in a 4 byte uint end-to-end, i.e. byte 0 becomes + * byte 3, byte 1 becomes byte 2, byte 2 becomes byte 1, byte 3 + * becomes byte 0. + */ +pure nothrow uint bswap( uint v ); + + +/** + * Reads I/O port at port_address. + */ +nothrow ubyte inp( uint port_address ); + + +/** + * ditto + */ +nothrow ushort inpw( uint port_address ); + + +/** + * ditto + */ +nothrow uint inpl( uint port_address ); + + +/** + * Writes and returns value to I/O port at port_address. + */ +nothrow ubyte outp( uint port_address, ubyte value ); + + +/** + * ditto + */ +nothrow ushort outpw( uint port_address, ushort value ); + + +/** + * ditto + */ +nothrow uint outpl( uint port_address, uint value ); diff --git a/druntime/import/std/stdarg.di b/druntime/import/std/stdarg.di new file mode 100644 index 00000000..8fccb13a --- /dev/null +++ b/druntime/import/std/stdarg.di @@ -0,0 +1,32 @@ +/** + * These functions are built-in intrinsics to the compiler. 
+ * + * Copyright: Public Domain + * License: Public Domain + * Authors: David Friedman + */ +module std.stdarg; + +version( GNU ) +{ + private import gcc.builtins; + alias __builtin_va_list va_list; + alias __builtin_va_end va_end; + alias __builtin_va_copy va_copy; +} + +template va_start(T) +{ + void va_start( out va_list ap, inout T parmn ) + { + + } +} + +template va_arg(T) +{ + T va_arg( inout va_list ap ) + { + return T.init; + } +} diff --git a/druntime/src/build-dmd.bat b/druntime/src/build-dmd.bat new file mode 100644 index 00000000..8e54facc --- /dev/null +++ b/druntime/src/build-dmd.bat @@ -0,0 +1,5 @@ +@echo off +set OLDHOME=%HOME% +set HOME=%CD% +make clean all -fdmd-win32.mak +set HOME=%OLDHOME% \ No newline at end of file diff --git a/druntime/src/build-dmd.sh b/druntime/src/build-dmd.sh new file mode 100644 index 00000000..fae34f47 --- /dev/null +++ b/druntime/src/build-dmd.sh @@ -0,0 +1,19 @@ +#!/usr/bin/env bash + +OLDHOME=$HOME +export HOME=`pwd` + +goerror(){ + export HOME=$OLDHOME + echo "=" + echo "= *** Error ***" + echo "=" + exit 1 +} + +make clean -fdmd-posix.mak || goerror +make -fdmd-posix.mak || goerror +chmod 644 ../import/core/*.di || goerror +chmod 644 ../import/core/sync/*.di || goerror + +export HOME=$OLDHOME diff --git a/druntime/src/build-ldc.sh b/druntime/src/build-ldc.sh new file mode 100755 index 00000000..4a08fe0f --- /dev/null +++ b/druntime/src/build-ldc.sh @@ -0,0 +1,19 @@ +#!/usr/bin/env bash + +OLDHOME=$HOME +export HOME=`pwd` + +goerror(){ + export HOME=$OLDHOME + echo "=" + echo "= *** Error ***" + echo "=" + exit 1 +} + +make clean -fldc-posix.mak || goerror +make -fldc-posix.mak || goerror +chmod 644 ../import/core/*.di || goerror +chmod 644 ../import/core/sync/*.di || goerror + +export HOME=$OLDHOME diff --git a/druntime/src/common/core/bitmanip.d b/druntime/src/common/core/bitmanip.d new file mode 100644 index 00000000..11b42a8b --- /dev/null +++ b/druntime/src/common/core/bitmanip.d @@ -0,0 +1,286 @@ +/** + * 
This module contains a collection of bit-level operations. + * + * Copyright: Copyright (c) 2005-2008, The D Runtime Project + * License: BSD Style, see LICENSE + * Authors: Walter Bright, Don Clugston, Sean Kelly + */ +module core.bitmanip; + + +version( DDoc ) +{ + /** + * Scans the bits in v starting with bit 0, looking + * for the first set bit. + * Returns: + * The bit number of the first bit set. + * The return value is undefined if v is zero. + */ + int bsf( uint v ); + + + /** + * Scans the bits in v from the most significant bit + * to the least significant bit, looking + * for the first set bit. + * Returns: + * The bit number of the first bit set. + * The return value is undefined if v is zero. + * Example: + * --- + * import core.bitmanip; + * + * int main() + * { + * uint v; + * int x; + * + * v = 0x21; + * x = bsf(v); + * printf("bsf(x%x) = %d\n", v, x); + * x = bsr(v); + * printf("bsr(x%x) = %d\n", v, x); + * return 0; + * } + * --- + * Output: + * bsf(x21) = 0
+ btc(array, 35) = 0 + array = [0]:x2, [1]:x108 + btc(array, 35) = -1 + array = [0]:x2, [1]:x100 + bts(array, 35) = 0 + array = [0]:x2, [1]:x108 + btr(array, 35) = -1 + array = [0]:x2, [1]:x100 + bt(array, 1) = -1 + array = [0]:x2, [1]:x100 ++ */ + int bts( uint* p, uint bitnum ); + + + /** + * Swaps bytes in a 4 byte uint end-to-end, i.e. byte 0 becomes + * byte 3, byte 1 becomes byte 2, byte 2 becomes byte 1, byte 3 + * becomes byte 0. + */ + uint bswap( uint v ); + + + /** + * Reads I/O port at port_address. + */ + ubyte inp( uint port_address ); + + + /** + * ditto + */ + ushort inpw( uint port_address ); + + + /** + * ditto + */ + uint inpl( uint port_address ); + + + /** + * Writes and returns value to I/O port at port_address. + */ + ubyte outp( uint port_address, ubyte value ); + + + /** + * ditto + */ + ushort outpw( uint port_address, ushort value ); + + + /** + * ditto + */ + uint outpl( uint port_address, uint value ); +} +else +{ + public import std.intrinsic; +} + + +/** + * Calculates the number of set bits in a 32-bit integer. + */ +int popcnt( uint x ) +{ + // Avoid branches, and the potential for cache misses which + // could be incurred with a table lookup. + + // We need to mask alternate bits to prevent the + // sum from overflowing. + // add neighbouring bits. Each bit is 0 or 1. + x = x - ((x>>1) & 0x5555_5555); + // now each two bits of x is a number 00,01 or 10. + // now add neighbouring pairs + x = ((x&0xCCCC_CCCC)>>2) + (x&0x3333_3333); + // now each nibble holds 0000-0100. Adding them won't + // overflow any more, so we don't need to mask any more + + // Now add the nibbles, then the bytes, then the words + // We still need to mask to prevent double-counting. + // Note that if we used a rotate instead of a shift, we + // wouldn't need the masks, and could just divide the sum + // by 8 to account for the double-counting. + // On some CPUs, it may be faster to perform a multiply. 
+ + x += (x>>4); + x &= 0x0F0F_0F0F; + x += (x>>8); + x &= 0x00FF_00FF; + x += (x>>16); + x &= 0xFFFF; + return x; +} + + +debug( UnitTest ) +{ + unittest + { + assert( popcnt( 0 ) == 0 ); + assert( popcnt( 7 ) == 3 ); + assert( popcnt( 0xAA )== 4 ); + assert( popcnt( 0x8421_1248 ) == 8 ); + assert( popcnt( 0xFFFF_FFFF ) == 32 ); + assert( popcnt( 0xCCCC_CCCC ) == 16 ); + assert( popcnt( 0x7777_7777 ) == 24 ); + } +} + + +/** + * Reverses the order of bits in a 32-bit integer. + */ +uint bitswap( uint x ) +{ + + version( D_InlineAsm_X86 ) + { + asm + { + // Author: Tiago Gasiba. + mov EDX, EAX; + shr EAX, 1; + and EDX, 0x5555_5555; + and EAX, 0x5555_5555; + shl EDX, 1; + or EAX, EDX; + mov EDX, EAX; + shr EAX, 2; + and EDX, 0x3333_3333; + and EAX, 0x3333_3333; + shl EDX, 2; + or EAX, EDX; + mov EDX, EAX; + shr EAX, 4; + and EDX, 0x0f0f_0f0f; + and EAX, 0x0f0f_0f0f; + shl EDX, 4; + or EAX, EDX; + bswap EAX; + } + } + else + { + // swap odd and even bits + x = ((x >> 1) & 0x5555_5555) | ((x & 0x5555_5555) << 1); + // swap consecutive pairs + x = ((x >> 2) & 0x3333_3333) | ((x & 0x3333_3333) << 2); + // swap nibbles + x = ((x >> 4) & 0x0F0F_0F0F) | ((x & 0x0F0F_0F0F) << 4); + // swap bytes + x = ((x >> 8) & 0x00FF_00FF) | ((x & 0x00FF_00FF) << 8); + // swap 2-byte long pairs + x = ( x >> 16 ) | ( x << 16); + return x; + + } +} + + +debug( UnitTest ) +{ + unittest + { + assert( bitswap( 0x8000_0100 ) == 0x0080_0001 ); + } +} diff --git a/druntime/src/common/core/bitop.d b/druntime/src/common/core/bitop.d new file mode 100644 index 00000000..912bdab5 --- /dev/null +++ b/druntime/src/common/core/bitop.d @@ -0,0 +1,290 @@ +/** + * This module contains a collection of bit-level operations. + * + * Copyright: Copyright Don Clugston 2005 - 2009. + * License: + * bsr(x21) = 5 + */ + int bsr( uint v ); + + + /** + * Tests the bit. + */ + int bt( uint* p, uint bitnum ); + + + /** + * Tests and complements the bit. 
+ */ + int btc( uint* p, uint bitnum ); + + + /** + * Tests and resets (sets to 0) the bit. + */ + int btr( uint* p, uint bitnum ); + + + /** + * Tests and sets the bit. + * Params: + * p = a non-NULL pointer to an array of uints. + * index = a bit number, starting with bit 0 of p[0], + * and progressing. It addresses bits like the expression: + --- + p[index / (uint.sizeof*8)] & (1 << (index & ((uint.sizeof*8) - 1))) + --- + * Returns: + * A non-zero value if the bit was set, and a zero + * if it was clear. + * + * Example: + * --- + import core.bitop; + + int main() + { + uint array[2]; + + array[0] = 2; + array[1] = 0x100; + + printf("btc(array, 35) = %d\n", btc(array, 35)); + printf("array = [0]:x%x, [1]:x%x\n", array[0], array[1]); + + printf("btc(array, 35) = %d\n", btc(array, 35)); + printf("array = [0]:x%x, [1]:x%x\n", array[0], array[1]); + + printf("bts(array, 35) = %d\n", bts(array, 35)); + printf("array = [0]:x%x, [1]:x%x\n", array[0], array[1]); + + printf("btr(array, 35) = %d\n", btr(array, 35)); + printf("array = [0]:x%x, [1]:x%x\n", array[0], array[1]); + + printf("bt(array, 1) = %d\n", bt(array, 1)); + printf("array = [0]:x%x, [1]:x%x\n", array[0], array[1]); + + return 0; + } + * --- + * Output: +
+ btc(array, 35) = 0 + array = [0]:x2, [1]:x108 + btc(array, 35) = -1 + array = [0]:x2, [1]:x100 + bts(array, 35) = 0 + array = [0]:x2, [1]:x108 + btr(array, 35) = -1 + array = [0]:x2, [1]:x100 + bt(array, 1) = -1 + array = [0]:x2, [1]:x100 ++ */ + int bts( uint* p, uint bitnum ); + + + /** + * Swaps bytes in a 4 byte uint end-to-end, i.e. byte 0 becomes + * byte 3, byte 1 becomes byte 2, byte 2 becomes byte 1, byte 3 + * becomes byte 0. + */ + uint bswap( uint v ); + + + /** + * Reads I/O port at port_address. + */ + ubyte inp( uint port_address ); + + + /** + * ditto + */ + ushort inpw( uint port_address ); + + + /** + * ditto + */ + uint inpl( uint port_address ); + + + /** + * Writes and returns value to I/O port at port_address. + */ + ubyte outp( uint port_address, ubyte value ); + + + /** + * ditto + */ + ushort outpw( uint port_address, ushort value ); + + + /** + * ditto + */ + uint outpl( uint port_address, uint value ); +} +else +{ + public import std.intrinsic; +} + + +/** + * Calculates the number of set bits in a 32-bit integer. + */ +int popcnt( uint x ) +{ + // Avoid branches, and the potential for cache misses which + // could be incurred with a table lookup. + + // We need to mask alternate bits to prevent the + // sum from overflowing. + // add neighbouring bits. Each bit is 0 or 1. + x = x - ((x>>1) & 0x5555_5555); + // now each two bits of x is a number 00,01 or 10. + // now add neighbouring pairs + x = ((x&0xCCCC_CCCC)>>2) + (x&0x3333_3333); + // now each nibble holds 0000-0100. Adding them won't + // overflow any more, so we don't need to mask any more + + // Now add the nibbles, then the bytes, then the words + // We still need to mask to prevent double-counting. + // Note that if we used a rotate instead of a shift, we + // wouldn't need the masks, and could just divide the sum + // by 8 to account for the double-counting. + // On some CPUs, it may be faster to perform a multiply. 
+ + x += (x>>4); + x &= 0x0F0F_0F0F; + x += (x>>8); + x &= 0x00FF_00FF; + x += (x>>16); + x &= 0xFFFF; + return x; +} + + +debug( UnitTest ) +{ + unittest + { + assert( popcnt( 0 ) == 0 ); + assert( popcnt( 7 ) == 3 ); + assert( popcnt( 0xAA )== 4 ); + assert( popcnt( 0x8421_1248 ) == 8 ); + assert( popcnt( 0xFFFF_FFFF ) == 32 ); + assert( popcnt( 0xCCCC_CCCC ) == 16 ); + assert( popcnt( 0x7777_7777 ) == 24 ); + } +} + + +/** + * Reverses the order of bits in a 32-bit integer. + */ +uint bitswap( uint x ) +{ + + version( D_InlineAsm_X86 ) + { + asm + { + // Author: Tiago Gasiba. + mov EDX, EAX; + shr EAX, 1; + and EDX, 0x5555_5555; + and EAX, 0x5555_5555; + shl EDX, 1; + or EAX, EDX; + mov EDX, EAX; + shr EAX, 2; + and EDX, 0x3333_3333; + and EAX, 0x3333_3333; + shl EDX, 2; + or EAX, EDX; + mov EDX, EAX; + shr EAX, 4; + and EDX, 0x0f0f_0f0f; + and EAX, 0x0f0f_0f0f; + shl EDX, 4; + or EAX, EDX; + bswap EAX; + } + } + else + { + // swap odd and even bits + x = ((x >> 1) & 0x5555_5555) | ((x & 0x5555_5555) << 1); + // swap consecutive pairs + x = ((x >> 2) & 0x3333_3333) | ((x & 0x3333_3333) << 2); + // swap nibbles + x = ((x >> 4) & 0x0F0F_0F0F) | ((x & 0x0F0F_0F0F) << 4); + // swap bytes + x = ((x >> 8) & 0x00FF_00FF) | ((x & 0x00FF_00FF) << 8); + // swap 2-byte long pairs + x = ( x >> 16 ) | ( x << 16); + return x; + + } +} + + +debug( UnitTest ) +{ + unittest + { + assert( bitswap( 0x8000_0100 ) == 0x0080_0001 ); + } +} diff --git a/druntime/src/common/core/exception.d b/druntime/src/common/core/exception.d new file mode 100644 index 00000000..1741746e --- /dev/null +++ b/druntime/src/common/core/exception.d @@ -0,0 +1,279 @@ +/** + * The exception module defines all system-level exceptions and provides a + * mechanism to alter system-level error handling. + * + * Copyright: Copyright Sean Kelly 2005 - 2009. + * License: Boost License 1.0. + * Authors: Sean Kelly + * + * Copyright Sean Kelly 2005 - 2009. 
+ * Distributed under the Boost Software License, Version 1.0. + * (See accompanying file LICENSE_1_0.txt or copy at + * http://www.boost.org/LICENSE_1_0.txt) + */ +module core.runtime; + + +private +{ + extern (C) bool rt_isHalting(); + + alias bool function() ModuleUnitTester; + alias bool function(Object) CollectHandler; + alias Exception.TraceInfo function( void* ptr = null ) TraceHandler; + + extern (C) void rt_setCollectHandler( CollectHandler h ); + extern (C) void rt_setTraceHandler( TraceHandler h ); + + alias void delegate( Throwable ) ExceptionHandler; + extern (C) bool rt_init( ExceptionHandler dg = null ); + extern (C) bool rt_term( ExceptionHandler dg = null ); + + extern (C) void* rt_loadLibrary( in char[] name ); + extern (C) bool rt_unloadLibrary( void* ptr ); +} + + +/////////////////////////////////////////////////////////////////////////////// +// Runtime +/////////////////////////////////////////////////////////////////////////////// + + +/** + * This struct encapsulates all functionality related to the underlying runtime + * module for the calling context. + */ +struct Runtime +{ + /** + * Initializes the runtime. This call is to be used in instances where the + * standard program initialization process is not executed. This is most + * often in shared libraries or in libraries linked to a C program. + * + * Params: + * dg = A delegate which will receive any exception thrown during the + * initialization process or null if such exceptions should be + * discarded. + * + * Returns: + * true if initialization succeeds and false if initialization fails. + */ + static bool initialize( ExceptionHandler dg = null ) + { + return rt_init( dg ); + } + + + /** + * Terminates the runtime. This call is to be used in instances where the + * standard program termination process will not be not executed. This is + * most often in shared libraries or in libraries linked to a C program. 
+ * + * Params: + * dg = A delegate which will receive any exception thrown during the + * termination process or null if such exceptions should be + * discarded. + * + * Returns: + * true if termination succeeds and false if termination fails. + */ + static bool terminate( ExceptionHandler dg = null ) + { + return rt_term( dg ); + } + + + /** + * Returns true if the runtime is halting. Under normal circumstances, + * this will be set between the time that normal application code has + * exited and before module dtors are called. + * + * Returns: + * true if the runtime is halting. + */ + static bool isHalting() + { + return rt_isHalting(); + } + + + /** + * Locates a dynamic library with the supplied library name and dynamically + * loads it into the caller's address space. If the library contains a D + * runtime it will be integrated with the current runtime. + * + * Params: + * name = The name of the dynamic library to load. + * + * Returns: + * A reference to the library or null on error. + */ + static void* loadLibrary( in char[] name ) + { + return rt_loadLibrary( name ); + } + + + /** + * Unloads the dynamic library referenced by p. If this library contains a + * D runtime then any necessary finalization or cleanup of that runtime + * will be performed. + * + * Params: + * p = A reference to the library to unload. + */ + static bool unloadLibrary( void* p ) + { + return rt_unloadLibrary( p ); + } + + + /** + * Overrides the default trace mechanism with s user-supplied version. A + * trace represents the context from which an exception was thrown, and the + * trace handler will be called when this occurs. The pointer supplied to + * this routine indicates the base address from which tracing should occur. + * If the supplied pointer is null then the trace routine should determine + * an appropriate calling context from which to begin the trace. + * + * Params: + * h = The new trace handler. Set to null to use the default handler. 
+ */ + static void traceHandler( TraceHandler h ) + { + rt_setTraceHandler( h ); + } + + + /** + * Overrides the default collect hander with a user-supplied version. This + * routine will be called for each resource object that is finalized in a + * non-deterministic manner--typically during a garbage collection cycle. + * If the supplied routine returns true then the object's dtor will called + * as normal, but if the routine returns false than the dtor will not be + * called. The default behavior is for all object dtors to be called. + * + * Params: + * h = The new collect handler. Set to null to use the default handler. + */ + static void collectHandler( CollectHandler h ) + { + rt_setCollectHandler( h ); + } + + + /** + * Overrides the default module unit tester with a user-supplied version. + * This routine will be called once on program initialization. The return + * value of this routine indicates to the runtime whether the body of the + * program will be executed. + * + * Params: + * h = The new unit tester. Set to null to use the default unit tester. + */ + static void moduleUnitTester( ModuleUnitTester h ) + { + sm_moduleUnitTester = h; + } + + +private: + // Unit tests should only be run in single-threaded + __gshared ModuleUnitTester sm_moduleUnitTester = null; +} + + +/////////////////////////////////////////////////////////////////////////////// +// Overridable Callbacks +/////////////////////////////////////////////////////////////////////////////// + + +/** + * This routine is called by the runtime to run module unit tests on startup. + * The user-supplied unit tester will be called if one has been supplied, + * otherwise all unit tests will be run in sequence. + * + * Returns: + * true if execution should continue after testing is complete and false if + * not. Default behavior is to return true. 
+ */ +extern (C) bool runModuleUnitTests() +{ + if( Runtime.sm_moduleUnitTester is null ) + { + foreach( m; ModuleInfo ) + { + if( m.unitTest ) + m.unitTest(); + } + return true; + } + return Runtime.sm_moduleUnitTester(); +} diff --git a/druntime/src/common/core/stdc/errno.c b/druntime/src/common/core/stdc/errno.c new file mode 100644 index 00000000..27446152 --- /dev/null +++ b/druntime/src/common/core/stdc/errno.c @@ -0,0 +1,26 @@ +/** + * This file contains wrapper functions for macro-defined C rouines. + * + * Copyright: Copyright Sean Kelly 2005 - 2009. + * License: Boost License 1.0. + * Authors: Sean Kelly + * + * Copyright Sean Kelly 2005 - 2009. + * Distributed under the Boost Software License, Version 1.0. + * (See accompanying file LICENSE_1_0.txt or copy at + * http://www.boost.org/LICENSE_1_0.txt) + */ +module core.sync.barrier; + + +public import core.sync.exception; +private import core.sync.condition; +private import core.sync.mutex; + +version( Win32 ) +{ + private import core.sys.windows.windows; +} +else version( Posix ) +{ + private import core.stdc.errno; + private import core.sys.posix.pthread; +} + + +//////////////////////////////////////////////////////////////////////////////// +// Barrier +// +// void wait(); +//////////////////////////////////////////////////////////////////////////////// + + +/** + * This class represents a barrier across which threads may only travel in + * groups of a specific size. + */ +class Barrier +{ + //////////////////////////////////////////////////////////////////////////// + // Initialization + //////////////////////////////////////////////////////////////////////////// + + + /** + * Initializes a barrier object which releases threads in groups of limit + * in size. + * + * Params: + * limit = The number of waiting threads to release in unison. + * + * Throws: + * SyncException on error. 
+ */ + this( uint limit ) + in + { + assert( limit > 0 ); + } + body + { + m_lock = new Mutex; + m_cond = new Condition( m_lock ); + m_group = 0; + m_limit = limit; + m_count = limit; + } + + + //////////////////////////////////////////////////////////////////////////// + // General Actions + //////////////////////////////////////////////////////////////////////////// + + + /** + * Wait for the pre-determined number of threads and then proceed. + * + * Throws: + * SyncException on error. + */ + void wait() + { + synchronized( m_lock ) + { + uint group = m_group; + + if( --m_count == 0 ) + { + m_group++; + m_count = m_limit; + m_cond.notifyAll(); + } + while( group == m_group ) + m_cond.wait(); + } + } + + +private: + Mutex m_lock; + Condition m_cond; + uint m_group; + uint m_limit; + uint m_count; +} + + +//////////////////////////////////////////////////////////////////////////////// +// Unit Tests +//////////////////////////////////////////////////////////////////////////////// + + +version( unittest ) +{ + private import core.thread; + + + unittest + { + int numThreads = 10; + auto barrier = new Barrier( numThreads ); + auto synInfo = new Object; + int numReady = 0; + int numPassed = 0; + + void threadFn() + { + synchronized( synInfo ) + { + ++numReady; + } + barrier.wait(); + synchronized( synInfo ) + { + ++numPassed; + } + } + + auto group = new ThreadGroup; + + for( int i = 0; i < numThreads; ++i ) + { + group.create( &threadFn ); + } + group.joinAll(); + assert( numReady == numThreads && numPassed == numThreads ); + } +} diff --git a/druntime/src/common/core/sync/condition.d b/druntime/src/common/core/sync/condition.d new file mode 100644 index 00000000..be2b5d20 --- /dev/null +++ b/druntime/src/common/core/sync/condition.d @@ -0,0 +1,573 @@ +/** + * The condition module provides a primitive for synchronized condition + * checking. + * + * Copyright: Copyright Sean Kelly 2005 - 2009. 
+ * License: = 0 ); + } + body + { + version( Win32 ) + { + enum : uint + { + TICKS_PER_MILLI = 10_000, + MAX_WAIT_MILLIS = uint.max - 1 + } + + period /= TICKS_PER_MILLI; + if( period > MAX_WAIT_MILLIS ) + period = MAX_WAIT_MILLIS; + return timedWait( cast(uint) period ); + } + else version( Posix ) + { + timespec t = void; + mktspec( t, period ); + + int rc = pthread_cond_timedwait( &m_hndl, m_mutexAddr, &t ); + if( !rc ) + return true; + if( rc == ETIMEDOUT ) + return false; + throw new SyncException( "Unable to wait for condition" ); + } + } + + /** + * Notifies one waiter. + * + * Throws: + * SyncException on error. + */ + void notify() + { + version( Win32 ) + { + notify( false ); + } + else version( Posix ) + { + int rc = pthread_cond_signal( &m_hndl ); + if( rc ) + throw new SyncException( "Unable to notify condition" ); + } + } + + + /** + * Notifies all waiters. + * + * Throws: + * SyncException on error. + */ + void notifyAll() + { + version( Win32 ) + { + notify( true ); + } + else version( Posix ) + { + int rc = pthread_cond_broadcast( &m_hndl ); + if( rc ) + throw new SyncException( "Unable to notify condition" ); + } + } + + +private: + version( Win32 ) + { + bool timedWait( DWORD timeout ) + { + int numSignalsLeft; + int numWaitersGone; + DWORD rc; + + rc = WaitForSingleObject( m_blockLock, INFINITE ); + assert( rc == WAIT_OBJECT_0 ); + + m_numWaitersBlocked++; + + rc = ReleaseSemaphore( m_blockLock, 1, null ); + assert( rc ); + + m_assocMutex.unlock(); + scope(failure) m_assocMutex.lock(); + + rc = WaitForSingleObject( m_blockQueue, timeout ); + assert( rc == WAIT_OBJECT_0 || rc == WAIT_TIMEOUT ); + bool timedOut = (rc == WAIT_TIMEOUT); + + EnterCriticalSection( &m_unblockLock ); + scope(failure) LeaveCriticalSection( &m_unblockLock ); + + if( (numSignalsLeft = m_numWaitersToUnblock) != 0 ) + { + if ( timedOut ) + { + // timeout (or canceled) + if( m_numWaitersBlocked != 0 ) + { + m_numWaitersBlocked--; + // do not unblock next waiter below 
(already unblocked) + numSignalsLeft = 0; + } + else + { + // spurious wakeup pending!! + m_numWaitersGone = 1; + } + } + if( --m_numWaitersToUnblock == 0 ) + { + if( m_numWaitersBlocked != 0 ) + { + // open the gate + rc = ReleaseSemaphore( m_blockLock, 1, null ); + assert( rc ); + // do not open the gate below again + numSignalsLeft = 0; + } + else if( (numWaitersGone = m_numWaitersGone) != 0 ) + { + m_numWaitersGone = 0; + } + } + } + else if( ++m_numWaitersGone == int.max / 2 ) + { + // timeout/canceled or spurious event :-) + rc = WaitForSingleObject( m_blockLock, INFINITE ); + assert( rc == WAIT_OBJECT_0 ); + // something is going on here - test of timeouts? + m_numWaitersBlocked -= m_numWaitersGone; + rc = ReleaseSemaphore( m_blockLock, 1, null ); + assert( rc ); // ReleaseSemaphore returns nonzero on success (WAIT_OBJECT_0 is 0) + m_numWaitersGone = 0; + } + + LeaveCriticalSection( &m_unblockLock ); + + if( numSignalsLeft == 1 ) + { + // better now than spurious later (same as ResetEvent) + for( ; numWaitersGone > 0; --numWaitersGone ) + { + rc = WaitForSingleObject( m_blockQueue, INFINITE ); + assert( rc == WAIT_OBJECT_0 ); + } + // open the gate + rc = ReleaseSemaphore( m_blockLock, 1, null ); + assert( rc ); + } + else if( numSignalsLeft != 0 ) + { + // unblock next waiter + rc = ReleaseSemaphore( m_blockQueue, 1, null ); + assert( rc ); + } + m_assocMutex.lock(); + return !timedOut; + } + + + void notify( bool all ) + { + DWORD rc; + + EnterCriticalSection( &m_unblockLock ); + scope(failure) LeaveCriticalSection( &m_unblockLock ); + + if( m_numWaitersToUnblock != 0 ) + { + if( m_numWaitersBlocked == 0 ) + { + LeaveCriticalSection( &m_unblockLock ); + return; + } + if( all ) + { + m_numWaitersToUnblock += m_numWaitersBlocked; + m_numWaitersBlocked = 0; + } + else + { + m_numWaitersToUnblock++; + m_numWaitersBlocked--; + } + LeaveCriticalSection( &m_unblockLock ); + } + else if( m_numWaitersBlocked > m_numWaitersGone ) + { + rc = WaitForSingleObject( m_blockLock, INFINITE ); + assert( rc == WAIT_OBJECT_0 
); + if( 0 != m_numWaitersGone ) + { + m_numWaitersBlocked -= m_numWaitersGone; + m_numWaitersGone = 0; + } + if( all ) + { + m_numWaitersToUnblock = m_numWaitersBlocked; + m_numWaitersBlocked = 0; + } + else + { + m_numWaitersToUnblock = 1; + m_numWaitersBlocked--; + } + LeaveCriticalSection( &m_unblockLock ); + rc = ReleaseSemaphore( m_blockQueue, 1, null ); + assert( rc ); + } + else + { + LeaveCriticalSection( &m_unblockLock ); + } + } + + + // NOTE: This implementation uses Algorithm 8c as described here: + // http://groups.google.com/group/comp.programming.threads/ + // browse_frm/thread/1692bdec8040ba40/e7a5f9d40e86503a + HANDLE m_blockLock; // auto-reset event (now semaphore) + HANDLE m_blockQueue; // auto-reset event (now semaphore) + Mutex m_assocMutex; // external mutex/CS + CRITICAL_SECTION m_unblockLock; // internal mutex/CS + int m_numWaitersGone = 0; + int m_numWaitersBlocked = 0; + int m_numWaitersToUnblock = 0; + } + else version( Posix ) + { + pthread_cond_t m_hndl; + pthread_mutex_t* m_mutexAddr; + } +} + + +//////////////////////////////////////////////////////////////////////////////// +// Unit Tests +//////////////////////////////////////////////////////////////////////////////// + + +version( unittest ) +{ + private import core.thread; + private import core.sync.mutex; + private import core.sync.semaphore; + + + void testNotify() + { + auto mutex = new Mutex; + auto condReady = new Condition( mutex ); + auto semDone = new Semaphore; + auto synLoop = new Object; + int numWaiters = 10; + int numTries = 10; + int numReady = 0; + int numTotal = 0; + int numDone = 0; + int numPost = 0; + + void waiter() + { + for( int i = 0; i < numTries; ++i ) + { + synchronized( mutex ) + { + while( numReady < 1 ) + { + condReady.wait(); + } + --numReady; + ++numTotal; + } + + synchronized( synLoop ) + { + ++numDone; + } + semDone.wait(); + } + } + + auto group = new ThreadGroup; + + for( int i = 0; i < numWaiters; ++i ) + group.create( &waiter ); + + for( int i 
= 0; i < numTries; ++i ) + { + for( int j = 0; j < numWaiters; ++j ) + { + synchronized( mutex ) + { + ++numReady; + condReady.notify(); + } + } + while( true ) + { + synchronized( synLoop ) + { + if( numDone >= numWaiters ) + break; + } + Thread.yield(); + } + for( int j = 0; j < numWaiters; ++j ) + { + semDone.notify(); + } + } + + group.joinAll(); + assert( numTotal == numWaiters * numTries ); + } + + + void testNotifyAll() + { + auto mutex = new Mutex; + auto condReady = new Condition( mutex ); + int numWaiters = 10; + int numReady = 0; + int numDone = 0; + bool alert = false; + + void waiter() + { + synchronized( mutex ) + { + ++numReady; + while( !alert ) + condReady.wait(); + ++numDone; + } + } + + auto group = new ThreadGroup; + + for( int i = 0; i < numWaiters; ++i ) + group.create( &waiter ); + + while( true ) + { + synchronized( mutex ) + { + if( numReady >= numWaiters ) + { + alert = true; + condReady.notifyAll(); + break; + } + } + Thread.yield(); + } + group.joinAll(); + assert( numReady == numWaiters && numDone == numWaiters ); + } + + + void testWaitTimeout() + { + auto mutex = new Mutex; + auto condReady = new Condition( mutex ); + bool waiting = false; + bool alertedOne = true; + bool alertedTwo = true; + + void waiter() + { + synchronized( mutex ) + { + waiting = true; + alertedOne = condReady.wait( 10_000_000 ); // 1s + alertedTwo = condReady.wait( 10_000_000 ); // 1s + } + } + + auto thread = new Thread( &waiter ); + thread.start(); + + while( true ) + { + synchronized( mutex ) + { + if( waiting ) + { + condReady.notify(); + break; + } + } + Thread.yield(); + } + thread.join(); + assert( waiting && alertedOne && !alertedTwo ); + } + + + unittest + { + testNotify(); + testNotifyAll(); + testWaitTimeout(); + } +} diff --git a/druntime/src/common/core/sync/config.d b/druntime/src/common/core/sync/config.d new file mode 100644 index 00000000..0dde726a --- /dev/null +++ b/druntime/src/common/core/sync/config.d @@ -0,0 +1,72 @@ +/** + * The config 
module contains utility routines and configuration information + * specific to this package. + * + * Copyright: Copyright Sean Kelly 2005 - 2009. + * License: Boost License 1.0. + * Authors: Sean Kelly + * + * Copyright Sean Kelly 2005 - 2009. + * Distributed under the Boost Software License, Version 1.0. + * (See accompanying file LICENSE_1_0.txt or copy at + * http://www.boost.org/LICENSE_1_0.txt) + */ +module core.sync.exception; + + +/** + * Base class for synchronization exceptions. + */ +class SyncException : Exception +{ + this( string msg ) + { + super( msg ); + } +} diff --git a/druntime/src/common/core/sync/mutex.d b/druntime/src/common/core/sync/mutex.d new file mode 100644 index 00000000..5c27c083 --- /dev/null +++ b/druntime/src/common/core/sync/mutex.d @@ -0,0 +1,268 @@ +/** + * The mutex module provides a primitive for maintaining mutually exclusive + * access. + * + * Copyright: Copyright Sean Kelly 2005 - 2009. + * License: 0 ) + m_writerQueue.notify(); + } + } + } + + + /** + * Attempts to acquire a read lock on the enclosing mutex. If one can + * be obtained without blocking, the lock is acquired and true is + * returned. If not, the lock is not acquired and false is returned. + * + * Returns: + * true if the lock was acquired and false if not. 
+ */ + bool tryLock() + { + synchronized( m_commonMutex ) + { + if( shouldQueueReader() ) + return false; + ++m_numActiveReaders; + return true; + } + } + + + private: + bool shouldQueueReader() + { + if( m_numActiveWriters > 0 ) + return true; + + switch( m_policy ) + { + case Policy.PREFER_WRITERS: + return m_numQueuedWriters > 0; + + case Policy.PREFER_READERS: + default: + break; + } + + return false; + } + + struct MonitorProxy + { + Object.Monitor link; + } + + MonitorProxy m_proxy; + } + + + //////////////////////////////////////////////////////////////////////////// + // Writer + //////////////////////////////////////////////////////////////////////////// + + + /** + * This class can be considered a mutex in its own right, and is used to + * negotiate a write lock for the enclosing mutex. + */ + class Writer : + Object.Monitor + { + /** + * Initializes a read/write mutex writer proxy object. + */ + this() + { + m_proxy.link = this; + (cast(void**) this)[1] = &m_proxy; + } + + + /** + * Acquires a write lock on the enclosing mutex. + */ + void lock() + { + synchronized( m_commonMutex ) + { + ++m_numQueuedWriters; + scope(exit) --m_numQueuedWriters; + + while( shouldQueueWriter() ) + m_writerQueue.wait(); + ++m_numActiveWriters; + } + } + + + /** + * Releases a write lock on the enclosing mutex. + */ + void unlock() + { + synchronized( m_commonMutex ) + { + if( --m_numActiveWriters < 1 ) + { + switch( m_policy ) + { + default: + case Policy.PREFER_READERS: + if( m_numQueuedReaders > 0 ) + m_readerQueue.notifyAll(); + else if( m_numQueuedWriters > 0 ) + m_writerQueue.notify(); + break; + case Policy.PREFER_WRITERS: + if( m_numQueuedWriters > 0 ) + m_writerQueue.notify(); + else if( m_numQueuedReaders > 0 ) + m_readerQueue.notifyAll(); + } + } + } + } + + + /** + * Attempts to acquire a write lock on the enclosing mutex. If one can + * be obtained without blocking, the lock is acquired and true is + * returned. 
If not, the lock is not acquired and false is returned. + * + * Returns: + * true if the lock was acquired and false if not. + */ + bool tryLock() + { + synchronized( m_commonMutex ) + { + if( shouldQueueWriter() ) + return false; + ++m_numActiveWriters; + return true; + } + } + + + private: + bool shouldQueueWriter() + { + if( m_numActiveWriters > 0 || + m_numActiveReaders > 0 ) + return true; + switch( m_policy ) + { + case Policy.PREFER_READERS: + return m_numQueuedReaders > 0; + + case Policy.PREFER_WRITERS: + default: + break; + } + + return false; + } + + struct MonitorProxy + { + Object.Monitor link; + } + + MonitorProxy m_proxy; + } + + +private: + Policy m_policy; + Reader m_reader; + Writer m_writer; + + Mutex m_commonMutex; + Condition m_readerQueue; + Condition m_writerQueue; + + int m_numQueuedReaders; + int m_numActiveReaders; + int m_numQueuedWriters; + int m_numActiveWriters; +} + + +//////////////////////////////////////////////////////////////////////////////// +// Unit Tests +//////////////////////////////////////////////////////////////////////////////// + + +version( unittest ) +{ + static if( !is( typeof( Thread ) ) ) + private import core.thread; + + + void testRead( ReadWriteMutex.Policy policy ) + { + auto mutex = new ReadWriteMutex( policy ); + auto synInfo = new Object; + int numThreads = 10; + int numReaders = 0; + int maxReaders = 0; + + void readerFn() + { + synchronized( mutex.reader() ) + { + synchronized( synInfo ) + { + if( ++numReaders > maxReaders ) + maxReaders = numReaders; + } + Thread.sleep( 100_000 ); // 10ms (100ns ticks) + synchronized( synInfo ) + { + --numReaders; + } + } + } + + auto group = new ThreadGroup; + + for( int i = 0; i < numThreads; ++i ) + { + group.create( &readerFn ); + } + group.joinAll(); + assert( numReaders < 1 && maxReaders > 1 ); + } + + + void testReadWrite( ReadWriteMutex.Policy policy ) + { + auto mutex = new ReadWriteMutex( policy ); + auto synInfo = new Object; + int numThreads = 10; + int numReaders = 0; + 
int numWriters = 0; + int maxReaders = 0; + int maxWriters = 0; + int numTries = 20; + + void readerFn() + { + for( int i = 0; i < numTries; ++i ) + { + synchronized( mutex.reader() ) + { + synchronized( synInfo ) + { + if( ++numReaders > maxReaders ) + maxReaders = numReaders; + } + Thread.sleep( 100_000 ); // 10ms (100ns ticks) + synchronized( synInfo ) + { + --numReaders; + } + } + } + } + + void writerFn() + { + for( int i = 0; i < numTries; ++i ) + { + synchronized( mutex.writer() ) + { + synchronized( synInfo ) + { + if( ++numWriters > maxWriters ) + maxWriters = numWriters; + } + Thread.sleep( 100_000 ); // 10ms (100ns ticks) + synchronized( synInfo ) + { + --numWriters; + } + } + } + } + + auto group = new ThreadGroup; + + for( int i = 0; i < numThreads; ++i ) + { + group.create( &readerFn ); + group.create( &writerFn ); + } + group.joinAll(); + assert( numReaders < 1 && maxReaders > 1 && + numWriters < 1 && maxWriters < 2 ); + } + + + unittest + { + testRead( ReadWriteMutex.Policy.PREFER_READERS ); + testRead( ReadWriteMutex.Policy.PREFER_WRITERS ); + testReadWrite( ReadWriteMutex.Policy.PREFER_READERS ); + testReadWrite( ReadWriteMutex.Policy.PREFER_WRITERS ); + } +} diff --git a/druntime/src/common/core/sync/semaphore.d b/druntime/src/common/core/sync/semaphore.d new file mode 100644 index 00000000..1950033f --- /dev/null +++ b/druntime/src/common/core/sync/semaphore.d @@ -0,0 +1,506 @@ +/** + * The semaphore module provides a general use semaphore for synchronization. + * + * Copyright: Copyright Sean Kelly 2005 - 2009. 
+ * License: = 0 ); + } + body + { + version( Win32 ) + { + enum : uint + { + TICKS_PER_MILLI = 10_000, + MAX_WAIT_MILLIS = uint.max - 1 + } + + period /= TICKS_PER_MILLI; + if( period > MAX_WAIT_MILLIS ) + period = MAX_WAIT_MILLIS; + switch( WaitForSingleObject( m_hndl, cast(uint) period ) ) + { + case WAIT_OBJECT_0: + return true; + case WAIT_TIMEOUT: + return false; + default: + throw new SyncException( "Unable to wait for semaphore" ); + } + } + else version( OSX ) + { + mach_timespec_t t = void; + (cast(byte*) &t)[0 .. t.sizeof] = 0; + + if( period != 0 ) + { + enum : uint + { + NANOS_PER_TICK = 100, + TICKS_PER_SECOND = 10_000_000, + NANOS_PER_SECOND = NANOS_PER_TICK * TICKS_PER_SECOND, + } + + if( t.tv_sec.max - t.tv_sec < period / TICKS_PER_SECOND ) + { + t.tv_sec = t.tv_sec.max; + t.tv_nsec = 0; + } + else + { + t.tv_sec += cast(typeof(t.tv_sec)) (period / TICKS_PER_SECOND); + long ns = (period % TICKS_PER_SECOND) * NANOS_PER_TICK; + if( NANOS_PER_SECOND - t.tv_nsec > ns ) + t.tv_nsec = cast(typeof(t.tv_nsec)) ns; + else + { + t.tv_sec += 1; + t.tv_nsec += ns - NANOS_PER_SECOND; + } + } + } + while( true ) + { + auto rc = semaphore_timedwait( m_hndl, t ); + if( !rc ) + return true; + if( rc == KERN_OPERATION_TIMED_OUT ) + return false; + if( rc != KERN_ABORTED || errno != EINTR ) + throw new SyncException( "Unable to wait for semaphore" ); + } + // -w trip + return false; + } + else version( Posix ) + { + timespec t = void; + mktspec( t, period ); + + while( true ) + { + if( !sem_timedwait( &m_hndl, &t ) ) + return true; + if( errno == ETIMEDOUT ) + return false; + if( errno != EINTR ) + throw new SyncException( "Unable to wait for semaphore" ); + } + // -w trip + return false; + } + } + + + /** + * Atomically increment the current count by one. This will notify one + * waiter, if there are any in the queue. + * + * Throws: + * SyncException on error. 
+ */ + void notify() + { + version( Win32 ) + { + if( !ReleaseSemaphore( m_hndl, 1, null ) ) + throw new SyncException( "Unable to notify semaphore" ); + } + else version( OSX ) + { + auto rc = semaphore_signal( m_hndl ); + if( rc ) + throw new SyncException( "Unable to notify semaphore" ); + } + else version( Posix ) + { + int rc = sem_post( &m_hndl ); + if( rc ) + throw new SyncException( "Unable to notify semaphore" ); + } + } + + + /** + * If the current count is equal to zero, return. Otherwise, atomically + * decrement the count by one and return true. + * + * Throws: + * SyncException on error. + * + * Returns: + * true if the count was above zero and false if not. + */ + bool tryWait() + { + version( Win32 ) + { + switch( WaitForSingleObject( m_hndl, 0 ) ) + { + case WAIT_OBJECT_0: + return true; + case WAIT_TIMEOUT: + return false; + default: + throw new SyncException( "Unable to wait for semaphore" ); + } + } + else version( OSX ) + { + return wait( 0 ); + } + else version( Posix ) + { + while( true ) + { + if( !sem_trywait( &m_hndl ) ) + return true; + if( errno == EAGAIN ) + return false; + if( errno != EINTR ) + throw new SyncException( "Unable to wait for semaphore" ); + } + // -w trip + return false; + } + } + + +private: + version( Win32 ) + { + HANDLE m_hndl; + } + else version( OSX ) + { + semaphore_t m_hndl; + } + else version( Posix ) + { + sem_t m_hndl; + } +} + + +//////////////////////////////////////////////////////////////////////////////// +// Unit Tests +//////////////////////////////////////////////////////////////////////////////// + + +version( unittest ) +{ + private import core.thread; + + + void testWait() + { + auto semaphore = new Semaphore; + int numToProduce = 10; + bool allProduced = false; + auto synProduced = new Object; + int numConsumed = 0; + auto synConsumed = new Object; + int numConsumers = 10; + int numComplete = 0; + auto synComplete = new Object; + + void consumer() + { + while( true ) + { + semaphore.wait(); + + 
synchronized( synProduced ) + { + if( allProduced ) + break; + } + + synchronized( synConsumed ) + { + ++numConsumed; + } + } + + synchronized( synComplete ) + { + ++numComplete; + } + } + + void producer() + { + assert( !semaphore.tryWait() ); + + for( int i = 0; i < numToProduce; ++i ) + { + semaphore.notify(); + Thread.yield(); + } + Thread.sleep( 10_000_000 ); // 1s + synchronized( synProduced ) + { + allProduced = true; + } + + for( int i = 0; i < numConsumers; ++i ) + { + semaphore.notify(); + Thread.yield(); + } + + for( int i = numConsumers * 10000; i > 0; --i ) + { + synchronized( synComplete ) + { + if( numComplete == numConsumers ) + break; + } + Thread.yield(); + } + + synchronized( synComplete ) + { + assert( numComplete == numConsumers ); + } + + synchronized( synConsumed ) + { + assert( numConsumed == numToProduce ); + } + + assert( !semaphore.tryWait() ); + semaphore.notify(); + assert( semaphore.tryWait() ); + assert( !semaphore.tryWait() ); + } + + auto group = new ThreadGroup; + + for( int i = 0; i < numConsumers; ++i ) + group.create( &consumer ); + group.create( &producer ); + group.joinAll(); + } + + + void testWaitTimeout() + { + auto synReady = new Object; + auto semReady = new Semaphore; + bool waiting = false; + bool alertedOne = true; + bool alertedTwo = true; + + void waiter() + { + synchronized( synReady ) + { + waiting = true; + } + alertedOne = semReady.wait( 10_000_000 ); // 1s + alertedTwo = semReady.wait( 10_000_000 ); // 1s + } + + auto thread = new Thread( &waiter ); + thread.start(); + + while( true ) + { + synchronized( synReady ) + { + if( waiting ) + { + semReady.notify(); + break; + } + } + Thread.yield(); + } + thread.join(); + assert( waiting && alertedOne && !alertedTwo ); + } + + + unittest + { + testWait(); + testWaitTimeout(); + } +} diff --git a/druntime/src/common/core/thread.d b/druntime/src/common/core/thread.d new file mode 100644 index 00000000..e20467e4 --- /dev/null +++ b/druntime/src/common/core/thread.d 
@@ -0,0 +1,3516 @@ +/** + * The thread module provides support for thread creation and management. + * + * Copyright: Copyright Sean Kelly 2005 - 2009. + * License: = 0 ); + } + body + { + version( Windows ) + { + enum : uint + { + TICKS_PER_MILLI = 10_000, + MAX_SLEEP_MILLIS = uint.max - 1 + } + + // NOTE: In instances where all other threads in the process have a + // lower priority than the current thread, the current thread + // will not yield with a sleep time of zero. However, unlike + // yield(), the user is not asking for a yield to occur but + // only for execution to suspend for the requested interval. + // Therefore, expected performance may not be met if a yield + // is forced upon the user. + period /= TICKS_PER_MILLI; + while( period > MAX_SLEEP_MILLIS ) + { + Sleep( MAX_SLEEP_MILLIS ); + period -= MAX_SLEEP_MILLIS; + } + Sleep( cast(uint) period ); + } + else version( Posix ) + { + timespec tin = void; + timespec tout = void; + + enum : uint + { + NANOS_PER_TICK = 100, + TICKS_PER_SECOND = 10_000_000, + } + enum : typeof(period) + { + MAX_SLEEP_TICKS = cast(typeof(period)) tin.tv_sec.max * TICKS_PER_SECOND + } + + do + { + if( period > MAX_SLEEP_TICKS ) + { + tin.tv_sec = tin.tv_sec.max; + tin.tv_nsec = 0; + } + else + { + tin.tv_sec = cast(typeof(tin.tv_sec)) (period / TICKS_PER_SECOND); + tin.tv_nsec = cast(typeof(tin.tv_nsec)) (period % TICKS_PER_SECOND) * NANOS_PER_TICK; + } + while( true ) + { + if( !nanosleep( &tin, &tout ) ) + return; + if( getErrno() != EINTR ) + throw new ThreadException( "Unable to sleep for the specified duration" ); + tin = tout; + } + period -= (cast(typeof(period)) tin.tv_sec) * TICKS_PER_SECOND; + period -= (cast(typeof(period)) tin.tv_nsec) / NANOS_PER_TICK; + } while( period > 0 ); + } + } + + + /** + * Forces a context switch to occur away from the calling thread. 
+ */ + static void yield() + { + version( Windows ) + { + // NOTE: Sleep(1) is necessary because Sleep(0) does not give + // lower priority threads any timeslice, so looping on + // Sleep(0) could be resource-intensive in some cases. + Sleep( 1 ); + } + else version( Posix ) + { + sched_yield(); + } + } + + + /////////////////////////////////////////////////////////////////////////// + // Thread Accessors + /////////////////////////////////////////////////////////////////////////// + + + /** + * Provides a reference to the calling thread. + * + * Returns: + * The thread object representing the calling thread. The result of + * deleting this object is undefined. + */ + static Thread getThis() + { + // NOTE: This function may not be called until thread_init has + // completed. See thread_suspendAll for more information + // on why this might occur. + version( Windows ) + { + return cast(Thread) TlsGetValue( sm_this ); + } + else version( Posix ) + { + return cast(Thread) pthread_getspecific( sm_this ); + } + } + + + /** + * Provides a list of all threads currently being tracked by the system. + * + * Returns: + * An array containing references to all threads currently being + * tracked by the system. The result of deleting any contained + * objects is undefined. + */ + static Thread[] getAll() + { + synchronized( slock ) + { + size_t pos = 0; + Thread[] buf = new Thread[sm_tlen]; + + foreach( Thread t; Thread ) + { + buf[pos++] = t; + } + return buf; + } + } + + + /** + * Operates on all threads currently being tracked by the system. The + * result of deleting any Thread object is undefined. + * + * Params: + * dg = The supplied code as a delegate. + * + * Returns: + * Zero if all elements are visited, nonzero if not. 
+ */ + static int opApply( int delegate( inout Thread ) dg ) + { + synchronized( slock ) + { + int ret = 0; + + for( Thread t = sm_tbeg; t; t = t.next ) + { + ret = dg( t ); + if( ret ) + break; + } + return ret; + } + } + + + /////////////////////////////////////////////////////////////////////////// + // Local Storage Actions + /////////////////////////////////////////////////////////////////////////// + + + /** + * Indicates the number of local storage pointers available at program + * startup. It is recommended that this number be at least 64. + */ + static const uint LOCAL_MAX = 64; + + + /** + * Reserves a local storage pointer for use and initializes this location + * to null for all running threads. + * + * Returns: + * A key representing the array offset of this memory location. + */ + static uint createLocal() + { + synchronized( slock ) + { + foreach( uint key, inout bool set; sm_local ) + { + if( !set ) + { + //foreach( Thread t; sm_tbeg ) Bug in GDC 0.24 SVN (r139) + for( Thread t = sm_tbeg; t; t = t.next ) + { + t.m_local[key] = null; + } + set = true; + return key; + } + } + throw new ThreadException( "No more local storage slots available" ); + } + } + + + /** + * Marks the supplied key as available and sets the associated location + * to null for all running threads. It is assumed that any key passed + * to this function is valid. The result of calling this function for + * a key which is still in use is undefined. + * + * Params: + * key = The key to delete. + */ + static void deleteLocal( uint key ) + { + synchronized( slock ) + { + sm_local[key] = false; + // foreach( Thread t; sm_tbeg ) Bug in GDC 0.24 SVN (r139) + for( Thread t = sm_tbeg; t; t = t.next ) + { + t.m_local[key] = null; + } + } + } + + + /** + * Loads the value stored at key within a thread-local static array. It is + * assumed that any key passed to this function is valid. + * + * Params: + * key = The location which holds the desired data. 
+ * + * Returns: + * The data associated with the supplied key. + */ + static void* getLocal( uint key ) + { + return getThis().m_local[key]; + } + + + /** + * Stores the supplied value at key within a thread-local static array. It + * is assumed that any key passed to this function is valid. + * + * Params: + * key = The location to store the supplied data. + * val = The data to store. + * + * Returns: + * A copy of the data which has just been stored. + */ + static void* setLocal( uint key, void* val ) + { + return getThis().m_local[key] = val; + } + + + /////////////////////////////////////////////////////////////////////////// + // Static Initializer + /////////////////////////////////////////////////////////////////////////// + + + /** + * This initializer is used to set thread constants. All functional + * initialization occurs within thread_init(). + */ + static this() + { + version( Windows ) + { + PRIORITY_MIN = -15; + PRIORITY_MAX = 15; + } + else version( Posix ) + { + int policy; + sched_param param; + pthread_t self = pthread_self(); + + int status = pthread_getschedparam( self, &policy, &param ); + assert( status == 0 ); + + PRIORITY_MIN = sched_get_priority_min( policy ); + assert( PRIORITY_MIN != -1 ); + + PRIORITY_MAX = sched_get_priority_max( policy ); + assert( PRIORITY_MAX != -1 ); + } + } + + +private: + // + // Initializes a thread object which has no associated executable function. + // This is used for the main thread initialized in thread_init(). + // + this() + { + m_call = Call.NO; + m_curr = &m_main; + + void* pstart = cast(void*) &_tlsstart; + void* pend = cast(void*) &_tlsend; + m_tls = pstart[0 .. pend - pstart]; + } + + + // + // Thread entry point. Invokes the function or delegate passed on + // construction (if any). + // + final void run() + { + switch( m_call ) + { + case Call.FN: + m_fn(); + break; + case Call.DG: + m_dg(); + break; + default: + break; + } + } + + +private: + // + // The type of routine passed on thread construction. 
+ // + enum Call + { + NO, + FN, + DG + } + + + // + // Standard types + // + version( Windows ) + { + alias uint TLSKey; + alias uint ThreadAddr; + } + else version( Posix ) + { + alias pthread_key_t TLSKey; + alias pthread_t ThreadAddr; + } + + + // + // Local storage + // + __gshared bool[LOCAL_MAX] sm_local; + __gshared TLSKey sm_this; + + void*[LOCAL_MAX] m_local; + + + // + // Standard thread data + // + version( Windows ) + { + HANDLE m_hndl; + } + else version( OSX ) + { + mach_port_t m_tmach; + } + ThreadAddr m_addr; + Call m_call; + char[] m_name; + union + { + void function() m_fn; + void delegate() m_dg; + } + size_t m_sz; + version( Posix ) + { + bool m_isRunning; + } + bool m_isDaemon; + Object m_unhandled; + + +private: + /////////////////////////////////////////////////////////////////////////// + // Storage of Active Thread + /////////////////////////////////////////////////////////////////////////// + + + // + // Sets a thread-local reference to the current thread object. 
+ // + static void setThis( Thread t ) + { + version( Windows ) + { + TlsSetValue( sm_this, cast(void*) t ); + } + else version( Posix ) + { + pthread_setspecific( sm_this, cast(void*) t ); + } + } + + +private: + /////////////////////////////////////////////////////////////////////////// + // Thread Context and GC Scanning Support + /////////////////////////////////////////////////////////////////////////// + + + final void pushContext( Context* c ) + in + { + assert( !c.within ); + } + body + { + c.within = m_curr; + m_curr = c; + } + + + final void popContext() + in + { + assert( m_curr && m_curr.within ); + } + body + { + Context* c = m_curr; + m_curr = c.within; + c.within = null; + } + + + final Context* topContext() + in + { + assert( m_curr ); + } + body + { + return m_curr; + } + + + static struct Context + { + void* bstack, + tstack; + Context* within; + Context* next, + prev; + } + + + Context m_main; + Context* m_curr; + bool m_lock; + void[] m_tls; // spans implicit thread local storage + + version( Windows ) + { + version( X86 ) + { + uint[8] m_reg; // edi,esi,ebp,esp,ebx,edx,ecx,eax + } + else version( X86_64 ) + { + ulong[16] m_reg; // rdi,rsi,rbp,rsp,rbx,rdx,rcx,rax + // r8,r9,r10,r11,r12,r13,r14,r15 + } + else + { + static assert( false, "Architecture not supported." ); + } + } + else version( OSX ) + { + version( X86 ) + { + uint[8] m_reg; // edi,esi,ebp,esp,ebx,edx,ecx,eax + } + else version( X86_64 ) + { + ulong[16] m_reg; // rdi,rsi,rbp,rsp,rbx,rdx,rcx,rax + // r8,r9,r10,r11,r12,r13,r14,r15 + } + else + { + static assert( false, "Architecture not supported." ); + } + } + + +private: + /////////////////////////////////////////////////////////////////////////// + // GC Scanning Support + /////////////////////////////////////////////////////////////////////////// + + + // NOTE: The GC scanning process works like so: + // + // 1. Suspend all threads. + // 2. Scan the stacks of all suspended threads for roots. + // 3. Resume all threads. 
+ // + // Step 1 and 3 require a list of all threads in the system, while + // step 2 requires a list of all thread stacks (each represented by + // a Context struct). Traditionally, there was one stack per thread + // and the Context structs were not necessary. However, Fibers have + // changed things so that each thread has its own 'main' stack plus + // an arbitrary number of nested stacks (normally referenced via + // m_curr). Also, there may be 'free-floating' stacks in the system, + // which are Fibers that are not currently executing on any specific + // thread but are still being processed and still contain valid + // roots. + // + // To support all of this, the Context struct has been created to + // represent a stack range, and a global list of Context structs has + // been added to enable scanning of these stack ranges. The lifetime + // (and presence in the Context list) of a thread's 'main' stack will + // be equivalent to the thread's lifetime. So the Context will be + // added to the list on thread entry, and removed from the list on + // thread exit (which is essentially the same as the presence of a + // Thread object in its own global list). The lifetime of a Fiber's + // context, however, will be tied to the lifetime of the Fiber object + // itself, and Fibers are expected to add/remove their Context struct + // on construction/deletion. + + + // + // All use of the global lists should synchronize on this lock. + // + static Object slock() + { + return Thread.classinfo; + } + + + __gshared + { + Context* sm_cbeg; + size_t sm_clen; + + Thread sm_tbeg; + size_t sm_tlen; + } + + // + // Used for ordering threads in the global thread list. + // + Thread prev; + Thread next; + + + /////////////////////////////////////////////////////////////////////////// + // Global Context List Operations + /////////////////////////////////////////////////////////////////////////// + + + // + // Add a context to the global context list.
+ // + static void add( Context* c ) + in + { + assert( c ); + assert( !c.next && !c.prev ); + } + body + { + synchronized( slock ) + { + if( sm_cbeg ) + { + c.next = sm_cbeg; + sm_cbeg.prev = c; + } + sm_cbeg = c; + ++sm_clen; + } + } + + + // + // Remove a context from the global context list. + // + static void remove( Context* c ) + in + { + assert( c ); + assert( c.next || c.prev ); + } + body + { + synchronized( slock ) + { + if( c.prev ) + c.prev.next = c.next; + if( c.next ) + c.next.prev = c.prev; + if( sm_cbeg == c ) + sm_cbeg = c.next; + --sm_clen; + } + // NOTE: Don't null out c.next or c.prev because opApply currently + // follows c.next after removing a node. This could be easily + // addressed by simply returning the next node from this + // function, however, a context should never be re-added to the + // list anyway and having next and prev be non-null is a good way + // to ensure that. + } + + + /////////////////////////////////////////////////////////////////////////// + // Global Thread List Operations + /////////////////////////////////////////////////////////////////////////// + + + // + // Add a thread to the global thread list. + // + static void add( Thread t ) + in + { + assert( t ); + assert( !t.next && !t.prev ); + assert( t.isRunning ); + } + body + { + synchronized( slock ) + { + if( sm_tbeg ) + { + t.next = sm_tbeg; + sm_tbeg.prev = t; + } + sm_tbeg = t; + ++sm_tlen; + } + } + + + // + // Remove a thread from the global thread list. + // + static void remove( Thread t ) + in + { + assert( t ); + assert( t.next || t.prev ); + version( Windows ) + { + // NOTE: This doesn't work for Posix as m_isRunning must be set to + // false after the thread is removed during normal execution. + assert( !t.isRunning ); + } + } + body + { + synchronized( slock ) + { + // NOTE: When a thread is removed from the global thread list its + // main context is invalid and should be removed as well. 
+ // It is possible that t.m_curr could reference more + // than just the main context if the thread exited abnormally + // (if it was terminated), but we must assume that the user + // retains a reference to them and that they may be re-used + // elsewhere. Therefore, it is the responsibility of any + // object that creates contexts to clean them up properly + // when it is done with them. + remove( &t.m_main ); + + if( t.prev ) + t.prev.next = t.next; + if( t.next ) + t.next.prev = t.prev; + if( sm_tbeg == t ) + sm_tbeg = t.next; + --sm_tlen; + } + // NOTE: Don't null out t.next or t.prev because opApply currently + // follows t.next after removing a node. This could be easily + // addressed by simply returning the next node from this + // function, however, a thread should never be re-added to the + // list anyway and having next and prev be non-null is a good way + // to ensure that. + } +} + + +/////////////////////////////////////////////////////////////////////////////// +// GC Support Routines +/////////////////////////////////////////////////////////////////////////////// + + +/** + * Initializes the thread module. This function must be called by the + * garbage collector on startup and before any other thread routines + * are called. + */ +extern (C) void thread_init() +{ + // NOTE: If thread_init itself performs any allocations then the thread + // routines reserved for garbage collector use may be called while + // thread_init is being processed. However, since no memory should + // exist to be scanned at this point, it is sufficient for these + // functions to detect the condition and return immediately. 
+ + version( Windows ) + { + Thread.sm_this = TlsAlloc(); + assert( Thread.sm_this != TLS_OUT_OF_INDEXES ); + } + else version( Posix ) + { + int status; + sigaction_t sigusr1 = void; + sigaction_t sigusr2 = void; + + // This is a quick way to zero-initialize the structs without using + // memset or creating a link dependency on their static initializer. + (cast(byte*) &sigusr1)[0 .. sigaction_t.sizeof] = 0; + (cast(byte*) &sigusr2)[0 .. sigaction_t.sizeof] = 0; + + // NOTE: SA_RESTART indicates that system calls should restart if they + // are interrupted by a signal, but this is not available on all + // Posix systems, even those that support multithreading. + static if( is( typeof( SA_RESTART ) ) ) + sigusr1.sa_flags = SA_RESTART; + else + sigusr1.sa_flags = 0; + sigusr1.sa_handler = &thread_suspendHandler; + // NOTE: We want to ignore all signals while in this handler, so fill + // sa_mask to indicate this. + status = sigfillset( &sigusr1.sa_mask ); + assert( status == 0 ); + + // NOTE: Since SIGUSR2 should only be issued for threads within the + // suspend handler, we don't want this signal to trigger a + // restart. + sigusr2.sa_flags = 0; + sigusr2.sa_handler = &thread_resumeHandler; + // NOTE: We want to ignore all signals while in this handler, so fill + // sa_mask to indicate this. + status = sigfillset( &sigusr2.sa_mask ); + assert( status == 0 ); + + status = sigaction( SIGUSR1, &sigusr1, null ); + assert( status == 0 ); + + status = sigaction( SIGUSR2, &sigusr2, null ); + assert( status == 0 ); + + status = sem_init( &suspendCount, 0, 0 ); + assert( status == 0 ); + + status = pthread_key_create( &Thread.sm_this, null ); + assert( status == 0 ); + } + + thread_attachThis(); +} + + +/** + * Registers the calling thread for use with the D Runtime. If this routine + * is called for a thread which is already registered, the result is undefined. 
+ */ +extern (C) void thread_attachThis() +{ + version( Windows ) + { + Thread thisThread = new Thread(); + Thread.Context* thisContext = &thisThread.m_main; + assert( thisContext == thisThread.m_curr ); + + thisThread.m_addr = GetCurrentThreadId(); + thisThread.m_hndl = GetCurrentThreadHandle(); + thisContext.bstack = getStackBottom(); + thisContext.tstack = thisContext.bstack; + + thisThread.m_isDaemon = true; + + Thread.setThis( thisThread ); + } + else version( Posix ) + { + Thread thisThread = new Thread(); + Thread.Context* thisContext = thisThread.m_curr; + assert( thisContext == &thisThread.m_main ); + + thisThread.m_addr = pthread_self(); + thisContext.bstack = getStackBottom(); + thisContext.tstack = thisContext.bstack; + + thisThread.m_isRunning = true; + thisThread.m_isDaemon = true; + + Thread.setThis( thisThread ); + } + version( OSX ) + { + thisThread.m_tmach = pthread_mach_thread_np( thisThread.m_addr ); + assert( thisThread.m_tmach != thisThread.m_tmach.init ); + } + + Thread.add( thisThread ); + Thread.add( thisContext ); +} + + +/** + * Deregisters the calling thread from use with the runtime. If this routine + * is called for a thread which is not registered, the result is undefined. + */ +extern (C) void thread_detachThis() +{ + Thread.remove( Thread.getThis() ); +} + + +/** + * Joins all non-daemon threads that are currently running. This is done by + * performing successive scans through the thread list until a scan consists + * of only daemon threads. + */ +extern (C) void thread_joinAll() +{ + + while( true ) + { + Thread nonDaemon = null; + + foreach( t; Thread ) + { + if( !t.isDaemon ) + { + nonDaemon = t; + break; + } + } + if( nonDaemon is null ) + return; + nonDaemon.join(); + } +} + + +/** + * Performs intermediate shutdown of the thread module. + */ +static ~this() +{ + // NOTE: The functionality related to garbage collection must be minimally + // operable after this dtor completes.
Therefore, only minimal + // cleanup may occur. + + for( Thread t = Thread.sm_tbeg; t; t = t.next ) + { + if( !t.isRunning ) + Thread.remove( t ); + } +} + + +// Used for needLock below +private __gshared bool multiThreadedFlag = false; + + +/** + * This function is used to determine whether the process is + * multi-threaded. Optimizations may only be performed on this + * value if the programmer can guarantee that no path from the + * enclosed code will start a thread. + * + * Returns: + * True if Thread.start() has been called in this process. + */ +extern (C) bool thread_needLock() +{ + return multiThreadedFlag; +} + + +// Used for suspendAll/resumeAll below +private __gshared uint suspendDepth = 0; + + +/** + * Suspend all threads but the calling thread for "stop the world" garbage + * collection runs. This function may be called multiple times, and must + * be followed by a matching number of calls to thread_resumeAll before + * processing is resumed. + * + * Throws: + * ThreadException if the suspend operation fails for a running thread. + */ +extern (C) void thread_suspendAll() +{ + /** + * Suspend the specified thread and load stack and register information for + * use by thread_scanAll. If the supplied thread is the calling thread, + * stack and register information will be loaded but the thread will not + * be suspended. If the suspend operation fails and the thread is not + * running then it will be removed from the global thread list, otherwise + * an exception will be thrown. + * + * Params: + * t = The thread to suspend. + * + * Throws: + * ThreadException if the suspend operation fails for a running thread.
+ */ + void suspend( Thread t ) + { + version( Windows ) + { + if( t.m_addr != GetCurrentThreadId() && SuspendThread( t.m_hndl ) == 0xFFFFFFFF ) + { + if( !t.isRunning ) + { + Thread.remove( t ); + return; + } + throw new ThreadException( "Unable to suspend thread" ); + } + + CONTEXT context = void; + context.ContextFlags = CONTEXT_INTEGER | CONTEXT_CONTROL; + + if( !GetThreadContext( t.m_hndl, &context ) ) + throw new ThreadException( "Unable to load thread context" ); + + version( X86 ) + { + if( !t.m_lock ) + t.m_curr.tstack = cast(void*) context.Esp; + // eax,ebx,ecx,edx,edi,esi,ebp,esp + t.m_reg[0] = context.Eax; + t.m_reg[1] = context.Ebx; + t.m_reg[2] = context.Ecx; + t.m_reg[3] = context.Edx; + t.m_reg[4] = context.Edi; + t.m_reg[5] = context.Esi; + t.m_reg[6] = context.Ebp; + t.m_reg[7] = context.Esp; + } + else + { + static assert( false, "Architecture not supported." ); + } + } + else version( OSX ) + { + if( t.m_addr != pthread_self() && thread_suspend( t.m_tmach ) != KERN_SUCCESS ) + { + if( !t.isRunning ) + { + Thread.remove( t ); + return; + } + throw new ThreadException( "Unable to suspend thread" ); + } + + version( X86 ) + { + x86_thread_state32_t state = void; + mach_msg_type_number_t count = x86_THREAD_STATE32_COUNT; + + if( thread_get_state( t.m_tmach, x86_THREAD_STATE32, &state, &count ) != KERN_SUCCESS ) + throw new ThreadException( "Unable to load thread state" ); + if( !t.m_lock ) + t.m_curr.tstack = cast(void*) state.esp; + // eax,ebx,ecx,edx,edi,esi,ebp,esp + t.m_reg[0] = state.eax; + t.m_reg[1] = state.ebx; + t.m_reg[2] = state.ecx; + t.m_reg[3] = state.edx; + t.m_reg[4] = state.edi; + t.m_reg[5] = state.esi; + t.m_reg[6] = state.ebp; + t.m_reg[7] = state.esp; + } + else version( X86_64 ) + { + x86_thread_state64_t state = void; + mach_msg_type_number_t count = x86_THREAD_STATE64_COUNT; + + if( thread_get_state( t.m_tmach, x86_THREAD_STATE64, &state, &count ) != KERN_SUCCESS ) + throw new ThreadException( "Unable to load thread state" ); + if(
!t.m_lock ) + t.m_curr.tstack = cast(void*) state.rsp; + // rax,rbx,rcx,rdx,rdi,rsi,rbp,rsp + t.m_reg[0] = state.rax; + t.m_reg[1] = state.rbx; + t.m_reg[2] = state.rcx; + t.m_reg[3] = state.rdx; + t.m_reg[4] = state.rdi; + t.m_reg[5] = state.rsi; + t.m_reg[6] = state.rbp; + t.m_reg[7] = state.rsp; + // r8,r9,r10,r11,r12,r13,r14,r15 + t.m_reg[8] = state.r8; + t.m_reg[9] = state.r9; + t.m_reg[10] = state.r10; + t.m_reg[11] = state.r11; + t.m_reg[12] = state.r12; + t.m_reg[13] = state.r13; + t.m_reg[14] = state.r14; + t.m_reg[15] = state.r15; + } + else + { + static assert( false, "Architecture not supported." ); + } + } + else version( Posix ) + { + if( t.m_addr != pthread_self() ) + { + if( pthread_kill( t.m_addr, SIGUSR1 ) != 0 ) + { + if( !t.isRunning ) + { + Thread.remove( t ); + return; + } + throw new ThreadException( "Unable to suspend thread" ); + } + // NOTE: It's really not ideal to wait for each thread to + // signal individually -- rather, it would be better to + // suspend them all and wait once at the end. However, + // semaphores don't really work this way, and the obvious + // alternative (looping on an atomic suspend count) + // requires either the atomic module (which only works on + // x86) or other specialized functionality. It would + // also be possible to simply loop on sem_wait at the + // end, but I'm not convinced that this would be much + // faster than the current approach. + sem_wait( &suspendCount ); + } + else if( !t.m_lock ) + { + t.m_curr.tstack = getStackTop(); + } + } + } + + + // NOTE: We've got an odd chicken & egg problem here, because while the GC + // is required to call thread_init before calling any other thread + // routines, thread_init may allocate memory which could in turn + // trigger a collection.
Thus, thread_suspendAll, thread_scanAll, + // and thread_resumeAll must be callable before thread_init + // completes, with the assumption that no other GC memory has yet + // been allocated by the system, and thus there is no risk of losing + // data if the global thread list is empty. The check of + // Thread.sm_tbeg below is done to ensure thread_init has completed, + // and therefore that calling Thread.getThis will not result in an + // error. For the short time when Thread.sm_tbeg is null, there is + // no reason not to simply call the multithreaded code below, with + // the expectation that the foreach loop will never be entered. + if( !multiThreadedFlag && Thread.sm_tbeg ) + { + if( ++suspendDepth == 1 ) + suspend( Thread.getThis() ); + return; + } + synchronized( Thread.slock ) + { + if( ++suspendDepth > 1 ) + return; + + // NOTE: I'd really prefer not to check isRunning within this loop but + // not doing so could be problematic if threads are terminated + // abnormally and a new thread is created with the same thread + // address before the next GC run. This situation might cause + // the same thread to be suspended twice, which would likely + // cause the second suspend to fail, the garbage collection to + // abort, and Bad Things to occur. + for( Thread t = Thread.sm_tbeg; t; t = t.next ) + { + if( t.isRunning ) + suspend( t ); + else + Thread.remove( t ); + } + + version( Posix ) + { + // wait on semaphore -- see note in suspend for + // why this is currently not implemented + } + } +} + + +/** + * Resume all threads but the calling thread for "stop the world" garbage + * collection runs. This function must be called once for each preceding + * call to thread_suspendAll before the threads are actually resumed. + * + * In: + * This routine must be preceded by a call to thread_suspendAll. + * + * Throws: + * ThreadException if the resume operation fails for a running thread.
+ */ +extern (C) void thread_resumeAll() +in +{ + assert( suspendDepth > 0 ); +} +body +{ + /** + * Resume the specified thread and unload stack and register information. + * If the supplied thread is the calling thread, stack and register + * information will be unloaded but the thread will not be resumed. If + * the resume operation fails and the thread is not running then it will + * be removed from the global thread list, otherwise an exception will be + * thrown. + * + * Params: + * t = The thread to resume. + * + * Throws: + * ThreadException if the resume fails for a running thread. + */ + void resume( Thread t ) + { + version( Windows ) + { + if( t.m_addr != GetCurrentThreadId() && ResumeThread( t.m_hndl ) == 0xFFFFFFFF ) + { + if( !t.isRunning ) + { + Thread.remove( t ); + return; + } + throw new ThreadException( "Unable to resume thread" ); + } + + if( !t.m_lock ) + t.m_curr.tstack = t.m_curr.bstack; + t.m_reg[0 .. $] = 0; + } + else version( OSX ) + { + if( t.m_addr != pthread_self() && thread_resume( t.m_tmach ) != KERN_SUCCESS ) + { + if( !t.isRunning ) + { + Thread.remove( t ); + return; + } + throw new ThreadException( "Unable to resume thread" ); + } + + if( !t.m_lock ) + t.m_curr.tstack = t.m_curr.bstack; + t.m_reg[0 .. $] = 0; + } + else version( Posix ) + { + if( t.m_addr != pthread_self() ) + { + if( pthread_kill( t.m_addr, SIGUSR2 ) != 0 ) + { + if( !t.isRunning ) + { + Thread.remove( t ); + return; + } + throw new ThreadException( "Unable to resume thread" ); + } + } + else if( !t.m_lock ) + { + t.m_curr.tstack = t.m_curr.bstack; + } + } + } + + + // NOTE: See thread_suspendAll for the logic behind this. 
+ if( !multiThreadedFlag && Thread.sm_tbeg ) + { + if( --suspendDepth == 0 ) + resume( Thread.getThis() ); + return; + } + synchronized( Thread.slock ) + { + if( --suspendDepth > 0 ) + return; + + for( Thread t = Thread.sm_tbeg; t; t = t.next ) + { + resume( t ); + } + } +} + + +private alias void delegate( void*, void* ) scanAllThreadsFn; + + +/** + * The main entry point for garbage collection. The supplied delegate + * will be passed ranges representing both stack and register values. + * + * Params: + * scan = The scanner function. It should scan from p1 through p2 - 1. + * curStackTop = An optional pointer to the top of the calling thread's stack. + * + * In: + * This routine must be preceded by a call to thread_suspendAll. + */ +extern (C) void thread_scanAll( scanAllThreadsFn scan, void* curStackTop = null ) +in +{ + assert( suspendDepth > 0 ); +} +body +{ + Thread thisThread = null; + void* oldStackTop = null; + + if( curStackTop && Thread.sm_tbeg ) + { + thisThread = Thread.getThis(); + if( !thisThread.m_lock ) + { + oldStackTop = thisThread.m_curr.tstack; + thisThread.m_curr.tstack = curStackTop; + } + } + + scope( exit ) + { + if( curStackTop && Thread.sm_tbeg ) + { + if( !thisThread.m_lock ) + { + thisThread.m_curr.tstack = oldStackTop; + } + } + } + + // NOTE: Synchronizing on Thread.slock is not needed because this + // function may only be called after all other threads have + // been suspended from within the same lock. + for( Thread.Context* c = Thread.sm_cbeg; c; c = c.next ) + { + version( StackGrowsDown ) + { + // NOTE: We can't index past the bottom of the stack + // so don't do the "+1" for StackGrowsDown. 
+ if( c.tstack && c.tstack < c.bstack ) + scan( c.tstack, c.bstack ); + } + else + { + if( c.bstack && c.bstack < c.tstack ) + scan( c.bstack, c.tstack + 1 ); + } + } + + for( Thread t = Thread.sm_tbeg; t; t = t.next ) + { + scan( t.m_tls.ptr, t.m_tls.ptr + t.m_tls.length ); // .ptr: no element-0 bounds check if m_tls is empty + + version( Windows ) + { + scan( &t.m_reg[0], &t.m_reg[0] + t.m_reg.length ); + } + } +} + + +/////////////////////////////////////////////////////////////////////////////// +// Thread Local +/////////////////////////////////////////////////////////////////////////////// + + +/** + * This class encapsulates the operations required to initialize, access, and + * destroy thread local data. + */ +class ThreadLocal( T ) +{ + /////////////////////////////////////////////////////////////////////////// + // Initialization + /////////////////////////////////////////////////////////////////////////// + + + /** + * Initializes thread local storage for the indicated value which will be + * initialized to def for all threads. + * + * Params: + * def = The default value to return if no value has been explicitly set. + */ + this( T def = T.init ) + { + m_def = def; + m_key = Thread.createLocal(); + } + + + ~this() + { + Thread.deleteLocal( m_key ); + } + + + /////////////////////////////////////////////////////////////////////////// + // Accessors + /////////////////////////////////////////////////////////////////////////// + + + /** + * Gets the value last set by the calling thread, or def if no such value + * has been set. + * + * Returns: + * The stored value or def if no value is stored. + */ + T val() + { + Wrap* wrap = cast(Wrap*) Thread.getLocal( m_key ); + + return wrap ? wrap.val : m_def; + } + + + /** + * Copies newval to a location specific to the calling thread, and returns + * newval. + * + * Params: + * newval = The value to set. + * + * Returns: + * The value passed to this function.
+ */ + T val( T newval ) + { + Wrap* wrap = cast(Wrap*) Thread.getLocal( m_key ); + + if( wrap is null ) + { + wrap = new Wrap; + Thread.setLocal( m_key, wrap ); + } + wrap.val = newval; + return newval; + } + + +private: + // + // A wrapper for the stored data. This is needed for determining whether + // set has ever been called for this thread (and therefore whether the + // default value should be returned) and also to flatten the differences + // between data that is smaller and larger than (void*).sizeof. The + // obvious tradeoff here is an extra per-thread allocation for each + // ThreadLocal value as compared to calling the Thread routines directly. + // + struct Wrap + { + T val; + } + + + T m_def; + uint m_key; +} + + +/////////////////////////////////////////////////////////////////////////////// +// Thread Group +/////////////////////////////////////////////////////////////////////////////// + + +/** + * This class is intended to simplify certain common programming techniques. + */ +class ThreadGroup +{ + /** + * Creates and starts a new Thread object that executes fn and adds it to + * the list of tracked threads. + * + * Params: + * fn = The thread function. + * + * Returns: + * A reference to the newly created thread. + */ + final Thread create( void function() fn ) + { + Thread t = new Thread( fn ); + + t.start(); + synchronized( this ) + { + m_all[t] = t; + } + return t; + } + + + /** + * Creates and starts a new Thread object that executes dg and adds it to + * the list of tracked threads. + * + * Params: + * dg = The thread function. + * + * Returns: + * A reference to the newly created thread. + */ + final Thread create( void delegate() dg ) + { + Thread t = new Thread( dg ); + + t.start(); + synchronized( this ) + { + m_all[t] = t; + } + return t; + } + + + /** + * Add t to the list of tracked threads if it is not already being tracked. + * + * Params: + * t = The thread to add. + * + * In: + * t must not be null. 
+ */ + final void add( Thread t ) + in + { + assert( t ); + } + body + { + synchronized( this ) + { + m_all[t] = t; + } + } + + + /** + * Removes t from the list of tracked threads. No operation will be + * performed if t is not currently being tracked by this object. + * + * Params: + * t = The thread to remove. + * + * In: + * t must not be null. + */ + final void remove( Thread t ) + in + { + assert( t ); + } + body + { + synchronized( this ) + { + m_all.remove( t ); + } + } + + + /** + * Operates on all threads currently tracked by this object. + */ + final int opApply( int delegate( inout Thread ) dg ) + { + synchronized( this ) + { + int ret = 0; + + // NOTE: This loop relies on the knowledge that m_all uses the + // Thread object for both the key and the mapped value. + foreach( Thread t; m_all.keys ) + { + ret = dg( t ); + if( ret ) + break; + } + return ret; + } + } + + + /** + * Iteratively joins all tracked threads. This function will block add, + * remove, and opApply until it completes. + * + * Params: + * rethrow = Rethrow any unhandled exception which may have caused the + * current thread to terminate. + * + * Throws: + * Any exception not handled by the joined threads. + */ + final void joinAll( bool rethrow = true ) + { + synchronized( this ) + { + // NOTE: This loop relies on the knowledge that m_all uses the + // Thread object for both the key and the mapped value. 
+ foreach( Thread t; m_all.keys ) + { + t.join( rethrow ); + } + } + } + + +private: + Thread[Thread] m_all; +} + + +/////////////////////////////////////////////////////////////////////////////// +// Fiber Platform Detection and Memory Allocation +/////////////////////////////////////////////////////////////////////////////// + + +private +{ + version( D_InlineAsm_X86 ) + { + version( X86_64 ) + { + + } + else + { + version( Windows ) + version = AsmX86_Win32; + else version( Posix ) + version = AsmX86_Posix; + } + } + else version( PPC ) + { + version( Posix ) + version = AsmPPC_Posix; + } + + + version( Posix ) + { + import core.sys.posix.unistd; // for sysconf + import core.sys.posix.sys.mman; // for mmap + import core.sys.posix.stdlib; // for malloc, valloc, free + + version( AsmX86_Win32 ) {} else + version( AsmX86_Posix ) {} else + version( AsmPPC_Posix ) {} else + { + // NOTE: The ucontext implementation requires architecture specific + // data definitions to operate so testing for it must be done + // by checking for the existence of ucontext_t rather than by + // a version identifier. Please note that this is considered + // an obsolescent feature according to the POSIX spec, so a + // custom solution is still preferred. 
+ import core.sys.posix.ucontext; + } + } + + const size_t PAGESIZE; +} + + +static this() +{ + static if( is( typeof( GetSystemInfo ) ) ) + { + SYSTEM_INFO info; + GetSystemInfo( &info ); + + PAGESIZE = info.dwPageSize; + assert( PAGESIZE < int.max ); + } + else static if( is( typeof( sysconf ) ) && + is( typeof( _SC_PAGESIZE ) ) ) + { + PAGESIZE = cast(size_t) sysconf( _SC_PAGESIZE ); + assert( PAGESIZE < int.max ); + } + else + { + version( PPC ) + PAGESIZE = 8192; + else + PAGESIZE = 4096; + } +} + + +/////////////////////////////////////////////////////////////////////////////// +// Fiber Entry Point and Context Switch +/////////////////////////////////////////////////////////////////////////////// + + +private +{ + extern (C) void fiber_entryPoint() + { + Fiber obj = Fiber.getThis(); + assert( obj ); + + assert( Thread.getThis().m_curr is obj.m_ctxt ); + volatile Thread.getThis().m_lock = false; + obj.m_ctxt.tstack = obj.m_ctxt.bstack; + obj.m_state = Fiber.State.EXEC; + + try + { + obj.run(); + } + catch( Object o ) + { + obj.m_unhandled = o; + } + + static if( is( ucontext_t ) ) + obj.m_ucur = &obj.m_utxt; + + obj.m_state = Fiber.State.TERM; + obj.switchOut(); + } + + + // NOTE: If AsmPPC_Posix is defined then the context switch routine will + // be defined externally until GDC supports inline PPC ASM. + version( AsmPPC_Posix ) + extern (C) void fiber_switchContext( void** oldp, void* newp ); + else + extern (C) void fiber_switchContext( void** oldp, void* newp ) + { + // NOTE: The data pushed and popped in this routine must match the + // default stack created by Fiber.initStack or the initial + // switch into a new context will fail. 
+ + version( AsmX86_Win32 ) + { + asm + { + naked; + + // save current stack state + push EBP; + mov EBP, ESP; + push EAX; + push dword ptr FS:[0]; + push dword ptr FS:[4]; + push dword ptr FS:[8]; + push EBX; + push ESI; + push EDI; + + // store oldp again with more accurate address + mov EAX, dword ptr 8[EBP]; + mov [EAX], ESP; + // load newp to begin context switch + mov ESP, dword ptr 12[EBP]; + + // load saved state from new stack + pop EDI; + pop ESI; + pop EBX; + pop dword ptr FS:[8]; + pop dword ptr FS:[4]; + pop dword ptr FS:[0]; + pop EAX; + pop EBP; + + // 'return' to complete switch + ret; + } + } + else version( AsmX86_Posix ) + { + asm + { + naked; + + // save current stack state + push EBP; + mov EBP, ESP; + push EAX; + push EBX; + push ESI; + push EDI; + + // store oldp again with more accurate address + mov EAX, dword ptr 8[EBP]; + mov [EAX], ESP; + // load newp to begin context switch + mov ESP, dword ptr 12[EBP]; + + // load saved state from new stack + pop EDI; + pop ESI; + pop EBX; + pop EAX; + pop EBP; + + // 'return' to complete switch + ret; + } + } + else static if( is( ucontext_t ) ) + { + Fiber cfib = Fiber.getThis(); + void* ucur = cfib.m_ucur; + + *oldp = &ucur; + swapcontext( **(cast(ucontext_t***) oldp), + *(cast(ucontext_t**) newp) ); + } + } +} + + +/////////////////////////////////////////////////////////////////////////////// +// Fiber +/////////////////////////////////////////////////////////////////////////////// + + +/** + * This class provides a cooperative concurrency mechanism integrated with the + * threading and garbage collection functionality. Calling a fiber may be + * considered a blocking operation that returns when the fiber yields (via + * Fiber.yield()). Execution occurs within the context of the calling thread + * so synchronization is not necessary to guarantee memory visibility so long + * as the same thread calls the fiber each time. 
Please note that there is no + * requirement that a fiber be bound to one specific thread. Rather, fibers + * may be freely passed between threads so long as they are not currently + * executing. Like threads, a new fiber thread may be created using either + * derivation or composition, as in the following example. + * + * Example: + * ---------------------------------------------------------------------- + * + * class DerivedFiber : Fiber + * { + * this() + * { + * super( &run ); + * } + * + * private : + * void run() + * { + * printf( "Derived fiber running.\n" ); + * } + * } + * + * void fiberFunc() + * { + * printf( "Composed fiber running.\n" ); + * Fiber.yield(); + * printf( "Composed fiber running.\n" ); + * } + * + * // create instances of each type + * Fiber derived = new DerivedFiber(); + * Fiber composed = new Fiber( &fiberFunc ); + * + * // call both fibers once + * derived.call(); + * composed.call(); + * printf( "Execution returned to calling context.\n" ); + * composed.call(); + * + * // since each fiber has run to completion, each should have state TERM + * assert( derived.state == Fiber.State.TERM ); + * assert( composed.state == Fiber.State.TERM ); + * + * ---------------------------------------------------------------------- + * + * Authors: Based on a design by Mikola Lysenko. + */ +class Fiber +{ + /////////////////////////////////////////////////////////////////////////// + // Initialization + /////////////////////////////////////////////////////////////////////////// + + + /** + * Initializes a fiber object which is associated with a static + * D function. + * + * Params: + * fn = The thread function. + * sz = The stack size for this fiber. + * + * In: + * fn must not be null. 
+ */ + this( void function() fn, size_t sz = PAGESIZE ) + in + { + assert( fn ); + } + body + { + m_fn = fn; + m_call = Call.FN; + m_state = State.HOLD; + allocStack( sz ); + initStack(); + } + + + /** + * Initializes a fiber object which is associated with a dynamic + * D function. + * + * Params: + * dg = The thread function. + * sz = The stack size for this fiber. + * + * In: + * dg must not be null. + */ + this( void delegate() dg, size_t sz = PAGESIZE ) + in + { + assert( dg ); + } + body + { + m_dg = dg; + m_call = Call.DG; + m_state = State.HOLD; + allocStack( sz ); + initStack(); + } + + + /** + * Cleans up any remaining resources used by this object. + */ + ~this() + { + // NOTE: A live reference to this object will exist on its associated + // stack from the first time its call() method has been called + // until its execution completes with State.TERM. Thus, the only + // times this dtor should be called are either if the fiber has + // terminated (and therefore has no active stack) or if the user + // explicitly deletes this object. The latter case is an error + // but is not easily tested for, since State.HOLD may imply that + // the fiber was just created but has never been run. There is + // not a compelling case to create a State.INIT just to offer a + // means of ensuring the user isn't violating this object's + // contract, so for now this requirement will be enforced by + // documentation only. + freeStack(); + } + + + /////////////////////////////////////////////////////////////////////////// + // General Actions + /////////////////////////////////////////////////////////////////////////// + + + /** + * Transfers execution to this fiber object. The calling context will be + * suspended until the fiber calls Fiber.yield() or until it terminates + * via an unhandled exception. + * + * Params: + * rethrow = Rethrow any unhandled exception which may have caused this + * fiber to terminate. + * + * In: + * This fiber must be in state HOLD. 
+ * + * Throws: + * Any exception not handled by this fiber, if rethrow = true. + * + * Returns: + * Any exception not handled by this fiber if rethrow = false, null + * otherwise. + */ + final Object call( bool rethrow = true ) + in + { + assert( m_state == State.HOLD ); + } + body + { + Fiber cur = getThis(); // caller's fiber, or null; restored via setThis( cur ) below + + static if( is( ucontext_t ) ) + m_ucur = cur ? &cur.m_utxt : &Fiber.sm_utxt; + + setThis( this ); + this.switchIn(); + setThis( cur ); + + static if( is( ucontext_t ) ) + m_ucur = null; + + // NOTE: If the fiber has terminated then the stack pointers must be + // reset. This ensures that the stack for this fiber is not + // scanned if the fiber has terminated. This is necessary to + // prevent any references lingering on the stack from delaying + // the collection of otherwise dead objects. The most notable + // being the current object, which is referenced at the top of + // fiber_entryPoint. + if( m_state == State.TERM ) + { + m_ctxt.tstack = m_ctxt.bstack; + } + if( m_unhandled ) + { + Object obj = m_unhandled; + m_unhandled = null; // cleared before throwing/returning so it is reported once + if( rethrow ) + throw obj; + return obj; + } + return null; + } + + + /** + * Resets this fiber so that it may be re-used. This routine may only be + * called for fibers that have terminated, as doing otherwise could result + * in scope-dependent functionality that is not executed. Stack-based + * classes, for example, may not be cleaned up properly if a fiber is reset + * before it has terminated. + * + * In: + * This fiber must be in state TERM. + */ + final void reset() + in + { + assert( m_state == State.TERM ); + assert( m_ctxt.tstack == m_ctxt.bstack ); + } + body + { + m_state = State.HOLD; + initStack(); + m_unhandled = null; + } + + + /////////////////////////////////////////////////////////////////////////// + // General Properties + /////////////////////////////////////////////////////////////////////////// + + + /** + * A fiber may occupy one of three states: HOLD, EXEC, and TERM.
The HOLD + * state applies to any fiber that is suspended and ready to be called. + * The EXEC state will be set for any fiber that is currently executing. + * And the TERM state is set when a fiber terminates. Once a fiber + * terminates, it must be reset before it may be called again. + */ + enum State + { + HOLD, /// + EXEC, /// + TERM /// + } + + + /** + * Gets the current state of this fiber. + * + * Returns: + * The state of this fiber as an enumerated value. + */ + final State state() + { + return m_state; + } + + + /////////////////////////////////////////////////////////////////////////// + // Actions on Calling Fiber + /////////////////////////////////////////////////////////////////////////// + + + /** + * Forces a context switch to occur away from the calling fiber. + */ + static void yield() + { + Fiber cur = getThis(); + assert( cur, "Fiber.yield() called with no active fiber" ); + assert( cur.m_state == State.EXEC ); + + static if( is( ucontext_t ) ) + cur.m_ucur = &cur.m_utxt; + + cur.m_state = State.HOLD; + cur.switchOut(); + cur.m_state = State.EXEC; // reached only after the fiber has been resumed + } + + + /** + * Forces a context switch to occur away from the calling fiber and then + * hands obj to the context that resumed it, where call() throws or returns it. + * + * Params: + * obj = The object to throw. + * + * In: + * obj must not be null. + */ + static void yieldAndThrow( Object obj ) + in + { + assert( obj ); + } + body + { + Fiber cur = getThis(); + assert( cur, "Fiber.yieldAndThrow() called with no active fiber" ); + assert( cur.m_state == State.EXEC ); + + static if( is( ucontext_t ) ) + cur.m_ucur = &cur.m_utxt; + + cur.m_unhandled = obj; // call() checks m_unhandled once control is back there + cur.m_state = State.HOLD; + cur.switchOut(); + cur.m_state = State.EXEC; // reached only after the fiber has been resumed + } + + + /////////////////////////////////////////////////////////////////////////// + // Fiber Accessors + /////////////////////////////////////////////////////////////////////////// + + + /** + * Provides a reference to the calling fiber or null if no fiber is + * currently active.
+ * + * Returns: + * The fiber object representing the calling fiber or null if no fiber + * is currently active. The result of deleting this object is undefined. + */ + static Fiber getThis() + { + version( Windows ) + { + return cast(Fiber) TlsGetValue( sm_this ); + } + else version( Posix ) + { + return cast(Fiber) pthread_getspecific( sm_this ); + } + } + + + /////////////////////////////////////////////////////////////////////////// + // Static Initialization + /////////////////////////////////////////////////////////////////////////// + + + static this() + { + version( Windows ) + { + sm_this = TlsAlloc(); + assert( sm_this != TLS_OUT_OF_INDEXES ); + } + else version( Posix ) + { + int status; + + status = pthread_key_create( &sm_this, null ); + assert( status == 0 ); + + static if( is( ucontext_t ) ) + { + status = getcontext( &sm_utxt ); + assert( status == 0 ); + } + } + } + + +private: + // + // Initializes a fiber object which has no associated executable function. + // + this() + { + m_call = Call.NO; + } + + + // + // Fiber entry point. Invokes the function or delegate passed on + // construction (if any). + // + final void run() + { + switch( m_call ) + { + case Call.FN: + m_fn(); + break; + case Call.DG: + m_dg(); + break; + default: + break; + } + } + + +private: + // + // The type of routine passed on fiber construction. + // + enum Call + { + NO, + FN, + DG + } + + + // + // Standard fiber data + // + Call m_call; + union + { + void function() m_fn; + void delegate() m_dg; + } + bool m_isRunning; + Object m_unhandled; + State m_state; + + +private: + /////////////////////////////////////////////////////////////////////////// + // Stack Management + /////////////////////////////////////////////////////////////////////////// + + + // + // Allocate a new stack for this fiber. 
+ // + final void allocStack( size_t sz ) + in + { + assert( !m_pmem && !m_ctxt ); + } + body + { + // adjust alloc size to a multiple of PAGESIZE + sz += PAGESIZE - 1; + sz -= sz % PAGESIZE; + + // NOTE: This instance of Thread.Context is dynamic so Fiber objects + // can be collected by the GC so long as no user level references + // to the object exist. If m_ctxt were not dynamic then its + // presence in the global context list would be enough to keep + // this object alive indefinitely. An alternative to allocating + // room for this struct explicitly would be to mash it into the + // base of the stack being allocated below. However, doing so + // requires too much special logic to be worthwhile. + m_ctxt = new Thread.Context; + + static if( is( typeof( VirtualAlloc ) ) ) + { + // reserve memory for stack + m_pmem = VirtualAlloc( null, + sz + PAGESIZE, + MEM_RESERVE, + PAGE_NOACCESS ); + if( !m_pmem ) + { + throw new FiberException( "Unable to reserve memory for stack" ); + } + + version( StackGrowsDown ) + { + void* stack = m_pmem + PAGESIZE; + void* guard = m_pmem; + void* pbase = stack + sz; + } + else + { + void* stack = m_pmem; + void* guard = m_pmem + sz; + void* pbase = stack; + } + + // allocate reserved stack segment + stack = VirtualAlloc( stack, + sz, + MEM_COMMIT, + PAGE_READWRITE ); + if( !stack ) + { + throw new FiberException( "Unable to allocate memory for stack" ); + } + + // allocate reserved guard page + guard = VirtualAlloc( guard, + PAGESIZE, + MEM_COMMIT, + PAGE_READWRITE | PAGE_GUARD ); + if( !guard ) + { + throw new FiberException( "Unable to create guard page for stack" ); + } + + m_ctxt.bstack = pbase; + m_ctxt.tstack = pbase; + m_size = sz; + } + else + { static if( is( typeof( mmap ) ) ) + { + m_pmem = mmap( null, + sz, + PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANON, + -1, + 0 ); + if( m_pmem == MAP_FAILED ) + m_pmem = null; + } + else static if( is( typeof( valloc ) ) ) + { + m_pmem = valloc( sz ); + } + else static if( is( 
typeof( malloc ) ) ) + { + m_pmem = malloc( sz ); + } + else + { + m_pmem = null; + } + + if( !m_pmem ) + { + throw new FiberException( "Unable to allocate memory for stack" ); + } + + version( StackGrowsDown ) + { + m_ctxt.bstack = m_pmem + sz; + m_ctxt.tstack = m_pmem + sz; + } + else + { + m_ctxt.bstack = m_pmem; + m_ctxt.tstack = m_pmem; + } + m_size = sz; + } + + Thread.add( m_ctxt ); + } + + + // + // Free this fiber's stack. + // + final void freeStack() + in + { + assert( m_pmem && m_ctxt ); + } + body + { + // NOTE: Since this routine is only ever expected to be called from + // the dtor, pointers to freed data are not set to null. + + // NOTE: m_ctxt is guaranteed to be alive because it is held in the + // global context list. + Thread.remove( m_ctxt ); + + static if( is( typeof( VirtualAlloc ) ) ) + { + VirtualFree( m_pmem, 0, MEM_RELEASE ); + } + else static if( is( typeof( mmap ) ) ) + { + munmap( m_pmem, m_size ); + } + else static if( is( typeof( valloc ) ) ) + { + free( m_pmem ); + } + else static if( is( typeof( malloc ) ) ) + { + free( m_pmem ); + } + delete m_ctxt; + } + + + // + // Initialize the allocated stack. + // + final void initStack() + in + { + assert( m_ctxt.tstack && m_ctxt.tstack == m_ctxt.bstack ); + assert( cast(size_t) m_ctxt.bstack % (void*).sizeof == 0 ); + } + body + { + void* pstack = m_ctxt.tstack; + scope( exit ) m_ctxt.tstack = pstack; + + void push( size_t val ) + { + version( StackGrowsDown ) + { + pstack -= size_t.sizeof; + *(cast(size_t*) pstack) = val; + } + else + { + pstack += size_t.sizeof; + *(cast(size_t*) pstack) = val; + } + } + + // NOTE: On OS X the stack must be 16-byte aligned according to the + // IA-32 call spec. 
+ version( OSX ) + { + version( StackGrowsDown ) + { + pstack = cast(void*)(cast(size_t)(pstack) - (cast(size_t)(pstack) & 0x0F)); // round down to 16; size_t keeps the full pointer width + } + else + { + pstack = cast(void*)((cast(size_t)(pstack) + 0x0F) & ~cast(size_t) 0x0F); // round up to 16 (adding p & 0x0F does not align) + } + } + + version( AsmX86_Win32 ) + { + push( cast(size_t) &fiber_entryPoint ); // EIP + push( 0xFFFFFFFF ); // EBP + push( 0x00000000 ); // EAX + push( 0xFFFFFFFF ); // FS:[0] + version( StackGrowsDown ) + { + push( cast(size_t) m_ctxt.bstack ); // FS:[4] + push( cast(size_t) m_ctxt.bstack - m_size ); // FS:[8] + } + else + { + push( cast(size_t) m_ctxt.bstack ); // FS:[4] + push( cast(size_t) m_ctxt.bstack + m_size ); // FS:[8] + } + push( 0x00000000 ); // EBX + push( 0x00000000 ); // ESI + push( 0x00000000 ); // EDI + } + else version( AsmX86_Posix ) + { + push( 0x00000000 ); // Pad stack for OSX + push( cast(size_t) &fiber_entryPoint ); // EIP + push( 0x00000000 ); // EBP + push( 0x00000000 ); // EAX + push( 0x00000000 ); // EBX + push( 0x00000000 ); // ESI + push( 0x00000000 ); // EDI + } + else version( AsmPPC_Posix ) + { + version( StackGrowsDown ) + { + pstack -= int.sizeof * 5; + } + else + { + pstack += int.sizeof * 5; + } + + push( cast(size_t) &fiber_entryPoint ); // link register + push( 0x00000000 ); // control register + push( 0x00000000 ); // old stack pointer + + // GPR values + version( StackGrowsDown ) + { + pstack -= int.sizeof * 20; + } + else + { + pstack += int.sizeof * 20; + } + + assert( (cast(size_t) pstack & 0x0f) == 0 ); // parenthesized: '==' binds tighter than '&' + } + else static if( is( ucontext_t ) ) + { + getcontext( &m_utxt ); + m_utxt.uc_stack.ss_sp = m_ctxt.bstack; + m_utxt.uc_stack.ss_size = m_size; + makecontext( &m_utxt, &fiber_entryPoint, 0 ); + // NOTE: If ucontext is being used then the top of the stack will + // be a pointer to the ucontext_t struct for that fiber.
+ push( cast(size_t) &m_utxt ); + } + } + + + Thread.Context* m_ctxt; + size_t m_size; + void* m_pmem; + + static if( is( ucontext_t ) ) + { + // NOTE: The static ucontext instance is used to represent the context + // of the main application thread. + static ucontext_t sm_utxt = void; + ucontext_t m_utxt = void; + ucontext_t* m_ucur = null; + } + + +private: + /////////////////////////////////////////////////////////////////////////// + // Storage of Active Fiber + /////////////////////////////////////////////////////////////////////////// + + + // + // Sets a thread-local reference to the current fiber object. + // + static void setThis( Fiber f ) + { + version( Windows ) + { + TlsSetValue( sm_this, cast(void*) f ); + } + else version( Posix ) + { + pthread_setspecific( sm_this, cast(void*) f ); + } + } + + + __gshared Thread.TLSKey sm_this; + + +private: + /////////////////////////////////////////////////////////////////////////// + // Context Switching + /////////////////////////////////////////////////////////////////////////// + + + // + // Switches into the stack held by this fiber. + // + final void switchIn() + { + Thread tobj = Thread.getThis(); + void** oldp = &tobj.m_curr.tstack; + void* newp = m_ctxt.tstack; + + // NOTE: The order of operations here is very important. The current + // stack top must be stored before m_lock is set, and pushContext + // must not be called until after m_lock is set. This process + // is intended to prevent a race condition with the suspend + // mechanism used for garbage collection. If it is not followed, + // a badly timed collection could cause the GC to scan from the + // bottom of one stack to the top of another, or to miss scanning + // a stack that still contains valid data. The old stack pointer + // oldp will be set again before the context switch to guarantee + // that it points to exactly the correct stack location so the + // successive pop operations will succeed. 
+ *oldp = getStackTop(); + volatile tobj.m_lock = true; + tobj.pushContext( m_ctxt ); + + fiber_switchContext( oldp, newp ); + + // NOTE: As above, these operations must be performed in a strict order + // to prevent Bad Things from happening. + tobj.popContext(); + volatile tobj.m_lock = false; + tobj.m_curr.tstack = tobj.m_curr.bstack; + } + + + // + // Switches out of the current stack and into the enclosing stack. + // + final void switchOut() + { + Thread tobj = Thread.getThis(); + void** oldp = &m_ctxt.tstack; + void* newp = tobj.m_curr.within.tstack; + + // NOTE: The order of operations here is very important. The current + // stack top must be stored before m_lock is set, and pushContext + // must not be called until after m_lock is set. This process + // is intended to prevent a race condition with the suspend + // mechanism used for garbage collection. If it is not followed, + // a badly timed collection could cause the GC to scan from the + // bottom of one stack to the top of another, or to miss scanning + // a stack that still contains valid data. The old stack pointer + // oldp will be set again before the context switch to guarantee + // that it points to exactly the correct stack location so the + // successive pop operations will succeed. + *oldp = getStackTop(); + volatile tobj.m_lock = true; + + fiber_switchContext( oldp, newp ); + + // NOTE: As above, these operations must be performed in a strict order + // to prevent Bad Things from happening. + volatile tobj.m_lock = false; + tobj.m_curr.tstack = tobj.m_curr.bstack; + } +} + +version (OSX) +{ + /* The Mach-O object file format does not allow for thread local storage + * declarations. So, instead we roll our own by putting tls into + * the sections __tlsdata and __tlscoal_nt. 
+ */ + + extern (D) + void* ___tls_get_addr(void* p) + { + return p; + } +} diff --git a/druntime/src/common/core/threadasm.S b/druntime/src/common/core/threadasm.S new file mode 100644 index 00000000..3143db67 --- /dev/null +++ b/druntime/src/common/core/threadasm.S @@ -0,0 +1,143 @@ +/** + * Support code for multithreading. + * + * Copyright: Copyright Mikola Lysenko 2005 - 2009. + * License: Boost License 1.0. + * Authors: Walter Bright + * + * Copyright Digital Mars 2004 - 2009. + * Distributed under the Boost Software License, Version 1.0. + * (See accompanying file LICENSE_1_0.txt or copy at + * http://www.boost.org/LICENSE_1_0.txt) + */ +module rt.aApply; + +private import rt.util.utf; + +/********************************************** + */ + +// dg is D, but _aApplycd() is C +extern (D) typedef int delegate(void *) dg_t; + +extern (C) int _aApplycd1(char[] aa, dg_t dg) // char[] in, one dchar per dg call; stops on first nonzero result +{ int result; + size_t i; + size_t len = aa.length; + + debug(apply) printf("_aApplycd1(), len = %d\n", len); + for (i = 0; i < len; ) + { dchar d; + + d = aa[i]; + if (d & 0x80) // high bit set: not a single-byte (ASCII) code unit + d = decode(aa, i); // presumably also advances i past the sequence -- confirm in rt.util.utf + else + i++; + result = dg(cast(void *)&d); + if (result) + break; + } + return result; +} + +extern (C) int _aApplywd1(wchar[] aa, dg_t dg) // wchar[] in, one dchar per dg call; stops on first nonzero result +{ int result; + size_t i; + size_t len = aa.length; + + debug(apply) printf("_aApplywd1(), len = %d\n", len); + for (i = 0; i < len; ) + { dchar d; + + d = aa[i]; + if (d & ~0x7F) // anything above 0x7F goes through decode() + d = decode(aa, i); + else + i++; + result = dg(cast(void *)&d); + if (result) + break; + } + return result; +} + +extern (C) int _aApplycw1(char[] aa, dg_t dg) // char[] in, wchar code units out (two dg calls for d > 0xFFFF) +{ int result; + size_t i; + size_t len = aa.length; + + debug(apply) printf("_aApplycw1(), len = %d\n", len); + for (i = 0; i < len; ) + { dchar d; + wchar w; + + w = aa[i]; + if (w & 0x80) + { d = decode(aa, i); + if (d <= 0xFFFF) + w = cast(wchar) d; + else + { + w = cast(wchar)((((d - 0x10000) >> 10) & 0x3FF) + 0xD800); // first unit of the pair: top 10 bits + 0xD800 + result = dg(cast(void *)&w); + if (result) + break; + w = cast(wchar)(((d - 0x10000) & 0x3FF) +
0xDC00); + } + } + else + i++; + result = dg(cast(void *)&w); + if (result) + break; + } + return result; +} + +extern (C) int _aApplywc1(wchar[] aa, dg_t dg) +{ int result; + size_t i; + size_t len = aa.length; + + debug(apply) printf("_aApplywc1(), len = %d\n", len); + for (i = 0; i < len; ) + { dchar d; + wchar w; + char c; + + w = aa[i]; + if (w & ~0x7F) + { + char[4] buf; + + d = decode(aa, i); + auto b = toUTF8(buf, d); + foreach (char c2; b) + { + result = dg(cast(void *)&c2); + if (result) + return result; + } + continue; + } + else + { c = cast(char)w; + i++; + } + result = dg(cast(void *)&c); + if (result) + break; + } + return result; +} + +extern (C) int _aApplydc1(dchar[] aa, dg_t dg) +{ int result; + + debug(apply) printf("_aApplydc1(), len = %d\n", aa.length); + foreach (dchar d; aa) + { + char c; + + if (d & ~0x7F) + { + char[4] buf; + + auto b = toUTF8(buf, d); + foreach (char c2; b) + { + result = dg(cast(void *)&c2); + if (result) + return result; + } + continue; + } + else + { + c = cast(char)d; + } + result = dg(cast(void *)&c); + if (result) + break; + } + return result; +} + +extern (C) int _aApplydw1(dchar[] aa, dg_t dg) +{ int result; + + debug(apply) printf("_aApplydw1(), len = %d\n", aa.length); + foreach (dchar d; aa) + { + wchar w; + + if (d <= 0xFFFF) + w = cast(wchar) d; + else + { + w = cast(wchar)((((d - 0x10000) >> 10) & 0x3FF) + 0xD800); + result = dg(cast(void *)&w); + if (result) + break; + w = cast(wchar)(((d - 0x10000) & 0x3FF) + 0xDC00); + } + result = dg(cast(void *)&w); + if (result) + break; + } + return result; +} + + +/****************************************************************************/ + +// dg is D, but _aApplycd2() is C +extern (D) typedef int delegate(void *, void *) dg2_t; + +extern (C) int _aApplycd2(char[] aa, dg2_t dg) +{ int result; + size_t i; + size_t n; + size_t len = aa.length; + + debug(apply) printf("_aApplycd2(), len = %d\n", len); + for (i = 0; i < len; i += n) + { dchar d; + + d = aa[i]; + if 
(d & 0x80) + { + n = i; + d = decode(aa, n); + n -= i; + } + else + n = 1; + result = dg(&i, cast(void *)&d); + if (result) + break; + } + return result; +} + +extern (C) int _aApplywd2(wchar[] aa, dg2_t dg) +{ int result; + size_t i; + size_t n; + size_t len = aa.length; + + debug(apply) printf("_aApplywd2(), len = %d\n", len); + for (i = 0; i < len; i += n) + { dchar d; + + d = aa[i]; + if (d & ~0x7F) + { + n = i; + d = decode(aa, n); + n -= i; + } + else + n = 1; + result = dg(&i, cast(void *)&d); + if (result) + break; + } + return result; +} + +extern (C) int _aApplycw2(char[] aa, dg2_t dg) +{ int result; + size_t i; + size_t n; + size_t len = aa.length; + + debug(apply) printf("_aApplycw2(), len = %d\n", len); + for (i = 0; i < len; i += n) + { dchar d; + wchar w; + + w = aa[i]; + if (w & 0x80) + { n = i; + d = decode(aa, n); + n -= i; + if (d <= 0xFFFF) + w = cast(wchar) d; + else + { + w = cast(wchar) ((((d - 0x10000) >> 10) & 0x3FF) + 0xD800); + result = dg(&i, cast(void *)&w); + if (result) + break; + w = cast(wchar) (((d - 0x10000) & 0x3FF) + 0xDC00); + } + } + else + n = 1; + result = dg(&i, cast(void *)&w); + if (result) + break; + } + return result; +} + +extern (C) int _aApplywc2(wchar[] aa, dg2_t dg) +{ int result; + size_t i; + size_t n; + size_t len = aa.length; + + debug(apply) printf("_aApplywc2(), len = %d\n", len); + for (i = 0; i < len; i += n) + { dchar d; + wchar w; + char c; + + w = aa[i]; + if (w & ~0x7F) + { + char[4] buf; + + n = i; + d = decode(aa, n); + n -= i; + auto b = toUTF8(buf, d); + foreach (char c2; b) + { + result = dg(&i, cast(void *)&c2); + if (result) + return result; + } + continue; + } + else + { c = cast(char)w; + n = 1; + } + result = dg(&i, cast(void *)&c); + if (result) + break; + } + return result; +} + +extern (C) int _aApplydc2(dchar[] aa, dg2_t dg) +{ int result; + size_t i; + size_t len = aa.length; + + debug(apply) printf("_aApplydc2(), len = %d\n", len); + for (i = 0; i < len; i++) + { dchar d; + char c; + + d 
= aa[i]; + if (d & ~0x7F) + { + char[4] buf; + + auto b = toUTF8(buf, d); + foreach (char c2; b) + { + result = dg(&i, cast(void *)&c2); + if (result) + return result; + } + continue; + } + else + { c = cast(char)d; + } + result = dg(&i, cast(void *)&c); + if (result) + break; + } + return result; +} + +extern (C) int _aApplydw2(dchar[] aa, dg2_t dg) +{ int result; + + debug(apply) printf("_aApplydw2(), len = %d\n", aa.length); + foreach (size_t i, dchar d; aa) + { + wchar w; + auto j = i; + + if (d <= 0xFFFF) + w = cast(wchar) d; + else + { + w = cast(wchar) ((((d - 0x10000) >> 10) & 0x3FF) + 0xD800); + result = dg(&j, cast(void *)&w); + if (result) + break; + w = cast(wchar) (((d - 0x10000) & 0x3FF) + 0xDC00); + } + result = dg(&j, cast(void *)&w); + if (result) + break; + } + return result; +} diff --git a/druntime/src/compiler/dmd/aApplyR.d b/druntime/src/compiler/dmd/aApplyR.d new file mode 100644 index 00000000..7236ccd5 --- /dev/null +++ b/druntime/src/compiler/dmd/aApplyR.d @@ -0,0 +1,957 @@ +/** + * This code handles decoding UTF strings for foreach_reverse loops. There are + * 6 combinations of conversions between char, wchar, and dchar, and 2 of each + * of those. + * + * Copyright: Copyright Digital Mars 2004 - 2009. 
+ * License: >= 1; + c = aa[i]; + } + d |= (c & m) << j; + } + result = dg(cast(void *)&d); + if (result) + break; + } + return result; +} + +unittest +{ + debug(apply) printf("_aApplyRcd1.unittest\n"); + + auto s = "hello"c[]; + int i; + + foreach_reverse(dchar d; s) + { + switch (i) + { + case 0: assert(d == 'o'); break; + case 1: assert(d == 'l'); break; + case 2: assert(d == 'l'); break; + case 3: assert(d == 'e'); break; + case 4: assert(d == 'h'); break; + default: assert(0); + } + i++; + } + assert(i == 5); + + s = "a\u1234\U00100456b"; + i = 0; + foreach_reverse(dchar d; s) + { + //printf("i = %d, d = %x\n", i, d); + switch (i) + { + case 0: assert(d == 'b'); break; + case 1: assert(d == '\U00100456'); break; + case 2: assert(d == '\u1234'); break; + case 3: assert(d == 'a'); break; + default: assert(0); + } + i++; + } + assert(i == 4); +} + +/*****************************/ + +extern (C) int _aApplyRwd1(in wchar[] aa, dg_t dg) +{ int result; + + debug(apply) printf("_aApplyRwd1(), len = %d\n", aa.length); + for (size_t i = aa.length; i != 0; ) + { dchar d; + + i--; + d = aa[i]; + if (d >= 0xDC00 && d <= 0xDFFF) + { if (i == 0) + onUnicodeError("Invalid UTF-16 sequence", 0); + i--; + d = ((aa[i] - 0xD7C0) << 10) + (d - 0xDC00); + } + result = dg(cast(void *)&d); + if (result) + break; + } + return result; +} + +unittest +{ + debug(apply) printf("_aApplyRwd1.unittest\n"); + + auto s = "hello"w[]; + int i; + + foreach_reverse(dchar d; s) + { + switch (i) + { + case 0: assert(d == 'o'); break; + case 1: assert(d == 'l'); break; + case 2: assert(d == 'l'); break; + case 3: assert(d == 'e'); break; + case 4: assert(d == 'h'); break; + default: assert(0); + } + i++; + } + assert(i == 5); + + s = "a\u1234\U00100456b"; + i = 0; + foreach_reverse(dchar d; s) + { + //printf("i = %d, d = %x\n", i, d); + switch (i) + { + case 0: assert(d == 'b'); break; + case 1: assert(d == '\U00100456'); break; + case 2: assert(d == '\u1234'); break; + case 3: assert(d == 'a'); break; 
+ default: assert(0); + } + i++; + } + assert(i == 4); +} + +/*****************************/ + +extern (C) int _aApplyRcw1(in char[] aa, dg_t dg) +{ int result; + + debug(apply) printf("_aApplyRcw1(), len = %d\n", aa.length); + for (size_t i = aa.length; i != 0; ) + { dchar d; + wchar w; + + i--; + w = aa[i]; + if (w & 0x80) + { char c = cast(char)w; + uint j; + uint m = 0x3F; + d = 0; + while ((c & 0xC0) != 0xC0) + { if (i == 0) + onUnicodeError("Invalid UTF-8 sequence", 0); + i--; + d |= (c & 0x3F) << j; + j += 6; + m >>= 1; + c = aa[i]; + } + d |= (c & m) << j; + + if (d <= 0xFFFF) + w = cast(wchar) d; + else + { + w = cast(wchar) ((((d - 0x10000) >> 10) & 0x3FF) + 0xD800); + result = dg(cast(void *)&w); + if (result) + break; + w = cast(wchar) (((d - 0x10000) & 0x3FF) + 0xDC00); + } + } + result = dg(cast(void *)&w); + if (result) + break; + } + return result; +} + +unittest +{ + debug(apply) printf("_aApplyRcw1.unittest\n"); + + auto s = "hello"c[]; + int i; + + foreach_reverse(wchar d; s) + { + switch (i) + { + case 0: assert(d == 'o'); break; + case 1: assert(d == 'l'); break; + case 2: assert(d == 'l'); break; + case 3: assert(d == 'e'); break; + case 4: assert(d == 'h'); break; + default: assert(0); + } + i++; + } + assert(i == 5); + + s = "a\u1234\U00100456b"; + i = 0; + foreach_reverse(wchar d; s) + { + //printf("i = %d, d = %x\n", i, d); + switch (i) + { + case 0: assert(d == 'b'); break; + case 1: assert(d == 0xDBC1); break; + case 2: assert(d == 0xDC56); break; + case 3: assert(d == 0x1234); break; + case 4: assert(d == 'a'); break; + default: assert(0); + } + i++; + } + assert(i == 5); +} + +/*****************************/ + +extern (C) int _aApplyRwc1(in wchar[] aa, dg_t dg) +{ int result; + + debug(apply) printf("_aApplyRwc1(), len = %d\n", aa.length); + for (size_t i = aa.length; i != 0; ) + { dchar d; + char c; + + i--; + d = aa[i]; + if (d >= 0xDC00 && d <= 0xDFFF) + { if (i == 0) + onUnicodeError("Invalid UTF-16 sequence", 0); + i--; + d = 
((aa[i] - 0xD7C0) << 10) + (d - 0xDC00); + } + + if (d & ~0x7F) + { + char[4] buf; + + auto b = toUTF8(buf, d); + foreach (char c2; b) + { + result = dg(cast(void *)&c2); + if (result) + return result; + } + continue; + } + c = cast(char)d; + result = dg(cast(void *)&c); + if (result) + break; + } + return result; +} + +unittest +{ + debug(apply) printf("_aApplyRwc1.unittest\n"); + + auto s = "hello"w[]; + int i; + + foreach_reverse(char d; s) + { + switch (i) + { + case 0: assert(d == 'o'); break; + case 1: assert(d == 'l'); break; + case 2: assert(d == 'l'); break; + case 3: assert(d == 'e'); break; + case 4: assert(d == 'h'); break; + default: assert(0); + } + i++; + } + assert(i == 5); + + s = "a\u1234\U00100456b"; + i = 0; + foreach_reverse(char d; s) + { + //printf("i = %d, d = %x\n", i, d); + switch (i) + { + case 0: assert(d == 'b'); break; + case 1: assert(d == 0xF4); break; + case 2: assert(d == 0x80); break; + case 3: assert(d == 0x91); break; + case 4: assert(d == 0x96); break; + case 5: assert(d == 0xE1); break; + case 6: assert(d == 0x88); break; + case 7: assert(d == 0xB4); break; + case 8: assert(d == 'a'); break; + default: assert(0); + } + i++; + } + assert(i == 9); +} + +/*****************************/ + +extern (C) int _aApplyRdc1(in dchar[] aa, dg_t dg) +{ int result; + + debug(apply) printf("_aApplyRdc1(), len = %d\n", aa.length); + for (size_t i = aa.length; i != 0;) + { dchar d = aa[--i]; + char c; + + if (d & ~0x7F) + { + char[4] buf; + + auto b = toUTF8(buf, d); + foreach (char c2; b) + { + result = dg(cast(void *)&c2); + if (result) + return result; + } + continue; + } + else + { + c = cast(char)d; + } + result = dg(cast(void *)&c); + if (result) + break; + } + return result; +} + +unittest +{ + debug(apply) printf("_aApplyRdc1.unittest\n"); + + auto s = "hello"d[]; + int i; + + foreach_reverse(char d; s) + { + switch (i) + { + case 0: assert(d == 'o'); break; + case 1: assert(d == 'l'); break; + case 2: assert(d == 'l'); break; + case 3: 
assert(d == 'e'); break; + case 4: assert(d == 'h'); break; + default: assert(0); + } + i++; + } + assert(i == 5); + + s = "a\u1234\U00100456b"; + i = 0; + foreach_reverse(char d; s) + { + //printf("i = %d, d = %x\n", i, d); + switch (i) + { + case 0: assert(d == 'b'); break; + case 1: assert(d == 0xF4); break; + case 2: assert(d == 0x80); break; + case 3: assert(d == 0x91); break; + case 4: assert(d == 0x96); break; + case 5: assert(d == 0xE1); break; + case 6: assert(d == 0x88); break; + case 7: assert(d == 0xB4); break; + case 8: assert(d == 'a'); break; + default: assert(0); + } + i++; + } + assert(i == 9); +} + +/*****************************/ + +extern (C) int _aApplyRdw1(in dchar[] aa, dg_t dg) +{ int result; + + debug(apply) printf("_aApplyRdw1(), len = %d\n", aa.length); + for (size_t i = aa.length; i != 0; ) + { dchar d = aa[--i]; + wchar w; + + if (d <= 0xFFFF) + w = cast(wchar) d; + else + { + w = cast(wchar) ((((d - 0x10000) >> 10) & 0x3FF) + 0xD800); + result = dg(cast(void *)&w); + if (result) + break; + w = cast(wchar) (((d - 0x10000) & 0x3FF) + 0xDC00); + } + result = dg(cast(void *)&w); + if (result) + break; + } + return result; +} + +unittest +{ + debug(apply) printf("_aApplyRdw1.unittest\n"); + + auto s = "hello"d[]; + int i; + + foreach_reverse(wchar d; s) + { + switch (i) + { + case 0: assert(d == 'o'); break; + case 1: assert(d == 'l'); break; + case 2: assert(d == 'l'); break; + case 3: assert(d == 'e'); break; + case 4: assert(d == 'h'); break; + default: assert(0); + } + i++; + } + assert(i == 5); + + s = "a\u1234\U00100456b"; + i = 0; + foreach_reverse(wchar d; s) + { + //printf("i = %d, d = %x\n", i, d); + switch (i) + { + case 0: assert(d == 'b'); break; + case 1: assert(d == 0xDBC1); break; + case 2: assert(d == 0xDC56); break; + case 3: assert(d == 0x1234); break; + case 4: assert(d == 'a'); break; + default: assert(0); + } + i++; + } + assert(i == 5); +} + + 
+/****************************************************************************/ +/* 2 argument versions */ + +// dg is D, but _aApplyRcd2() is C +extern (D) typedef int delegate(void *, void *) dg2_t; + +extern (C) int _aApplyRcd2(in char[] aa, dg2_t dg) +{ int result; + size_t i; + size_t len = aa.length; + + debug(apply) printf("_aApplyRcd2(), len = %d\n", len); + for (i = len; i != 0; ) + { dchar d; + + i--; + d = aa[i]; + if (d & 0x80) + { char c = cast(char)d; + uint j; + uint m = 0x3F; + d = 0; + while ((c & 0xC0) != 0xC0) + { if (i == 0) + onUnicodeError("Invalid UTF-8 sequence", 0); + i--; + d |= (c & 0x3F) << j; + j += 6; + m >>= 1; + c = aa[i]; + } + d |= (c & m) << j; + } + result = dg(&i, cast(void *)&d); + if (result) + break; + } + return result; +} + +unittest +{ + debug(apply) printf("_aApplyRcd2.unittest\n"); + + auto s = "hello"c[]; + int i; + + foreach_reverse(k, dchar d; s) + { + assert(k == 4 - i); + switch (i) + { + case 0: assert(d == 'o'); break; + case 1: assert(d == 'l'); break; + case 2: assert(d == 'l'); break; + case 3: assert(d == 'e'); break; + case 4: assert(d == 'h'); break; + default: assert(0); + } + i++; + } + assert(i == 5); + + s = "a\u1234\U00100456b"; + i = 0; + foreach_reverse(k, dchar d; s) + { + //printf("i = %d, k = %d, d = %x\n", i, k, d); + switch (i) + { + case 0: assert(d == 'b'); assert(k == 8); break; + case 1: assert(d == '\U00100456'); assert(k == 4); break; + case 2: assert(d == '\u1234'); assert(k == 1); break; + case 3: assert(d == 'a'); assert(k == 0); break; + default: assert(0); + } + i++; + } + assert(i == 4); +} + +/*****************************/ + +extern (C) int _aApplyRwd2(in wchar[] aa, dg2_t dg) +{ int result; + + debug(apply) printf("_aApplyRwd2(), len = %d\n", aa.length); + for (size_t i = aa.length; i != 0; ) + { dchar d; + + i--; + d = aa[i]; + if (d >= 0xDC00 && d <= 0xDFFF) + { if (i == 0) + onUnicodeError("Invalid UTF-16 sequence", 0); + i--; + d = ((aa[i] - 0xD7C0) << 10) + (d - 0xDC00); + } + 
result = dg(&i, cast(void *)&d); + if (result) + break; + } + return result; +} + +unittest +{ + debug(apply) printf("_aApplyRwd2.unittest\n"); + + auto s = "hello"w[]; + int i; + + foreach_reverse(k, dchar d; s) + { + //printf("i = %d, k = %d, d = %x\n", i, k, d); + assert(k == 4 - i); + switch (i) + { + case 0: assert(d == 'o'); break; + case 1: assert(d == 'l'); break; + case 2: assert(d == 'l'); break; + case 3: assert(d == 'e'); break; + case 4: assert(d == 'h'); break; + default: assert(0); + } + i++; + } + assert(i == 5); + + s = "a\u1234\U00100456b"; + i = 0; + foreach_reverse(k, dchar d; s) + { + //printf("i = %d, k = %d, d = %x\n", i, k, d); + switch (i) + { + case 0: assert(k == 4); assert(d == 'b'); break; + case 1: assert(k == 2); assert(d == '\U00100456'); break; + case 2: assert(k == 1); assert(d == '\u1234'); break; + case 3: assert(k == 0); assert(d == 'a'); break; + default: assert(0); + } + i++; + } + assert(i == 4); +} + +/*****************************/ + +extern (C) int _aApplyRcw2(in char[] aa, dg2_t dg) +{ int result; + + debug(apply) printf("_aApplyRcw2(), len = %d\n", aa.length); + for (size_t i = aa.length; i != 0; ) + { dchar d; + wchar w; + + i--; + w = aa[i]; + if (w & 0x80) + { char c = cast(char)w; + uint j; + uint m = 0x3F; + d = 0; + while ((c & 0xC0) != 0xC0) + { if (i == 0) + onUnicodeError("Invalid UTF-8 sequence", 0); + i--; + d |= (c & 0x3F) << j; + j += 6; + m >>= 1; + c = aa[i]; + } + d |= (c & m) << j; + + if (d <= 0xFFFF) + w = cast(wchar) d; + else + { + w = cast(wchar) ((((d - 0x10000) >> 10) & 0x3FF) + 0xD800); + result = dg(&i, cast(void *)&w); + if (result) + break; + w = cast(wchar) (((d - 0x10000) & 0x3FF) + 0xDC00); + } + } + result = dg(&i, cast(void *)&w); + if (result) + break; + } + return result; +} + +unittest +{ + debug(apply) printf("_aApplyRcw2.unittest\n"); + + auto s = "hello"c[]; + int i; + + foreach_reverse(k, wchar d; s) + { + //printf("i = %d, k = %d, d = %x\n", i, k, d); + assert(k == 4 - i); + 
switch (i) + { + case 0: assert(d == 'o'); break; + case 1: assert(d == 'l'); break; + case 2: assert(d == 'l'); break; + case 3: assert(d == 'e'); break; + case 4: assert(d == 'h'); break; + default: assert(0); + } + i++; + } + assert(i == 5); + + s = "a\u1234\U00100456b"; + i = 0; + foreach_reverse(k, wchar d; s) + { + //printf("i = %d, k = %d, d = %x\n", i, k, d); + switch (i) + { + case 0: assert(k == 8); assert(d == 'b'); break; + case 1: assert(k == 4); assert(d == 0xDBC1); break; + case 2: assert(k == 4); assert(d == 0xDC56); break; + case 3: assert(k == 1); assert(d == 0x1234); break; + case 4: assert(k == 0); assert(d == 'a'); break; + default: assert(0); + } + i++; + } + assert(i == 5); +} + +/*****************************/ + +extern (C) int _aApplyRwc2(in wchar[] aa, dg2_t dg) +{ int result; + + debug(apply) printf("_aApplyRwc2(), len = %d\n", aa.length); + for (size_t i = aa.length; i != 0; ) + { dchar d; + char c; + + i--; + d = aa[i]; + if (d >= 0xDC00 && d <= 0xDFFF) + { if (i == 0) + onUnicodeError("Invalid UTF-16 sequence", 0); + i--; + d = ((aa[i] - 0xD7C0) << 10) + (d - 0xDC00); + } + + if (d & ~0x7F) + { + char[4] buf; + + auto b = toUTF8(buf, d); + foreach (char c2; b) + { + result = dg(&i, cast(void *)&c2); + if (result) + return result; + } + continue; + } + c = cast(char)d; + result = dg(&i, cast(void *)&c); + if (result) + break; + } + return result; +} + +unittest +{ + debug(apply) printf("_aApplyRwc2.unittest\n"); + + auto s = "hello"w[]; + int i; + + foreach_reverse(k, char d; s) + { + //printf("i = %d, k = %d, d = %x\n", i, k, d); + assert(k == 4 - i); + switch (i) + { + case 0: assert(d == 'o'); break; + case 1: assert(d == 'l'); break; + case 2: assert(d == 'l'); break; + case 3: assert(d == 'e'); break; + case 4: assert(d == 'h'); break; + default: assert(0); + } + i++; + } + assert(i == 5); + + s = "a\u1234\U00100456b"; + i = 0; + foreach_reverse(k, char d; s) + { + //printf("i = %d, k = %d, d = %x\n", i, k, d); + switch (i) + { + 
case 0: assert(k == 4); assert(d == 'b'); break; + case 1: assert(k == 2); assert(d == 0xF4); break; + case 2: assert(k == 2); assert(d == 0x80); break; + case 3: assert(k == 2); assert(d == 0x91); break; + case 4: assert(k == 2); assert(d == 0x96); break; + case 5: assert(k == 1); assert(d == 0xE1); break; + case 6: assert(k == 1); assert(d == 0x88); break; + case 7: assert(k == 1); assert(d == 0xB4); break; + case 8: assert(k == 0); assert(d == 'a'); break; + default: assert(0); + } + i++; + } + assert(i == 9); +} + +/*****************************/ + +extern (C) int _aApplyRdc2(in dchar[] aa, dg2_t dg) +{ int result; + + debug(apply) printf("_aApplyRdc2(), len = %d\n", aa.length); + for (size_t i = aa.length; i != 0; ) + { dchar d = aa[--i]; + char c; + + if (d & ~0x7F) + { + char[4] buf; + + auto b = toUTF8(buf, d); + foreach (char c2; b) + { + result = dg(&i, cast(void *)&c2); + if (result) + return result; + } + continue; + } + else + { c = cast(char)d; + } + result = dg(&i, cast(void *)&c); + if (result) + break; + } + return result; +} + +unittest +{ + debug(apply) printf("_aApplyRdc2.unittest\n"); + + auto s = "hello"d[]; + int i; + + foreach_reverse(k, char d; s) + { + //printf("i = %d, k = %d, d = %x\n", i, k, d); + assert(k == 4 - i); + switch (i) + { + case 0: assert(d == 'o'); break; + case 1: assert(d == 'l'); break; + case 2: assert(d == 'l'); break; + case 3: assert(d == 'e'); break; + case 4: assert(d == 'h'); break; + default: assert(0); + } + i++; + } + assert(i == 5); + + s = "a\u1234\U00100456b"; + i = 0; + foreach_reverse(k, char d; s) + { + //printf("i = %d, k = %d, d = %x\n", i, k, d); + switch (i) + { + case 0: assert(k == 3); assert(d == 'b'); break; + case 1: assert(k == 2); assert(d == 0xF4); break; + case 2: assert(k == 2); assert(d == 0x80); break; + case 3: assert(k == 2); assert(d == 0x91); break; + case 4: assert(k == 2); assert(d == 0x96); break; + case 5: assert(k == 1); assert(d == 0xE1); break; + case 6: assert(k == 1); assert(d 
== 0x88); break; + case 7: assert(k == 1); assert(d == 0xB4); break; + case 8: assert(k == 0); assert(d == 'a'); break; + default: assert(0); + } + i++; + } + assert(i == 9); +} + +/*****************************/ + +extern (C) int _aApplyRdw2(in dchar[] aa, dg2_t dg) +{ int result; + + debug(apply) printf("_aApplyRdw2(), len = %d\n", aa.length); + for (size_t i = aa.length; i != 0; ) + { dchar d = aa[--i]; + wchar w; + + if (d <= 0xFFFF) + w = cast(wchar) d; + else + { + w = cast(wchar) ((((d - 0x10000) >> 10) & 0x3FF) + 0xD800); + result = dg(&i, cast(void *)&w); + if (result) + break; + w = cast(wchar) (((d - 0x10000) & 0x3FF) + 0xDC00); + } + result = dg(&i, cast(void *)&w); + if (result) + break; + } + return result; +} + +unittest +{ + debug(apply) printf("_aApplyRdw2.unittest\n"); + + auto s = "hello"d[]; + int i; + + foreach_reverse(k, wchar d; s) + { + //printf("i = %d, k = %d, d = %x\n", i, k, d); + assert(k == 4 - i); + switch (i) + { + case 0: assert(d == 'o'); break; + case 1: assert(d == 'l'); break; + case 2: assert(d == 'l'); break; + case 3: assert(d == 'e'); break; + case 4: assert(d == 'h'); break; + default: assert(0); + } + i++; + } + assert(i == 5); + + s = "a\u1234\U00100456b"; + i = 0; + foreach_reverse(k, wchar d; s) + { + //printf("i = %d, k = %d, d = %x\n", i, k, d); + switch (i) + { + case 0: assert(k == 3); assert(d == 'b'); break; + case 1: assert(k == 2); assert(d == 0xDBC1); break; + case 2: assert(k == 2); assert(d == 0xDC56); break; + case 3: assert(k == 1); assert(d == 0x1234); break; + case 4: assert(k == 0); assert(d == 'a'); break; + default: assert(0); + } + i++; + } + assert(i == 5); +} diff --git a/druntime/src/compiler/dmd/aaA.d b/druntime/src/compiler/dmd/aaA.d new file mode 100644 index 00000000..418fb364 --- /dev/null +++ b/druntime/src/compiler/dmd/aaA.d @@ -0,0 +1,872 @@ +/** + * Implementation of associative arrays. + * + * Copyright: Copyright Digital Mars 2000 - 2009. 
+ * License: aa.a.b.length * 4) + { + //printf("rehash\n"); + _aaRehash(aa,keyti); + } + +Lret: + return cast(void *)(e + 1) + keysize; +} + + +/************************************************* + * Get pointer to value in associative array indexed by key. + * Returns null if it is not already there. + */ + +void* _aaGetRvalue(AA aa, TypeInfo keyti, size_t valuesize, ...) +{ + //printf("_aaGetRvalue(valuesize = %u)\n", valuesize); + if (!aa.a) + return null; + + auto pkey = cast(void *)(&valuesize + 1); + auto keysize = aligntsize(keyti.tsize()); + auto len = aa.a.b.length; + + if (len) + { + auto key_hash = keyti.getHash(pkey); + //printf("hash = %d\n", key_hash); + size_t i = key_hash % len; + auto e = aa.a.b[i]; + while (e !is null) + { + if (key_hash == e.hash) + { + auto c = keyti.compare(pkey, e + 1); + if (c == 0) + return cast(void *)(e + 1) + keysize; + e = (c < 0) ? e.left : e.right; + } + else + e = (key_hash < e.hash) ? e.left : e.right; + } + } + return null; // not found, caller will throw exception +} + + +/************************************************* + * Determine if key is in aa. + * Returns: + * null not in aa + * !=null in aa, return pointer to value + */ + +void* _aaIn(AA aa, TypeInfo keyti, ...) +in +{ +} +out (result) +{ + //assert(result == 0 || result == 1); +} +body +{ + if (aa.a) + { + auto pkey = cast(void *)(&keyti + 1); + + //printf("_aaIn(), .length = %d, .ptr = %x\n", aa.a.length, cast(uint)aa.a.ptr); + auto len = aa.a.b.length; + + if (len) + { + auto key_hash = keyti.getHash(pkey); + //printf("hash = %d\n", key_hash); + size_t i = key_hash % len; + auto e = aa.a.b[i]; + while (e !is null) + { + if (key_hash == e.hash) + { + auto c = keyti.compare(pkey, e + 1); + if (c == 0) + return cast(void *)(e + 1) + aligntsize(keyti.tsize()); + e = (c < 0) ? e.left : e.right; + } + else + e = (key_hash < e.hash) ? 
e.left : e.right; + } + } + } + + // Not found + return null; +} + +/************************************************* + * Delete key entry in aa[]. + * If key is not in aa[], do nothing. + */ + +void _aaDel(AA aa, TypeInfo keyti, ...) +{ + auto pkey = cast(void *)(&keyti + 1); + aaA *e; + + if (aa.a && aa.a.b.length) + { + auto key_hash = keyti.getHash(pkey); + //printf("hash = %d\n", key_hash); + size_t i = key_hash % aa.a.b.length; + auto pe = &aa.a.b[i]; + while ((e = *pe) !is null) // null means not found + { + if (key_hash == e.hash) + { + auto c = keyti.compare(pkey, e + 1); + if (c == 0) + { + if (!e.left && !e.right) + { + *pe = null; + } + else if (e.left && !e.right) + { + *pe = e.left; + e.left = null; + } + else if (!e.left && e.right) + { + *pe = e.right; + e.right = null; + } + else + { + *pe = e.left; + e.left = null; + do + pe = &(*pe).right; + while (*pe); + *pe = e.right; + e.right = null; + } + + aa.a.nodes--; + gc_free(e); + break; + } + pe = (c < 0) ? &e.left : &e.right; + } + else + pe = (key_hash < e.hash) ? &e.left : &e.right; + } + } +} + + +/******************************************** + * Produce array of values from aa. + */ + +ArrayRet_t _aaValues(AA aa, size_t keysize, size_t valuesize) +in +{ + assert(keysize == aligntsize(keysize)); +} +body +{ + size_t resi; + Array a; + + void _aaValues_x(aaA* e) + { + do + { + memcpy(a.ptr + resi * valuesize, + cast(byte*)e + aaA.sizeof + keysize, + valuesize); + resi++; + if (e.left) + { if (!e.right) + { e = e.left; + continue; + } + _aaValues_x(e.left); + } + e = e.right; + } while (e !is null); + } + + if (aa.a) + { + a.length = _aaLen(aa); + a.ptr = cast(byte*) gc_malloc(a.length * valuesize, + valuesize < (void*).sizeof ? BlkAttr.NO_SCAN : 0); + resi = 0; + foreach (e; aa.a.b) + { + if (e) + _aaValues_x(e); + } + assert(resi == a.length); + } + return *cast(ArrayRet_t*)(&a); +} + + +/******************************************** + * Rehash an array. 
+ */ + +void* _aaRehash(AA* paa, TypeInfo keyti) +in +{ + //_aaInvAh(paa); +} +out (result) +{ + //_aaInvAh(result); +} +body +{ + BB newb; + + void _aaRehash_x(aaA* olde) + { + while (1) + { + auto left = olde.left; + auto right = olde.right; + olde.left = null; + olde.right = null; + + aaA *e; + + //printf("rehash %p\n", olde); + auto key_hash = olde.hash; + size_t i = key_hash % newb.b.length; + auto pe = &newb.b[i]; + while ((e = *pe) !is null) + { + //printf("\te = %p, e.left = %p, e.right = %p\n", e, e.left, e.right); + assert(e.left != e); + assert(e.right != e); + if (key_hash == e.hash) + { + auto c = keyti.compare(olde + 1, e + 1); + assert(c != 0); + pe = (c < 0) ? &e.left : &e.right; + } + else + pe = (key_hash < e.hash) ? &e.left : &e.right; + } + *pe = olde; + + if (right) + { + if (!left) + { olde = right; + continue; + } + _aaRehash_x(right); + } + if (!left) + break; + olde = left; + } + } + + //printf("Rehash\n"); + if (paa.a) + { + auto aa = paa.a; + auto len = _aaLen(*paa); + if (len) + { size_t i; + + for (i = 0; i < prime_list.length - 1; i++) + { + if (len <= prime_list[i]) + break; + } + len = prime_list[i]; + newb.b = new aaA*[len]; + + foreach (e; aa.b) + { + if (e) + _aaRehash_x(e); + } + delete aa.b; + + newb.nodes = aa.nodes; + newb.keyti = aa.keyti; + } + + *paa.a = newb; + _aaBalance(paa); + } + return (*paa).a; +} + +/******************************************** + * Balance an array. + */ + +void _aaBalance(AA* paa) +{ + //printf("_aaBalance()\n"); + if (paa.a) + { + aaA*[16] tmp; + aaA*[] array = tmp; + auto aa = paa.a; + foreach (j, e; aa.b) + { + /* Temporarily store contents of bucket in array[] + */ + size_t k = 0; + void addToArray(aaA* e) + { + while (e) + { addToArray(e.left); + if (k == array.length) + array.length = array.length * 2; + array[k++] = e; + e = e.right; + } + } + addToArray(e); + /* The contents of the bucket are now sorted into array[]. + * Rebuild the tree. 
+ */ + void buildTree(aaA** p, size_t x1, size_t x2) + { + if (x1 >= x2) + *p = null; + else + { auto mid = (x1 + x2) >> 1; + *p = array[mid]; + buildTree(&(*p).left, x1, mid); + buildTree(&(*p).right, mid + 1, x2); + } + } + auto p = &aa.b[j]; + buildTree(p, 0, k); + } + } +} +/******************************************** + * Produce array of N byte keys from aa. + */ + +ArrayRet_t _aaKeys(AA aa, size_t keysize) +{ + byte[] res; + size_t resi; + + void _aaKeys_x(aaA* e) + { + do + { + memcpy(&res[resi * keysize], cast(byte*)(e + 1), keysize); + resi++; + if (e.left) + { if (!e.right) + { e = e.left; + continue; + } + _aaKeys_x(e.left); + } + e = e.right; + } while (e !is null); + } + + auto len = _aaLen(aa); + if (!len) + return 0; + res = (cast(byte*) gc_malloc(len * keysize, + !(aa.a.keyti.flags() & 1) ? BlkAttr.NO_SCAN : 0))[0 .. len * keysize]; + resi = 0; + foreach (e; aa.a.b) + { + if (e) + _aaKeys_x(e); + } + assert(resi == len); + + Array a; + a.length = len; + a.ptr = res.ptr; + return *cast(ArrayRet_t*)(&a); +} + + +/********************************************** + * 'apply' for associative arrays - to support foreach + */ + +// dg is D, but _aaApply() is C +extern (D) typedef int delegate(void *) dg_t; + +int _aaApply(AA aa, size_t keysize, dg_t dg) +in +{ + assert(aligntsize(keysize) == keysize); +} +body +{ int result; + + //printf("_aaApply(aa = x%llx, keysize = %d, dg = x%llx)\n", aa.a, keysize, dg); + + int treewalker(aaA* e) + { int result; + + do + { + //printf("treewalker(e = %p, dg = x%llx)\n", e, dg); + result = dg(cast(void *)(e + 1) + keysize); + if (result) + break; + if (e.right) + { if (!e.left) + { + e = e.right; + continue; + } + result = treewalker(e.right); + if (result) + break; + } + e = e.left; + } while (e); + + return result; + } + + if (aa.a) + { + foreach (e; aa.a.b) + { + if (e) + { + result = treewalker(e); + if (result) + break; + } + } + } + return result; +} + +// dg is D, but _aaApply2() is C +extern (D) typedef int 
delegate(void *, void *) dg2_t; + +int _aaApply2(AA aa, size_t keysize, dg2_t dg) +in +{ + assert(aligntsize(keysize) == keysize); +} +body +{ int result; + + //printf("_aaApply(aa = x%llx, keysize = %d, dg = x%llx)\n", aa.a, keysize, dg); + + int treewalker(aaA* e) + { int result; + + do + { + //printf("treewalker(e = %p, dg = x%llx)\n", e, dg); + result = dg(cast(void *)(e + 1), cast(void *)(e + 1) + keysize); + if (result) + break; + if (e.right) + { if (!e.left) + { + e = e.right; + continue; + } + result = treewalker(e.right); + if (result) + break; + } + e = e.left; + } while (e); + + return result; + } + + if (aa.a) + { + foreach (e; aa.a.b) + { + if (e) + { + result = treewalker(e); + if (result) + break; + } + } + } + return result; +} + + +/*********************************** + * Construct an associative array of type ti from + * length pairs of key/value pairs. + */ + +extern (C) +BB* _d_assocarrayliteralT(TypeInfo_AssociativeArray ti, size_t length, ...) +{ + auto valuesize = ti.next.tsize(); // value size + auto keyti = ti.key; + auto keysize = keyti.tsize(); // key size + BB* result; + + //printf("_d_assocarrayliteralT(keysize = %d, valuesize = %d, length = %d)\n", keysize, valuesize, length); + //printf("tivalue = %.*s\n", ti.next.classinfo.name); + if (length == 0 || valuesize == 0 || keysize == 0) + { + ; + } + else + { + va_list q; + va_start!(size_t)(q, length); + + result = new BB(); + result.keyti = keyti; + size_t i; + + for (i = 0; i < prime_list.length - 1; i++) + { + if (length <= prime_list[i]) + break; + } + auto len = prime_list[i]; + result.b = new aaA*[len]; + + size_t keystacksize = (keysize + int.sizeof - 1) & ~(int.sizeof - 1); + size_t valuestacksize = (valuesize + int.sizeof - 1) & ~(int.sizeof - 1); + + size_t keytsize = aligntsize(keysize); + + for (size_t j = 0; j < length; j++) + { void* pkey = q; + q += keystacksize; + void* pvalue = q; + q += valuestacksize; + aaA* e; + + auto key_hash = keyti.getHash(pkey); + //printf("hash 
= %d\n", key_hash); + i = key_hash % len; + auto pe = &result.b[i]; + while (1) + { + e = *pe; + if (!e) + { + // Not found, create new elem + //printf("create new one\n"); + e = cast(aaA *) cast(void*) new void[aaA.sizeof + keytsize + valuesize]; + memcpy(e + 1, pkey, keysize); + e.hash = key_hash; + *pe = e; + result.nodes++; + break; + } + if (key_hash == e.hash) + { + auto c = keyti.compare(pkey, e + 1); + if (c == 0) + break; + pe = (c < 0) ? &e.left : &e.right; + } + else + pe = (key_hash < e.hash) ? &e.left : &e.right; + } + memcpy(cast(void *)(e + 1) + keytsize, pvalue, valuesize); + } + + va_end(q); + } + return result; +} diff --git a/druntime/src/compiler/dmd/adi.d b/druntime/src/compiler/dmd/adi.d new file mode 100644 index 00000000..7f401ad1 --- /dev/null +++ b/druntime/src/compiler/dmd/adi.d @@ -0,0 +1,603 @@ +/** + * Implementation of dynamic array property support routines. + * + * Copyright: Copyright Digital Mars 2000 - 2009. + * License: = lo); + } + if (lo == hi) + break; + + debug(adi) printf("\tstridelo = %d, stridehi = %d\n", stridelo, stridehi); + if (stridelo == stridehi) + { + + memcpy(tmp.ptr, lo, stridelo); + memcpy(lo, hi, stridelo); + memcpy(hi, tmp.ptr, stridelo); + lo += stridelo; + hi--; + continue; + } + + /* Shift the whole array. 
This is woefully inefficient + */ + memcpy(tmp.ptr, hi, stridehi); + memcpy(tmplo.ptr, lo, stridelo); + memmove(lo + stridehi, lo + stridelo , (hi - lo) - stridelo); + memcpy(lo, tmp.ptr, stridehi); + memcpy(hi + stridehi - stridelo, tmplo.ptr, stridelo); + + lo += stridehi; + hi = hi - 1 + (stridehi - stridelo); + } + } + return *cast(long*)(&a); +} + +unittest +{ + auto a = "abcd"c[]; + + auto r = a.dup.reverse; + //writefln(r); + assert(r == "dcba"); + + a = "a\u1235\u1234c"; + //writefln(a); + r = a.dup.reverse; + //writefln(r); + assert(r == "c\u1234\u1235a"); + + a = "ab\u1234c"; + //writefln(a); + r = a.dup.reverse; + //writefln(r); + assert(r == "c\u1234ba"); + + a = "\u3026\u2021\u3061\n"; + r = a.dup.reverse; + assert(r == "\n\u3061\u2021\u3026"); +} + + +/********************************************** + * Reverse array of wchars. + * Handled separately because embedded multiword encodings should not be + * reversed. + */ + +extern (C) long _adReverseWchar(wchar[] a) +{ + if (a.length > 1) + { + wchar[2] tmp; + wchar* lo = a.ptr; + wchar* hi = &a[length - 1]; + + while (lo < hi) + { auto clo = *lo; + auto chi = *hi; + + if ((clo < 0xD800 || clo > 0xDFFF) && + (chi < 0xD800 || chi > 0xDFFF)) + { + *lo = chi; + *hi = clo; + lo++; + hi--; + continue; + } + + int stridelo = 1 + (clo >= 0xD800 && clo <= 0xDBFF); + + int stridehi = 1; + if (chi >= 0xDC00 && chi <= 0xDFFF) + { + chi = *--hi; + stridehi++; + assert(hi >= lo); + } + if (lo == hi) + break; + + if (stridelo == stridehi) + { int stmp; + + assert(stridelo == 2); + assert(stmp.sizeof == 2 * (*lo).sizeof); + stmp = *cast(int*)lo; + *cast(int*)lo = *cast(int*)hi; + *cast(int*)hi = stmp; + lo += stridelo; + hi--; + continue; + } + + /* Shift the whole array. 
This is woefully inefficient + */ + memcpy(tmp.ptr, hi, stridehi * wchar.sizeof); + memcpy(hi + stridehi - stridelo, lo, stridelo * wchar.sizeof); + memmove(lo + stridehi, lo + stridelo , (hi - (lo + stridelo)) * wchar.sizeof); + memcpy(lo, tmp.ptr, stridehi * wchar.sizeof); + + lo += stridehi; + hi = hi - 1 + (stridehi - stridelo); + } + } + return *cast(long*)(&a); +} + +unittest +{ + wstring a = "abcd"; + + auto r = a.dup.reverse; + assert(r == "dcba"); + + a = "a\U00012356\U00012346c"; + r = a.dup.reverse; + assert(r == "c\U00012346\U00012356a"); + + a = "ab\U00012345c"; + r = a.dup.reverse; + assert(r == "c\U00012345ba"); +} + + +/********************************************** + * Support for array.reverse property. + */ + +extern (C) long _adReverse(Array a, size_t szelem) +out (result) +{ + assert(result is *cast(long*)(&a)); +} +body +{ + if (a.length >= 2) + { + byte* tmp; + byte[16] buffer; + + void* lo = a.ptr; + void* hi = a.ptr + (a.length - 1) * szelem; + + tmp = buffer.ptr; + if (szelem > 16) + { + //version (Windows) + tmp = cast(byte*) alloca(szelem); + //else + //tmp = gc_malloc(szelem); + } + + for (; lo < hi; lo += szelem, hi -= szelem) + { + memcpy(tmp, lo, szelem); + memcpy(lo, hi, szelem); + memcpy(hi, tmp, szelem); + } + + version (Windows) + { + } + else + { + //if (szelem > 16) + // BUG: bad code is generate for delete pointer, tries + // to call delclass. 
+ //gc_free(tmp); + } + } + return *cast(long*)(&a); +} + +unittest +{ + debug(adi) printf("array.reverse.unittest\n"); + + int[] a = new int[5]; + int[] b; + size_t i; + + for (i = 0; i < 5; i++) + a[i] = i; + b = a.reverse; + assert(b is a); + for (i = 0; i < 5; i++) + assert(a[i] == 4 - i); + + struct X20 + { // More than 16 bytes in size + int a; + int b, c, d, e; + } + + X20[] c = new X20[5]; + X20[] d; + + for (i = 0; i < 5; i++) + { c[i].a = i; + c[i].e = 10; + } + d = c.reverse; + assert(d is c); + for (i = 0; i < 5; i++) + { + assert(c[i].a == 4 - i); + assert(c[i].e == 10); + } +} + +/********************************************** + * Sort array of chars. + */ + +extern (C) long _adSortChar(char[] a) +{ + if (a.length > 1) + { + dstring da = toUTF32(a); + da.sort; + size_t i = 0; + foreach (dchar d; da) + { char[4] buf; + auto t = toUTF8(buf, d); + a[i .. i + t.length] = t[]; + i += t.length; + } + delete da; + } + return *cast(long*)(&a); +} + +/********************************************** + * Sort array of wchars. + */ + +extern (C) long _adSortWchar(wchar[] a) +{ + if (a.length > 1) + { + dstring da = toUTF32(a); + da.sort; + size_t i = 0; + foreach (dchar d; da) + { wchar[2] buf; + auto t = toUTF16(buf, d); + a[i .. i + t.length] = t[]; + i += t.length; + } + delete da; + } + return *cast(long*)(&a); +} + +/*************************************** + * Support for array equality test. 
+ * Returns: + * 1 equal + * 0 not equal + */ + +extern (C) int _adEq(Array a1, Array a2, TypeInfo ti) +{ + debug(adi) printf("_adEq(a1.length = %d, a2.length = %d)\n", a1.length, a2.length); + if (a1.length != a2.length) + return 0; // not equal + auto sz = ti.tsize(); + auto p1 = a1.ptr; + auto p2 = a2.ptr; + + if (sz == 1) + // We should really have a ti.isPOD() check for this + return (memcmp(p1, p2, a1.length) == 0); + + for (size_t i = 0; i < a1.length; i++) + { + if (!ti.equals(p1 + i * sz, p2 + i * sz)) + return 0; // not equal + } + return 1; // equal +} + +extern (C) int _adEq2(Array a1, Array a2, TypeInfo ti) +{ + debug(adi) printf("_adEq2(a1.length = %d, a2.length = %d)\n", a1.length, a2.length); + if (a1.length != a2.length) + return 0; // not equal + if (!ti.equals(&a1, &a2)) + return 0; + return 1; +} +unittest +{ + debug(adi) printf("array.Eq unittest\n"); + + auto a = "hello"c; + + assert(a != "hel"); + assert(a != "helloo"); + assert(a != "betty"); + assert(a == "hello"); + assert(a != "hxxxx"); +} + +/*************************************** + * Support for array compare test. + */ + +extern (C) int _adCmp(Array a1, Array a2, TypeInfo ti) +{ + debug(adi) printf("adCmp()\n"); + auto len = a1.length; + if (a2.length < len) + len = a2.length; + auto sz = ti.tsize(); + void *p1 = a1.ptr; + void *p2 = a2.ptr; + + if (sz == 1) + { // We should really have a ti.isPOD() check for this + auto c = memcmp(p1, p2, len); + if (c) + return c; + } + else + { + for (size_t i = 0; i < len; i++) + { + auto c = ti.compare(p1 + i * sz, p2 + i * sz); + if (c) + return c; + } + } + if (a1.length == a2.length) + return 0; + return (a1.length > a2.length) ? 
1 : -1; +} + +extern (C) int _adCmp2(Array a1, Array a2, TypeInfo ti) +{ + debug(adi) printf("_adCmp2(a1.length = %d, a2.length = %d)\n", a1.length, a2.length); + return ti.compare(&a1, &a2); +} +unittest +{ + debug(adi) printf("array.Cmp unittest\n"); + + auto a = "hello"c; + + assert(a > "hel"); + assert(a >= "hel"); + assert(a < "helloo"); + assert(a <= "helloo"); + assert(a > "betty"); + assert(a >= "betty"); + assert(a == "hello"); + assert(a <= "hello"); + assert(a >= "hello"); +} + +/*************************************** + * Support for array compare test. + */ + +extern (C) int _adCmpChar(Array a1, Array a2) +{ + version (X86) + { + asm + { naked ; + + push EDI ; + push ESI ; + + mov ESI,a1+4[4+ESP] ; + mov EDI,a2+4[4+ESP] ; + + mov ECX,a1[4+ESP] ; + mov EDX,a2[4+ESP] ; + + cmp ECX,EDX ; + jb GotLength ; + + mov ECX,EDX ; + +GotLength: + cmp ECX,4 ; + jb DoBytes ; + + // Do alignment if neither is dword aligned + test ESI,3 ; + jz Aligned ; + + test EDI,3 ; + jz Aligned ; +DoAlign: + mov AL,[ESI] ; //align ESI to dword bounds + mov DL,[EDI] ; + + cmp AL,DL ; + jnz Unequal ; + + inc ESI ; + inc EDI ; + + test ESI,3 ; + + lea ECX,[ECX-1] ; + jnz DoAlign ; +Aligned: + mov EAX,ECX ; + + // do multiple of 4 bytes at a time + + shr ECX,2 ; + jz TryOdd ; + + repe ; + cmpsd ; + + jnz UnequalQuad ; + +TryOdd: + mov ECX,EAX ; +DoBytes: + // if still equal and not end of string, do up to 3 bytes slightly + // slower. 
+ + and ECX,3 ; + jz Equal ; + + repe ; + cmpsb ; + + jnz Unequal ; +Equal: + mov EAX,a1[4+ESP] ; + mov EDX,a2[4+ESP] ; + + sub EAX,EDX ; + pop ESI ; + + pop EDI ; + ret ; + +UnequalQuad: + mov EDX,[EDI-4] ; + mov EAX,[ESI-4] ; + + cmp AL,DL ; + jnz Unequal ; + + cmp AH,DH ; + jnz Unequal ; + + shr EAX,16 ; + + shr EDX,16 ; + + cmp AL,DL ; + jnz Unequal ; + + cmp AH,DH ; +Unequal: + sbb EAX,EAX ; + pop ESI ; + + or EAX,1 ; + pop EDI ; + + ret ; + } + } + else + { + int len; + int c; + + debug(adi) printf("adCmpChar()\n"); + len = a1.length; + if (a2.length < len) + len = a2.length; + c = memcmp(cast(char *)a1.ptr, cast(char *)a2.ptr, len); + if (!c) + c = cast(int)a1.length - cast(int)a2.length; + return c; + } +} + +unittest +{ + debug(adi) printf("array.CmpChar unittest\n"); + + auto a = "hello"c; + + assert(a > "hel"); + assert(a >= "hel"); + assert(a < "helloo"); + assert(a <= "helloo"); + assert(a > "betty"); + assert(a >= "betty"); + assert(a == "hello"); + assert(a <= "hello"); + assert(a >= "hello"); +} diff --git a/druntime/src/compiler/dmd/alloca.d b/druntime/src/compiler/dmd/alloca.d new file mode 100644 index 00000000..d6381e35 --- /dev/null +++ b/druntime/src/compiler/dmd/alloca.d @@ -0,0 +1,135 @@ +/** + * Implementation of alloca() standard C routine. + * + * Copyright: Copyright Digital Mars 1990 - 2009. + * License: buf.sizeof) + tmp = alloca(element_size)[0 .. element_size]; + else + tmp = buf; + + + if (to.ptr <= from.ptr) + { + foreach (i; 0 .. 
to.length) + { + void* pto = to.ptr + i * element_size; + void* pfrom = from.ptr + i * element_size; + memcpy(tmp.ptr, pto, element_size); + memcpy(pto, pfrom, element_size); + ti.postblit(pto); + ti.destroy(tmp.ptr); + } + } + else + { + for (int i = to.length; i--; ) + { + void* pto = to.ptr + i * element_size; + void* pfrom = from.ptr + i * element_size; + memcpy(tmp.ptr, pto, element_size); + memcpy(pto, pfrom, element_size); + ti.postblit(pto); + ti.destroy(tmp.ptr); + } + } + return to; +} + +/** + * Does array initialization (not assignment) from another + * array of the same element type. + * ti is the element type. + */ +extern (C) void[] _d_arrayctor(TypeInfo ti, void[] from, void[] to) +{ + debug(PRINTF) printf("_d_arrayctor(from = %p,%d, to = %p,%d) size = %d\n", from.ptr, from.length, to.ptr, to.length, ti.tsize()); + + if (to.length != from.length) + { + char[10] tmp = void; + string msg = "lengths don't match for array initialization,"c; + msg ~= tmp.intToString(to.length) ~ " = " ~ tmp.intToString(from.length); + throw new Exception(msg); + } + + auto element_size = ti.tsize(); + + int i; + try + { + for (i = 0; i < to.length; i++) + { + // Copy construction is defined as bit copy followed by postblit. + memcpy(to.ptr + i * element_size, from.ptr + i * element_size, element_size); + ti.postblit(to.ptr + i * element_size); + } + } + catch (Object o) + { + /* Destroy, in reverse order, what we've constructed so far + */ + while (i--) + { + ti.destroy(to.ptr + i * element_size); + } + + throw o; + } + return to; +} + + +/** + * Do assignment to an array. + * p[0 .. count] = value; + */ +extern (C) void* _d_arraysetassign(void* p, void* value, int count, TypeInfo ti) +{ + void* pstart = p; + + auto element_size = ti.tsize(); + + //Need a temporary buffer tmp[] big enough to hold one element + void[16] buf = void; + void[] tmp; + if (element_size > buf.sizeof) + { + tmp = alloca(element_size)[0 .. 
element_size]; + } + else + tmp = buf; + + foreach (i; 0 .. count) + { + memcpy(tmp.ptr, p, element_size); + memcpy(p, value, element_size); + ti.postblit(p); + ti.destroy(tmp.ptr); + p += element_size; + } + return pstart; +} + +/** + * Do construction of an array. + * ti[count] p = value; + */ +extern (C) void* _d_arraysetctor(void* p, void* value, int count, TypeInfo ti) +{ + void* pstart = p; + auto element_size = ti.tsize(); + + try + { + foreach (i; 0 .. count) + { + // Copy construction is defined as bit copy followed by postblit. + memcpy(p, value, element_size); + ti.postblit(p); + p += element_size; + } + } + catch (Object o) + { + // Destroy, in reverse order, what we've constructed so far + while (p > pstart) + { + p -= element_size; + ti.destroy(p); + } + + throw o; + } + return pstart; +} diff --git a/druntime/src/compiler/dmd/arraybyte.d b/druntime/src/compiler/dmd/arraybyte.d new file mode 100644 index 00000000..e3d50a25 --- /dev/null +++ b/druntime/src/compiler/dmd/arraybyte.d @@ -0,0 +1,1893 @@ +/** + * Contains SSE2 and MMX versions of certain operations for char, byte, and + * ubyte ('a', 'g' and 'h' suffixes). + * + * Copyright: Copyright Digital Mars 2008 - 2009. 
+ * License: = 64) + { + auto n = aptr + (a.length & ~63); + + uint l = cast(ubyte) value; + l |= (l << 8); + l |= (l << 16); + + if (((cast(uint) aptr | cast(uint) bptr) & 15) != 0) + { + asm // unaligned case + { + mov ESI, aptr; + mov EDI, n; + mov EAX, bptr; + movd XMM4, l; + pshufd XMM4, XMM4, 0; + + align 8; + startaddsse2u: + add ESI, 64; + movdqu XMM0, [EAX]; + movdqu XMM1, [EAX+16]; + movdqu XMM2, [EAX+32]; + movdqu XMM3, [EAX+48]; + add EAX, 64; + paddb XMM0, XMM4; + paddb XMM1, XMM4; + paddb XMM2, XMM4; + paddb XMM3, XMM4; + movdqu [ESI -64], XMM0; + movdqu [ESI+16-64], XMM1; + movdqu [ESI+32-64], XMM2; + movdqu [ESI+48-64], XMM3; + cmp ESI, EDI; + jb startaddsse2u; + + mov aptr, ESI; + mov bptr, EAX; + } + } + else + { + asm // aligned case + { + mov ESI, aptr; + mov EDI, n; + mov EAX, bptr; + movd XMM4, l; + pshufd XMM4, XMM4, 0; + + align 8; + startaddsse2a: + add ESI, 64; + movdqa XMM0, [EAX]; + movdqa XMM1, [EAX+16]; + movdqa XMM2, [EAX+32]; + movdqa XMM3, [EAX+48]; + add EAX, 64; + paddb XMM0, XMM4; + paddb XMM1, XMM4; + paddb XMM2, XMM4; + paddb XMM3, XMM4; + movdqa [ESI -64], XMM0; + movdqa [ESI+16-64], XMM1; + movdqa [ESI+32-64], XMM2; + movdqa [ESI+48-64], XMM3; + cmp ESI, EDI; + jb startaddsse2a; + + mov aptr, ESI; + mov bptr, EAX; + } + } + } + else + // MMX version is 1000% faster + if (mmx() && a.length >= 32) + { + auto n = aptr + (a.length & ~31); + + uint l = cast(ubyte) value; + l |= (l << 8); + + asm + { + mov ESI, aptr; + mov EDI, n; + mov EAX, bptr; + movd MM4, l; + pshufw MM4, MM4, 0; + + align 4; + startaddmmx: + add ESI, 32; + movq MM0, [EAX]; + movq MM1, [EAX+8]; + movq MM2, [EAX+16]; + movq MM3, [EAX+24]; + add EAX, 32; + paddb MM0, MM4; + paddb MM1, MM4; + paddb MM2, MM4; + paddb MM3, MM4; + movq [ESI -32], MM0; + movq [ESI+8 -32], MM1; + movq [ESI+16-32], MM2; + movq [ESI+24-32], MM3; + cmp ESI, EDI; + jb startaddmmx; + + emms; + mov aptr, ESI; + mov bptr, EAX; + } + } + /* trying to be fair and treat normal 32-bit cpu the 
same way as we do + * the SIMD units, with unrolled asm. There's not enough registers, + * really. + */ + else + if (a.length >= 4) + { + + auto n = aptr + (a.length & ~3); + asm + { + mov ESI, aptr; + mov EDI, n; + mov EAX, bptr; + mov CL, value; + + align 4; + startadd386: + add ESI, 4; + mov DX, [EAX]; + mov BX, [EAX+2]; + add EAX, 4; + add BL, CL; + add BH, CL; + add DL, CL; + add DH, CL; + mov [ESI -4], DX; + mov [ESI+2 -4], BX; + cmp ESI, EDI; + jb startadd386; + + mov aptr, ESI; + mov bptr, EAX; + } + + } + } + + while (aptr < aend) + *aptr++ = cast(T)(*bptr++ + value); + + return a; +} + +unittest +{ + printf("_arraySliceExpAddSliceAssign_g unittest\n"); + + for (cpuid = 0; cpuid < CPUID_MAX; cpuid++) + { + version (log) printf(" cpuid %d\n", cpuid); + + for (int j = 0; j < 2; j++) + { + const int dim = 67; + T[] a = new T[dim + j]; // aligned on 16 byte boundary + a = a[j .. dim + j]; // misalign for second iteration + T[] b = new T[dim + j]; + b = b[j .. dim + j]; + T[] c = new T[dim + j]; + c = c[j .. 
dim + j]; + + for (int i = 0; i < dim; i++) + { a[i] = cast(T)i; + b[i] = cast(T)(i + 7); + c[i] = cast(T)(i * 2); + } + + c[] = a[] + 6; + + for (int i = 0; i < dim; i++) + { + if (c[i] != cast(T)(a[i] + 6)) + { + printf("[%d]: %d != %d + 6\n", i, c[i], a[i]); + assert(0); + } + } + } + } +} + + +/* ======================================================================== */ + +/*********************** + * Computes: + * a[] = b[] + c[] + */ + +T[] _arraySliceSliceAddSliceAssign_a(T[] a, T[] c, T[] b) +{ + return _arraySliceSliceAddSliceAssign_g(a, c, b); +} + +T[] _arraySliceSliceAddSliceAssign_h(T[] a, T[] c, T[] b) +{ + return _arraySliceSliceAddSliceAssign_g(a, c, b); +} + +T[] _arraySliceSliceAddSliceAssign_g(T[] a, T[] c, T[] b) +in +{ + assert(a.length == b.length && b.length == c.length); + assert(disjoint(a, b)); + assert(disjoint(a, c)); + assert(disjoint(b, c)); +} +body +{ + //printf("_arraySliceSliceAddSliceAssign_g()\n"); + auto aptr = a.ptr; + auto aend = aptr + a.length; + auto bptr = b.ptr; + auto cptr = c.ptr; + + version (D_InlineAsm_X86) + { + // SSE2 aligned version is 5739% faster + if (sse2() && a.length >= 64) + { + auto n = aptr + (a.length & ~63); + + if (((cast(uint) aptr | cast(uint) bptr | cast(uint) cptr) & 15) != 0) + { + version (log) printf("\tsse2 unaligned\n"); + asm // unaligned case + { + mov ESI, aptr; + mov EDI, n; + mov EAX, bptr; + mov ECX, cptr; + + align 8; + startaddlsse2u: + add ESI, 64; + movdqu XMM0, [EAX]; + movdqu XMM1, [EAX+16]; + movdqu XMM2, [EAX+32]; + movdqu XMM3, [EAX+48]; + add EAX, 64; + movdqu XMM4, [ECX]; + movdqu XMM5, [ECX+16]; + movdqu XMM6, [ECX+32]; + movdqu XMM7, [ECX+48]; + add ECX, 64; + paddb XMM0, XMM4; + paddb XMM1, XMM5; + paddb XMM2, XMM6; + paddb XMM3, XMM7; + movdqu [ESI -64], XMM0; + movdqu [ESI+16-64], XMM1; + movdqu [ESI+32-64], XMM2; + movdqu [ESI+48-64], XMM3; + cmp ESI, EDI; + jb startaddlsse2u; + + mov aptr, ESI; + mov bptr, EAX; + mov cptr, ECX; + } + } + else + { + version (log) 
printf("\tsse2 aligned\n"); + asm // aligned case + { + mov ESI, aptr; + mov EDI, n; + mov EAX, bptr; + mov ECX, cptr; + + align 8; + startaddlsse2a: + add ESI, 64; + movdqa XMM0, [EAX]; + movdqa XMM1, [EAX+16]; + movdqa XMM2, [EAX+32]; + movdqa XMM3, [EAX+48]; + add EAX, 64; + movdqa XMM4, [ECX]; + movdqa XMM5, [ECX+16]; + movdqa XMM6, [ECX+32]; + movdqa XMM7, [ECX+48]; + add ECX, 64; + paddb XMM0, XMM4; + paddb XMM1, XMM5; + paddb XMM2, XMM6; + paddb XMM3, XMM7; + movdqa [ESI -64], XMM0; + movdqa [ESI+16-64], XMM1; + movdqa [ESI+32-64], XMM2; + movdqa [ESI+48-64], XMM3; + cmp ESI, EDI; + jb startaddlsse2a; + + mov aptr, ESI; + mov bptr, EAX; + mov cptr, ECX; + } + } + } + else + // MMX version is 4428% faster + if (mmx() && a.length >= 32) + { + version (log) printf("\tmmx\n"); + auto n = aptr + (a.length & ~31); + + asm + { + mov ESI, aptr; + mov EDI, n; + mov EAX, bptr; + mov ECX, cptr; + + align 4; + startaddlmmx: + add ESI, 32; + movq MM0, [EAX]; + movq MM1, [EAX+8]; + movq MM2, [EAX+16]; + movq MM3, [EAX+24]; + add EAX, 32; + movq MM4, [ECX]; + movq MM5, [ECX+8]; + movq MM6, [ECX+16]; + movq MM7, [ECX+24]; + add ECX, 32; + paddb MM0, MM4; + paddb MM1, MM5; + paddb MM2, MM6; + paddb MM3, MM7; + movq [ESI -32], MM0; + movq [ESI+8 -32], MM1; + movq [ESI+16-32], MM2; + movq [ESI+24-32], MM3; + cmp ESI, EDI; + jb startaddlmmx; + + emms; + mov aptr, ESI; + mov bptr, EAX; + mov cptr, ECX; + } + } + } + + version (log) if (aptr < aend) printf("\tbase\n"); + while (aptr < aend) + *aptr++ = cast(T)(*bptr++ + *cptr++); + + return a; +} + +unittest +{ + printf("_arraySliceSliceAddSliceAssign_g unittest\n"); + + for (cpuid = 0; cpuid < CPUID_MAX; cpuid++) + { + version (log) printf(" cpuid %d\n", cpuid); + + for (int j = 0; j < 2; j++) + { + const int dim = 67; + T[] a = new T[dim + j]; // aligned on 16 byte boundary + a = a[j .. dim + j]; // misalign for second iteration + T[] b = new T[dim + j]; + b = b[j .. dim + j]; + T[] c = new T[dim + j]; + c = c[j .. 
dim + j]; + + for (int i = 0; i < dim; i++) + { a[i] = cast(T)i; + b[i] = cast(T)(i + 7); + c[i] = cast(T)(i * 2); + } + + c[] = a[] + b[]; + + for (int i = 0; i < dim; i++) + { + if (c[i] != cast(T)(a[i] + b[i])) + { + printf("[%d]: %d != %d + %d\n", i, c[i], a[i], b[i]); + assert(0); + } + } + } + } +} + + +/* ======================================================================== */ + +/*********************** + * Computes: + * a[] += value + */ + +T[] _arrayExpSliceAddass_a(T[] a, T value) +{ + return _arrayExpSliceAddass_g(a, value); +} + +T[] _arrayExpSliceAddass_h(T[] a, T value) +{ + return _arrayExpSliceAddass_g(a, value); +} + +T[] _arrayExpSliceAddass_g(T[] a, T value) +{ + //printf("_arrayExpSliceAddass_g(a.length = %d, value = %Lg)\n", a.length, cast(real)value); + auto aptr = a.ptr; + auto aend = aptr + a.length; + + version (D_InlineAsm_X86) + { + // SSE2 aligned version is 1578% faster + if (sse2() && a.length >= 64) + { + auto n = aptr + (a.length & ~63); + + uint l = cast(ubyte) value; + l |= (l << 8); + l |= (l << 16); + + if (((cast(uint) aptr) & 15) != 0) + { + asm // unaligned case + { + mov ESI, aptr; + mov EDI, n; + movd XMM4, l; + pshufd XMM4, XMM4, 0; + + align 8; + startaddasssse2u: + movdqu XMM0, [ESI]; + movdqu XMM1, [ESI+16]; + movdqu XMM2, [ESI+32]; + movdqu XMM3, [ESI+48]; + add ESI, 64; + paddb XMM0, XMM4; + paddb XMM1, XMM4; + paddb XMM2, XMM4; + paddb XMM3, XMM4; + movdqu [ESI -64], XMM0; + movdqu [ESI+16-64], XMM1; + movdqu [ESI+32-64], XMM2; + movdqu [ESI+48-64], XMM3; + cmp ESI, EDI; + jb startaddasssse2u; + + mov aptr, ESI; + } + } + else + { + asm // aligned case + { + mov ESI, aptr; + mov EDI, n; + movd XMM4, l; + pshufd XMM4, XMM4, 0; + + align 8; + startaddasssse2a: + movdqa XMM0, [ESI]; + movdqa XMM1, [ESI+16]; + movdqa XMM2, [ESI+32]; + movdqa XMM3, [ESI+48]; + add ESI, 64; + paddb XMM0, XMM4; + paddb XMM1, XMM4; + paddb XMM2, XMM4; + paddb XMM3, XMM4; + movdqa [ESI -64], XMM0; + movdqa [ESI+16-64], XMM1; + movdqa 
[ESI+32-64], XMM2; + movdqa [ESI+48-64], XMM3; + cmp ESI, EDI; + jb startaddasssse2a; + + mov aptr, ESI; + } + } + } + else + // MMX version is 1721% faster + if (mmx() && a.length >= 32) + { + + auto n = aptr + (a.length & ~31); + + uint l = cast(ubyte) value; + l |= (l << 8); + + asm + { + mov ESI, aptr; + mov EDI, n; + movd MM4, l; + pshufw MM4, MM4, 0; + + align 8; + startaddassmmx: + movq MM0, [ESI]; + movq MM1, [ESI+8]; + movq MM2, [ESI+16]; + movq MM3, [ESI+24]; + add ESI, 32; + paddb MM0, MM4; + paddb MM1, MM4; + paddb MM2, MM4; + paddb MM3, MM4; + movq [ESI -32], MM0; + movq [ESI+8 -32], MM1; + movq [ESI+16-32], MM2; + movq [ESI+24-32], MM3; + cmp ESI, EDI; + jb startaddassmmx; + + emms; + mov aptr, ESI; + } + } + } + + while (aptr < aend) + *aptr++ += value; + + return a; +} + +unittest +{ + printf("_arrayExpSliceAddass_g unittest\n"); + + for (cpuid = 0; cpuid < CPUID_MAX; cpuid++) + { + version (log) printf(" cpuid %d\n", cpuid); + + for (int j = 0; j < 2; j++) + { + const int dim = 67; + T[] a = new T[dim + j]; // aligned on 16 byte boundary + a = a[j .. dim + j]; // misalign for second iteration + T[] b = new T[dim + j]; + b = b[j .. dim + j]; + T[] c = new T[dim + j]; + c = c[j .. 
dim + j]; + + for (int i = 0; i < dim; i++) + { a[i] = cast(T)i; + b[i] = cast(T)(i + 7); + c[i] = cast(T)(i * 2); + } + + a[] = c[]; + c[] += 6; + + for (int i = 0; i < dim; i++) + { + if (c[i] != cast(T)(a[i] + 6)) + { + printf("[%d]: %d != %d + 6\n", i, c[i], a[i]); + assert(0); + } + } + } + } +} + + +/* ======================================================================== */ + +/*********************** + * Computes: + * a[] += b[] + */ + +T[] _arraySliceSliceAddass_a(T[] a, T[] b) +{ + return _arraySliceSliceAddass_g(a, b); +} + +T[] _arraySliceSliceAddass_h(T[] a, T[] b) +{ + return _arraySliceSliceAddass_g(a, b); +} + +T[] _arraySliceSliceAddass_g(T[] a, T[] b) +in +{ + assert (a.length == b.length); + assert (disjoint(a, b)); +} +body +{ + //printf("_arraySliceSliceAddass_g()\n"); + auto aptr = a.ptr; + auto aend = aptr + a.length; + auto bptr = b.ptr; + + version (D_InlineAsm_X86) + { + // SSE2 aligned version is 4727% faster + if (sse2() && a.length >= 64) + { + auto n = aptr + (a.length & ~63); + + if (((cast(uint) aptr | cast(uint) bptr) & 15) != 0) + { + asm // unaligned case + { + mov ESI, aptr; + mov EDI, n; + mov ECX, bptr; + + align 8; + startaddasslsse2u: + movdqu XMM0, [ESI]; + movdqu XMM1, [ESI+16]; + movdqu XMM2, [ESI+32]; + movdqu XMM3, [ESI+48]; + add ESI, 64; + movdqu XMM4, [ECX]; + movdqu XMM5, [ECX+16]; + movdqu XMM6, [ECX+32]; + movdqu XMM7, [ECX+48]; + add ECX, 64; + paddb XMM0, XMM4; + paddb XMM1, XMM5; + paddb XMM2, XMM6; + paddb XMM3, XMM7; + movdqu [ESI -64], XMM0; + movdqu [ESI+16-64], XMM1; + movdqu [ESI+32-64], XMM2; + movdqu [ESI+48-64], XMM3; + cmp ESI, EDI; + jb startaddasslsse2u; + + mov aptr, ESI; + mov bptr, ECX; + } + } + else + { + asm // aligned case + { + mov ESI, aptr; + mov EDI, n; + mov ECX, bptr; + + align 8; + startaddasslsse2a: + movdqa XMM0, [ESI]; + movdqa XMM1, [ESI+16]; + movdqa XMM2, [ESI+32]; + movdqa XMM3, [ESI+48]; + add ESI, 64; + movdqa XMM4, [ECX]; + movdqa XMM5, [ECX+16]; + movdqa XMM6, [ECX+32]; + 
movdqa XMM7, [ECX+48]; + add ECX, 64; + paddb XMM0, XMM4; + paddb XMM1, XMM5; + paddb XMM2, XMM6; + paddb XMM3, XMM7; + movdqa [ESI -64], XMM0; + movdqa [ESI+16-64], XMM1; + movdqa [ESI+32-64], XMM2; + movdqa [ESI+48-64], XMM3; + cmp ESI, EDI; + jb startaddasslsse2a; + + mov aptr, ESI; + mov bptr, ECX; + } + } + } + else + // MMX version is 3059% faster + if (mmx() && a.length >= 32) + { + + auto n = aptr + (a.length & ~31); + + asm + { + mov ESI, aptr; + mov EDI, n; + mov ECX, bptr; + + align 8; + startaddasslmmx: + movq MM0, [ESI]; + movq MM1, [ESI+8]; + movq MM2, [ESI+16]; + movq MM3, [ESI+24]; + add ESI, 32; + movq MM4, [ECX]; + movq MM5, [ECX+8]; + movq MM6, [ECX+16]; + movq MM7, [ECX+24]; + add ECX, 32; + paddb MM0, MM4; + paddb MM1, MM5; + paddb MM2, MM6; + paddb MM3, MM7; + movq [ESI -32], MM0; + movq [ESI+8 -32], MM1; + movq [ESI+16-32], MM2; + movq [ESI+24-32], MM3; + cmp ESI, EDI; + jb startaddasslmmx; + + emms; + mov aptr, ESI; + mov bptr, ECX; + } + } + } + + while (aptr < aend) + *aptr++ += *bptr++; + + return a; +} + +unittest +{ + printf("_arraySliceSliceAddass_g unittest\n"); + + for (cpuid = 0; cpuid < CPUID_MAX; cpuid++) + { + version (log) printf(" cpuid %d\n", cpuid); + + for (int j = 0; j < 2; j++) + { + const int dim = 67; + T[] a = new T[dim + j]; // aligned on 16 byte boundary + a = a[j .. dim + j]; // misalign for second iteration + T[] b = new T[dim + j]; + b = b[j .. dim + j]; + T[] c = new T[dim + j]; + c = c[j .. 
dim + j]; + + for (int i = 0; i < dim; i++) + { a[i] = cast(T)i; + b[i] = cast(T)(i + 7); + c[i] = cast(T)(i * 2); + } + + a[] = c[]; + c[] += b[]; + + for (int i = 0; i < dim; i++) + { + if (c[i] != cast(T)(a[i] + b[i])) + { + printf("[%d]: %d != %d + %d\n", i, c[i], a[i], b[i]); + assert(0); + } + } + } + } +} + + +/* ======================================================================== */ + + +/*********************** + * Computes: + * a[] = b[] - value + */ + +T[] _arraySliceExpMinSliceAssign_a(T[] a, T value, T[] b) +{ + return _arraySliceExpMinSliceAssign_g(a, value, b); +} + +T[] _arraySliceExpMinSliceAssign_h(T[] a, T value, T[] b) +{ + return _arraySliceExpMinSliceAssign_g(a, value, b); +} + +T[] _arraySliceExpMinSliceAssign_g(T[] a, T value, T[] b) +in +{ + assert(a.length == b.length); + assert(disjoint(a, b)); +} +body +{ + //printf("_arraySliceExpMinSliceAssign_g()\n"); + auto aptr = a.ptr; + auto aend = aptr + a.length; + auto bptr = b.ptr; + + version (D_InlineAsm_X86) + { + // SSE2 aligned version is 1189% faster + if (sse2() && a.length >= 64) + { + auto n = aptr + (a.length & ~63); + + uint l = cast(ubyte) value; + l |= (l << 8); + l |= (l << 16); + + if (((cast(uint) aptr | cast(uint) bptr) & 15) != 0) + { + asm // unaligned case + { + mov ESI, aptr; + mov EDI, n; + mov EAX, bptr; + movd XMM4, l; + pshufd XMM4, XMM4, 0; + + align 8; + startsubsse2u: + add ESI, 64; + movdqu XMM0, [EAX]; + movdqu XMM1, [EAX+16]; + movdqu XMM2, [EAX+32]; + movdqu XMM3, [EAX+48]; + add EAX, 64; + psubb XMM0, XMM4; + psubb XMM1, XMM4; + psubb XMM2, XMM4; + psubb XMM3, XMM4; + movdqu [ESI -64], XMM0; + movdqu [ESI+16-64], XMM1; + movdqu [ESI+32-64], XMM2; + movdqu [ESI+48-64], XMM3; + cmp ESI, EDI; + jb startsubsse2u; + + mov aptr, ESI; + mov bptr, EAX; + } + } + else + { + asm // aligned case + { + mov ESI, aptr; + mov EDI, n; + mov EAX, bptr; + movd XMM4, l; + pshufd XMM4, XMM4, 0; + + align 8; + startsubsse2a: + add ESI, 64; + movdqa XMM0, [EAX]; + movdqa XMM1, 
[EAX+16]; + movdqa XMM2, [EAX+32]; + movdqa XMM3, [EAX+48]; + add EAX, 64; + psubb XMM0, XMM4; + psubb XMM1, XMM4; + psubb XMM2, XMM4; + psubb XMM3, XMM4; + movdqa [ESI -64], XMM0; + movdqa [ESI+16-64], XMM1; + movdqa [ESI+32-64], XMM2; + movdqa [ESI+48-64], XMM3; + cmp ESI, EDI; + jb startsubsse2a; + + mov aptr, ESI; + mov bptr, EAX; + } + } + } + else + // MMX version is 1079% faster + if (mmx() && a.length >= 32) + { + auto n = aptr + (a.length & ~31); + + uint l = cast(ubyte) value; + l |= (l << 8); + + asm + { + mov ESI, aptr; + mov EDI, n; + mov EAX, bptr; + movd MM4, l; + pshufw MM4, MM4, 0; + + align 4; + startsubmmx: + add ESI, 32; + movq MM0, [EAX]; + movq MM1, [EAX+8]; + movq MM2, [EAX+16]; + movq MM3, [EAX+24]; + add EAX, 32; + psubb MM0, MM4; + psubb MM1, MM4; + psubb MM2, MM4; + psubb MM3, MM4; + movq [ESI -32], MM0; + movq [ESI+8 -32], MM1; + movq [ESI+16-32], MM2; + movq [ESI+24-32], MM3; + cmp ESI, EDI; + jb startsubmmx; + + emms; + mov aptr, ESI; + mov bptr, EAX; + } + } + // trying to be fair and treat normal 32-bit cpu the same way as we do the SIMD units, with unrolled asm. There's not enough registers, really. + else + if (a.length >= 4) + { + auto n = aptr + (a.length & ~3); + asm + { + mov ESI, aptr; + mov EDI, n; + mov EAX, bptr; + mov CL, value; + + align 4; + startsub386: + add ESI, 4; + mov DX, [EAX]; + mov BX, [EAX+2]; + add EAX, 4; + sub BL, CL; + sub BH, CL; + sub DL, CL; + sub DH, CL; + mov [ESI -4], DX; + mov [ESI+2 -4], BX; + cmp ESI, EDI; + jb startsub386; + + mov aptr, ESI; + mov bptr, EAX; + } + } + } + + while (aptr < aend) + *aptr++ = cast(T)(*bptr++ - value); + + return a; +} + +unittest +{ + printf("_arraySliceExpMinSliceAssign_g unittest\n"); + + for (cpuid = 0; cpuid < CPUID_MAX; cpuid++) + { + version (log) printf(" cpuid %d\n", cpuid); + + for (int j = 0; j < 2; j++) + { + const int dim = 67; + T[] a = new T[dim + j]; // aligned on 16 byte boundary + a = a[j .. 
dim + j]; // misalign for second iteration + T[] b = new T[dim + j]; + b = b[j .. dim + j]; + T[] c = new T[dim + j]; + c = c[j .. dim + j]; + + for (int i = 0; i < dim; i++) + { a[i] = cast(T)i; + b[i] = cast(T)(i + 7); + c[i] = cast(T)(i * 2); + } + + a[] = c[]; + c[] = b[] - 6; + + for (int i = 0; i < dim; i++) + { + if (c[i] != cast(T)(b[i] - 6)) + { + printf("[%d]: %d != %d - 6\n", i, c[i], b[i]); + assert(0); + } + } + } + } +} + + +/* ======================================================================== */ + +/*********************** + * Computes: + * a[] = value - b[] + */ + +T[] _arrayExpSliceMinSliceAssign_a(T[] a, T[] b, T value) +{ + return _arrayExpSliceMinSliceAssign_g(a, b, value); +} + +T[] _arrayExpSliceMinSliceAssign_h(T[] a, T[] b, T value) +{ + return _arrayExpSliceMinSliceAssign_g(a, b, value); +} + +T[] _arrayExpSliceMinSliceAssign_g(T[] a, T[] b, T value) +in +{ + assert(a.length == b.length); + assert(disjoint(a, b)); +} +body +{ + //printf("_arrayExpSliceMinSliceAssign_g()\n"); + auto aptr = a.ptr; + auto aend = aptr + a.length; + auto bptr = b.ptr; + + version (D_InlineAsm_X86) + { + // SSE2 aligned version is 8748% faster + if (sse2() && a.length >= 64) + { + auto n = aptr + (a.length & ~63); + + uint l = cast(ubyte) value; + l |= (l << 8); + l |= (l << 16); + + if (((cast(uint) aptr | cast(uint) bptr) & 15) != 0) + { + asm // unaligned case + { + mov ESI, aptr; + mov EDI, n; + mov EAX, bptr; + movd XMM4, l; + pshufd XMM4, XMM4, 0; + + align 8; + startsubrsse2u: + add ESI, 64; + movdqa XMM5, XMM4; + movdqa XMM6, XMM4; + movdqu XMM0, [EAX]; + movdqu XMM1, [EAX+16]; + psubb XMM5, XMM0; + psubb XMM6, XMM1; + movdqu [ESI -64], XMM5; + movdqu [ESI+16-64], XMM6; + movdqa XMM5, XMM4; + movdqa XMM6, XMM4; + movdqu XMM2, [EAX+32]; + movdqu XMM3, [EAX+48]; + add EAX, 64; + psubb XMM5, XMM2; + psubb XMM6, XMM3; + movdqu [ESI+32-64], XMM5; + movdqu [ESI+48-64], XMM6; + cmp ESI, EDI; + jb startsubrsse2u; + + mov aptr, ESI; + mov bptr, EAX; + } + } 
+ else + { + asm // aligned case + { + mov ESI, aptr; + mov EDI, n; + mov EAX, bptr; + movd XMM4, l; + pshufd XMM4, XMM4, 0; + + align 8; + startsubrsse2a: + add ESI, 64; + movdqa XMM5, XMM4; + movdqa XMM6, XMM4; + movdqa XMM0, [EAX]; + movdqa XMM1, [EAX+16]; + psubb XMM5, XMM0; + psubb XMM6, XMM1; + movdqa [ESI -64], XMM5; + movdqa [ESI+16-64], XMM6; + movdqa XMM5, XMM4; + movdqa XMM6, XMM4; + movdqa XMM2, [EAX+32]; + movdqa XMM3, [EAX+48]; + add EAX, 64; + psubb XMM5, XMM2; + psubb XMM6, XMM3; + movdqa [ESI+32-64], XMM5; + movdqa [ESI+48-64], XMM6; + cmp ESI, EDI; + jb startsubrsse2a; + + mov aptr, ESI; + mov bptr, EAX; + } + } + } + else + // MMX version is 7397% faster + if (mmx() && a.length >= 32) + { + auto n = aptr + (a.length & ~31); + + uint l = cast(ubyte) value; + l |= (l << 8); + + asm + { + mov ESI, aptr; + mov EDI, n; + mov EAX, bptr; + movd MM4, l; + pshufw MM4, MM4, 0; + + align 4; + startsubrmmx: + add ESI, 32; + movq MM5, MM4; + movq MM6, MM4; + movq MM0, [EAX]; + movq MM1, [EAX+8]; + psubb MM5, MM0; + psubb MM6, MM1; + movq [ESI -32], MM5; + movq [ESI+8 -32], MM6; + movq MM5, MM4; + movq MM6, MM4; + movq MM2, [EAX+16]; + movq MM3, [EAX+24]; + add EAX, 32; + psubb MM5, MM2; + psubb MM6, MM3; + movq [ESI+16-32], MM5; + movq [ESI+24-32], MM6; + cmp ESI, EDI; + jb startsubrmmx; + + emms; + mov aptr, ESI; + mov bptr, EAX; + } + } + + } + + while (aptr < aend) + *aptr++ = cast(T)(value - *bptr++); + + return a; +} + +unittest +{ + printf("_arrayExpSliceMinSliceAssign_g unittest\n"); + + for (cpuid = 0; cpuid < CPUID_MAX; cpuid++) + { + version (log) printf(" cpuid %d\n", cpuid); + + for (int j = 0; j < 2; j++) + { + const int dim = 67; + T[] a = new T[dim + j]; // aligned on 16 byte boundary + a = a[j .. dim + j]; // misalign for second iteration + T[] b = new T[dim + j]; + b = b[j .. dim + j]; + T[] c = new T[dim + j]; + c = c[j .. 
dim + j]; + + for (int i = 0; i < dim; i++) + { a[i] = cast(T)i; + b[i] = cast(T)(i + 7); + c[i] = cast(T)(i * 2); + } + + a[] = c[]; + c[] = 6 - b[]; + + for (int i = 0; i < dim; i++) + { + if (c[i] != cast(T)(6 - b[i])) + { + printf("[%d]: %d != 6 - %d\n", i, c[i], b[i]); + assert(0); + } + } + } + } +} + + +/* ======================================================================== */ + +/*********************** + * Computes: + * a[] = b[] - c[] + */ + +T[] _arraySliceSliceMinSliceAssign_a(T[] a, T[] c, T[] b) +{ + return _arraySliceSliceMinSliceAssign_g(a, c, b); +} + +T[] _arraySliceSliceMinSliceAssign_h(T[] a, T[] c, T[] b) +{ + return _arraySliceSliceMinSliceAssign_g(a, c, b); +} + +T[] _arraySliceSliceMinSliceAssign_g(T[] a, T[] c, T[] b) +in +{ + assert(a.length == b.length && b.length == c.length); + assert(disjoint(a, b)); + assert(disjoint(a, c)); + assert(disjoint(b, c)); +} +body +{ + auto aptr = a.ptr; + auto aend = aptr + a.length; + auto bptr = b.ptr; + auto cptr = c.ptr; + + version (D_InlineAsm_X86) + { + // SSE2 aligned version is 5756% faster + if (sse2() && a.length >= 64) + { + auto n = aptr + (a.length & ~63); + + if (((cast(uint) aptr | cast(uint) bptr | cast(uint) cptr) & 15) != 0) + { + asm // unaligned case + { + mov ESI, aptr; + mov EDI, n; + mov EAX, bptr; + mov ECX, cptr; + + align 8; + startsublsse2u: + add ESI, 64; + movdqu XMM0, [EAX]; + movdqu XMM1, [EAX+16]; + movdqu XMM2, [EAX+32]; + movdqu XMM3, [EAX+48]; + add EAX, 64; + movdqu XMM4, [ECX]; + movdqu XMM5, [ECX+16]; + movdqu XMM6, [ECX+32]; + movdqu XMM7, [ECX+48]; + add ECX, 64; + psubb XMM0, XMM4; + psubb XMM1, XMM5; + psubb XMM2, XMM6; + psubb XMM3, XMM7; + movdqu [ESI -64], XMM0; + movdqu [ESI+16-64], XMM1; + movdqu [ESI+32-64], XMM2; + movdqu [ESI+48-64], XMM3; + cmp ESI, EDI; + jb startsublsse2u; + + mov aptr, ESI; + mov bptr, EAX; + mov cptr, ECX; + } + } + else + { + asm // aligned case + { + mov ESI, aptr; + mov EDI, n; + mov EAX, bptr; + mov ECX, cptr; + + align 8; + 
startsublsse2a: + add ESI, 64; + movdqa XMM0, [EAX]; + movdqa XMM1, [EAX+16]; + movdqa XMM2, [EAX+32]; + movdqa XMM3, [EAX+48]; + add EAX, 64; + movdqa XMM4, [ECX]; + movdqa XMM5, [ECX+16]; + movdqa XMM6, [ECX+32]; + movdqa XMM7, [ECX+48]; + add ECX, 64; + psubb XMM0, XMM4; + psubb XMM1, XMM5; + psubb XMM2, XMM6; + psubb XMM3, XMM7; + movdqa [ESI -64], XMM0; + movdqa [ESI+16-64], XMM1; + movdqa [ESI+32-64], XMM2; + movdqa [ESI+48-64], XMM3; + cmp ESI, EDI; + jb startsublsse2a; + + mov aptr, ESI; + mov bptr, EAX; + mov cptr, ECX; + } + } + } + else + // MMX version is 4428% faster + if (mmx() && a.length >= 32) + { + auto n = aptr + (a.length & ~31); + + asm + { + mov ESI, aptr; + mov EDI, n; + mov EAX, bptr; + mov ECX, cptr; + + align 8; + startsublmmx: + add ESI, 32; + movq MM0, [EAX]; + movq MM1, [EAX+8]; + movq MM2, [EAX+16]; + movq MM3, [EAX+24]; + add EAX, 32; + movq MM4, [ECX]; + movq MM5, [ECX+8]; + movq MM6, [ECX+16]; + movq MM7, [ECX+24]; + add ECX, 32; + psubb MM0, MM4; + psubb MM1, MM5; + psubb MM2, MM6; + psubb MM3, MM7; + movq [ESI -32], MM0; + movq [ESI+8 -32], MM1; + movq [ESI+16-32], MM2; + movq [ESI+24-32], MM3; + cmp ESI, EDI; + jb startsublmmx; + + emms; + mov aptr, ESI; + mov bptr, EAX; + mov cptr, ECX; + } + } + } + + while (aptr < aend) + *aptr++ = cast(T)(*bptr++ - *cptr++); + + return a; +} + +unittest +{ + printf("_arraySliceSliceMinSliceAssign_g unittest\n"); + + for (cpuid = 0; cpuid < CPUID_MAX; cpuid++) + { + version (log) printf(" cpuid %d\n", cpuid); + + for (int j = 0; j < 2; j++) + { + const int dim = 67; + T[] a = new T[dim + j]; // aligned on 16 byte boundary + a = a[j .. dim + j]; // misalign for second iteration + T[] b = new T[dim + j]; + b = b[j .. dim + j]; + T[] c = new T[dim + j]; + c = c[j .. 
dim + j]; + + for (int i = 0; i < dim; i++) + { a[i] = cast(T)i; + b[i] = cast(T)(i + 7); + c[i] = cast(T)(i * 2); + } + + c[] = a[] - b[]; + + for (int i = 0; i < dim; i++) + { + if (c[i] != cast(T)(a[i] - b[i])) + { + printf("[%d]: %d != %d - %d\n", i, c[i], a[i], b[i]); + assert(0); + } + } + } + } +} + + +/* ======================================================================== */ + +/*********************** + * Computes: + * a[] -= value + */ + +T[] _arrayExpSliceMinass_a(T[] a, T value) +{ + return _arrayExpSliceMinass_g(a, value); +} + +T[] _arrayExpSliceMinass_h(T[] a, T value) +{ + return _arrayExpSliceMinass_g(a, value); +} + +T[] _arrayExpSliceMinass_g(T[] a, T value) +{ + //printf("_arrayExpSliceMinass_g(a.length = %d, value = %Lg)\n", a.length, cast(real)value); + auto aptr = a.ptr; + auto aend = aptr + a.length; + + version (D_InlineAsm_X86) + { + // SSE2 aligned version is 1577% faster + if (sse2() && a.length >= 64) + { + auto n = aptr + (a.length & ~63); + + uint l = cast(ubyte) value; + l |= (l << 8); + l |= (l << 16); + + if (((cast(uint) aptr) & 15) != 0) + { + asm // unaligned case + { + mov ESI, aptr; + mov EDI, n; + movd XMM4, l; + pshufd XMM4, XMM4, 0; + + align 8; + startsubasssse2u: + movdqu XMM0, [ESI]; + movdqu XMM1, [ESI+16]; + movdqu XMM2, [ESI+32]; + movdqu XMM3, [ESI+48]; + add ESI, 64; + psubb XMM0, XMM4; + psubb XMM1, XMM4; + psubb XMM2, XMM4; + psubb XMM3, XMM4; + movdqu [ESI -64], XMM0; + movdqu [ESI+16-64], XMM1; + movdqu [ESI+32-64], XMM2; + movdqu [ESI+48-64], XMM3; + cmp ESI, EDI; + jb startsubasssse2u; + + mov aptr, ESI; + } + } + else + { + asm // aligned case + { + mov ESI, aptr; + mov EDI, n; + movd XMM4, l; + pshufd XMM4, XMM4, 0; + + align 8; + startsubasssse2a: + movdqa XMM0, [ESI]; + movdqa XMM1, [ESI+16]; + movdqa XMM2, [ESI+32]; + movdqa XMM3, [ESI+48]; + add ESI, 64; + psubb XMM0, XMM4; + psubb XMM1, XMM4; + psubb XMM2, XMM4; + psubb XMM3, XMM4; + movdqa [ESI -64], XMM0; + movdqa [ESI+16-64], XMM1; + movdqa 
[ESI+32-64], XMM2; + movdqa [ESI+48-64], XMM3; + cmp ESI, EDI; + jb startsubasssse2a; + + mov aptr, ESI; + } + } + } + else + // MMX version is 1577% faster + if (mmx() && a.length >= 32) + { + + auto n = aptr + (a.length & ~31); + + uint l = cast(ubyte) value; + l |= (l << 8); + + asm + { + mov ESI, aptr; + mov EDI, n; + movd MM4, l; + pshufw MM4, MM4, 0; + + align 8; + startsubassmmx: + movq MM0, [ESI]; + movq MM1, [ESI+8]; + movq MM2, [ESI+16]; + movq MM3, [ESI+24]; + add ESI, 32; + psubb MM0, MM4; + psubb MM1, MM4; + psubb MM2, MM4; + psubb MM3, MM4; + movq [ESI -32], MM0; + movq [ESI+8 -32], MM1; + movq [ESI+16-32], MM2; + movq [ESI+24-32], MM3; + cmp ESI, EDI; + jb startsubassmmx; + + emms; + mov aptr, ESI; + } + } + } + + while (aptr < aend) + *aptr++ -= value; + + return a; +} + +unittest +{ + printf("_arrayExpSliceMinass_g unittest\n"); + + for (cpuid = 0; cpuid < CPUID_MAX; cpuid++) + { + version (log) printf(" cpuid %d\n", cpuid); + + for (int j = 0; j < 2; j++) + { + const int dim = 67; + T[] a = new T[dim + j]; // aligned on 16 byte boundary + a = a[j .. dim + j]; // misalign for second iteration + T[] b = new T[dim + j]; + b = b[j .. dim + j]; + T[] c = new T[dim + j]; + c = c[j .. 
dim + j]; + + for (int i = 0; i < dim; i++) + { a[i] = cast(T)i; + b[i] = cast(T)(i + 7); + c[i] = cast(T)(i * 2); + } + + a[] = c[]; + c[] -= 6; + + for (int i = 0; i < dim; i++) + { + if (c[i] != cast(T)(a[i] - 6)) + { + printf("[%d]: %d != %d - 6\n", i, c[i], a[i]); + assert(0); + } + } + } + } +} + + +/* ======================================================================== */ + +/*********************** + * Computes: + * a[] -= b[] + */ + +T[] _arraySliceSliceMinass_a(T[] a, T[] b) +{ + return _arraySliceSliceMinass_g(a, b); +} + +T[] _arraySliceSliceMinass_h(T[] a, T[] b) +{ + return _arraySliceSliceMinass_g(a, b); +} + +T[] _arraySliceSliceMinass_g(T[] a, T[] b) +in +{ + assert (a.length == b.length); + assert (disjoint(a, b)); +} +body +{ + //printf("_arraySliceSliceMinass_g()\n"); + auto aptr = a.ptr; + auto aend = aptr + a.length; + auto bptr = b.ptr; + + version (D_InlineAsm_X86) + { + // SSE2 aligned version is 4800% faster + if (sse2() && a.length >= 64) + { + auto n = aptr + (a.length & ~63); + + if (((cast(uint) aptr | cast(uint) bptr) & 15) != 0) + { + asm // unaligned case + { + mov ESI, aptr; + mov EDI, n; + mov ECX, bptr; + + align 8; + startsubasslsse2u: + movdqu XMM0, [ESI]; + movdqu XMM1, [ESI+16]; + movdqu XMM2, [ESI+32]; + movdqu XMM3, [ESI+48]; + add ESI, 64; + movdqu XMM4, [ECX]; + movdqu XMM5, [ECX+16]; + movdqu XMM6, [ECX+32]; + movdqu XMM7, [ECX+48]; + add ECX, 64; + psubb XMM0, XMM4; + psubb XMM1, XMM5; + psubb XMM2, XMM6; + psubb XMM3, XMM7; + movdqu [ESI -64], XMM0; + movdqu [ESI+16-64], XMM1; + movdqu [ESI+32-64], XMM2; + movdqu [ESI+48-64], XMM3; + cmp ESI, EDI; + jb startsubasslsse2u; + + mov aptr, ESI; + mov bptr, ECX; + } + } + else + { + asm // aligned case + { + mov ESI, aptr; + mov EDI, n; + mov ECX, bptr; + + align 8; + startsubasslsse2a: + movdqa XMM0, [ESI]; + movdqa XMM1, [ESI+16]; + movdqa XMM2, [ESI+32]; + movdqa XMM3, [ESI+48]; + add ESI, 64; + movdqa XMM4, [ECX]; + movdqa XMM5, [ECX+16]; + movdqa XMM6, [ECX+32]; + 
movdqa XMM7, [ECX+48]; + add ECX, 64; + psubb XMM0, XMM4; + psubb XMM1, XMM5; + psubb XMM2, XMM6; + psubb XMM3, XMM7; + movdqa [ESI -64], XMM0; + movdqa [ESI+16-64], XMM1; + movdqa [ESI+32-64], XMM2; + movdqa [ESI+48-64], XMM3; + cmp ESI, EDI; + jb startsubasslsse2a; + + mov aptr, ESI; + mov bptr, ECX; + } + } + } + else + // MMX version is 3107% faster + if (mmx() && a.length >= 32) + { + + auto n = aptr + (a.length & ~31); + + asm + { + mov ESI, aptr; + mov EDI, n; + mov ECX, bptr; + + align 8; + startsubasslmmx: + movq MM0, [ESI]; + movq MM1, [ESI+8]; + movq MM2, [ESI+16]; + movq MM3, [ESI+24]; + add ESI, 32; + movq MM4, [ECX]; + movq MM5, [ECX+8]; + movq MM6, [ECX+16]; + movq MM7, [ECX+24]; + add ECX, 32; + psubb MM0, MM4; + psubb MM1, MM5; + psubb MM2, MM6; + psubb MM3, MM7; + movq [ESI -32], MM0; + movq [ESI+8 -32], MM1; + movq [ESI+16-32], MM2; + movq [ESI+24-32], MM3; + cmp ESI, EDI; + jb startsubasslmmx; + + emms; + mov aptr, ESI; + mov bptr, ECX; + } + } + } + + while (aptr < aend) + *aptr++ -= *bptr++; + + return a; +} + +unittest +{ + printf("_arraySliceSliceMinass_g unittest\n"); + + for (cpuid = 0; cpuid < CPUID_MAX; cpuid++) + { + version (log) printf(" cpuid %d\n", cpuid); + + for (int j = 0; j < 2; j++) + { + const int dim = 67; + T[] a = new T[dim + j]; // aligned on 16 byte boundary + a = a[j .. dim + j]; // misalign for second iteration + T[] b = new T[dim + j]; + b = b[j .. dim + j]; + T[] c = new T[dim + j]; + c = c[j .. 
dim + j]; + + for (int i = 0; i < dim; i++) + { a[i] = cast(T)i; + b[i] = cast(T)(i + 7); + c[i] = cast(T)(i * 2); + } + + a[] = c[]; + c[] -= b[]; + + for (int i = 0; i < dim; i++) + { + if (c[i] != cast(T)(a[i] - b[i])) + { + printf("[%d]: %d != %d - %d\n", i, c[i], a[i], b[i]); + assert(0); + } + } + } + } +} diff --git a/druntime/src/compiler/dmd/arraycast.d b/druntime/src/compiler/dmd/arraycast.d new file mode 100644 index 00000000..755b5378 --- /dev/null +++ b/druntime/src/compiler/dmd/arraycast.d @@ -0,0 +1,94 @@ +/** + * Implementation of array cast support routines. + * + * Copyright: Copyright Digital Mars 2004 - 2009. + * License: = 8) + { + auto n = aptr + (b.length & ~7); + + // Unaligned case + asm + { + mov EAX, bptr; // left operand + mov ECX, cptr; // right operand + mov ESI, aptr; // destination operand + mov EDI, n; // end comparison + + align 8; + startsseloopb: + movupd XMM0, [EAX]; + movupd XMM1, [EAX+16]; + movupd XMM2, [EAX+32]; + movupd XMM3, [EAX+48]; + add EAX, 64; + movupd XMM4, [ECX]; + movupd XMM5, [ECX+16]; + movupd XMM6, [ECX+32]; + movupd XMM7, [ECX+48]; + add ESI, 64; + subpd XMM0, XMM4; + subpd XMM1, XMM5; + subpd XMM2, XMM6; + subpd XMM3, XMM7; + add ECX, 64; + movupd [ESI+ 0-64], XMM0; + movupd [ESI+16-64], XMM1; + movupd [ESI+32-64], XMM2; + movupd [ESI+48-64], XMM3; + cmp ESI, EDI; + jb startsseloopb; + + mov aptr, ESI; + mov bptr, EAX; + mov cptr, ECX; + } + } + } + + // Handle remainder + while (aptr < aend) + *aptr++ = *bptr++ - *cptr++; + + return a; +} + + +unittest +{ + printf("_arraySliceSliceMinSliceAssign_d unittest\n"); + for (cpuid = 0; cpuid < CPUID_MAX; cpuid++) + { + version (log) printf(" cpuid %d\n", cpuid); + + for (int j = 0; j < 2; j++) + { + const int dim = 67; + T[] a = new T[dim + j]; // aligned on 16 byte boundary + a = a[j .. dim + j]; // misalign for second iteration + T[] b = new T[dim + j]; + b = b[j .. dim + j]; + T[] c = new T[dim + j]; + c = c[j .. 
dim + j]; + + for (int i = 0; i < dim; i++) + { a[i] = cast(T)i; + b[i] = cast(T)(i + 7); + c[i] = cast(T)(i * 2); + } + + c[] = a[] - b[]; + + for (int i = 0; i < dim; i++) + { + if (c[i] != cast(T)(a[i] - b[i])) + { + printf("[%d]: %g != %g - %g\n", i, c[i], a[i], b[i]); + assert(0); + } + } + } + } +} + + +/* ======================================================================== */ + +/*********************** + * Computes: + * a[] = b[] + value + * + * Params: + * a = destination slice; must have the same length as b and not overlap it + * value = scalar added to each element of b + * b = source slice + * Returns: + * a + * + * With inline x86 asm and sse2() true, arrays of at least 8 elements are + * processed 64 bytes per iteration using unaligned packed-double loads and + * stores (addpd/movupd; T itself is declared outside this view). The SIMD + * loop covers only complete 8-element groups; the scalar loop at the end + * finishes whatever is left. + */ + +T[] _arraySliceExpAddSliceAssign_d(T[] a, T value, T[] b) +in +{ + assert(a.length == b.length); + assert(disjoint(a, b)); +} +body +{ + //printf("_arraySliceExpAddSliceAssign_d()\n"); + auto aptr = a.ptr; + auto aend = aptr + a.length; + auto bptr = b.ptr; + + version (D_InlineAsm_X86) + { + // SSE2 version is 305% faster + if (sse2() && a.length >= 8) + { + // n = end of the last complete 8-element group in a + auto n = aptr + (a.length & ~7); + + // Unaligned case + asm + { + mov EAX, bptr; + mov ESI, aptr; + mov EDI, n; + movsd XMM4, value; + shufpd XMM4, XMM4, 0; // copy the low lane into both lanes + + align 8; + startsseloop: + add ESI, 64; // advanced up front, hence the -64 in the stores below + movupd XMM0, [EAX]; + movupd XMM1, [EAX+16]; + movupd XMM2, [EAX+32]; + movupd XMM3, [EAX+48]; + add EAX, 64; + addpd XMM0, XMM4; + addpd XMM1, XMM4; + addpd XMM2, XMM4; + addpd XMM3, XMM4; + movupd [ESI+ 0-64], XMM0; + movupd [ESI+16-64], XMM1; + movupd [ESI+32-64], XMM2; + movupd [ESI+48-64], XMM3; + cmp ESI, EDI; + jb startsseloop; + + // hand the advanced pointers back so the scalar loop resumes here + mov aptr, ESI; + mov bptr, EAX; + } + } + } + + while (aptr < aend) + *aptr++ = *bptr++ + value; + + return a; +} + +unittest +{ + printf("_arraySliceExpAddSliceAssign_d unittest\n"); + for (cpuid = 0; cpuid < CPUID_MAX; cpuid++) + { + version (log) printf(" cpuid %d\n", cpuid); + + for (int j = 0; j < 2; j++) + { + const int dim = 67; + T[] a = new T[dim + j]; // aligned on 16 byte boundary + a = a[j .. dim + j]; // misalign for second iteration + T[] b = new T[dim + j]; + b = b[j .. dim + j]; + T[] c = new T[dim + j]; + c = c[j .. 
dim + j]; + + for (int i = 0; i < dim; i++) + { a[i] = cast(T)i; + b[i] = cast(T)(i + 7); + c[i] = cast(T)(i * 2); + } + + c[] = a[] + 6; + + for (int i = 0; i < dim; i++) + { + if (c[i] != cast(T)(a[i] + 6)) + { + printf("[%d]: %g != %g + 6\n", i, c[i], a[i]); + assert(0); + } + } + } + } +} + +/* ======================================================================== */ + +/*********************** + * Computes: + * a[] += value + */ + +T[] _arrayExpSliceAddass_d(T[] a, T value) +{ + //printf("_arrayExpSliceAddass_d(a.length = %d, value = %Lg)\n", a.length, cast(real)value); + auto aptr = a.ptr; + auto aend = aptr + a.length; + + version (D_InlineAsm_X86) + { + // SSE2 version is 114% faster + if (sse2() && a.length >= 8) + { + auto n = cast(T*)((cast(uint)aend) & ~7); + if (aptr < n) + + // Unaligned case + asm + { + mov ESI, aptr; + mov EDI, n; + movsd XMM4, value; + shufpd XMM4, XMM4, 0; + + align 8; + startsseloopa: + movupd XMM0, [ESI]; + movupd XMM1, [ESI+16]; + movupd XMM2, [ESI+32]; + movupd XMM3, [ESI+48]; + add ESI, 64; + addpd XMM0, XMM4; + addpd XMM1, XMM4; + addpd XMM2, XMM4; + addpd XMM3, XMM4; + movupd [ESI+ 0-64], XMM0; + movupd [ESI+16-64], XMM1; + movupd [ESI+32-64], XMM2; + movupd [ESI+48-64], XMM3; + cmp ESI, EDI; + jb startsseloopa; + + mov aptr, ESI; + } + } + } + + while (aptr < aend) + *aptr++ += value; + + return a; +} + +unittest +{ + printf("_arrayExpSliceAddass_d unittest\n"); + for (cpuid = 0; cpuid < CPUID_MAX; cpuid++) + { + version (log) printf(" cpuid %d\n", cpuid); + + for (int j = 0; j < 2; j++) + { + const int dim = 67; + T[] a = new T[dim + j]; // aligned on 16 byte boundary + a = a[j .. dim + j]; // misalign for second iteration + T[] b = new T[dim + j]; + b = b[j .. dim + j]; + T[] c = new T[dim + j]; + c = c[j .. 
dim + j]; + + for (int i = 0; i < dim; i++) + { a[i] = cast(T)i; + b[i] = cast(T)(i + 7); + c[i] = cast(T)(i * 2); + } + + a[] = c[]; + c[] += 6; + + for (int i = 0; i < dim; i++) + { + if (c[i] != cast(T)(a[i] + 6)) + { + printf("[%d]: %g != %g + 6\n", i, c[i], a[i]); + assert(0); + } + } + } + } +} + +/* ======================================================================== */ + +/*********************** + * Computes: + * a[] += b[] + */ + +T[] _arraySliceSliceAddass_d(T[] a, T[] b) +in +{ + assert (a.length == b.length); + assert (disjoint(a, b)); +} +body +{ + //printf("_arraySliceSliceAddass_d()\n"); + auto aptr = a.ptr; + auto aend = aptr + a.length; + auto bptr = b.ptr; + + version (D_InlineAsm_X86) + { + // SSE2 version is 183% faster + if (sse2() && a.length >= 8) + { + auto n = aptr + (a.length & ~7); + + // Unaligned case + asm + { + mov ECX, bptr; // right operand + mov ESI, aptr; // destination operand + mov EDI, n; // end comparison + + align 8; + startsseloopb: + movupd XMM0, [ESI]; + movupd XMM1, [ESI+16]; + movupd XMM2, [ESI+32]; + movupd XMM3, [ESI+48]; + add ESI, 64; + movupd XMM4, [ECX]; + movupd XMM5, [ECX+16]; + movupd XMM6, [ECX+32]; + movupd XMM7, [ECX+48]; + add ECX, 64; + addpd XMM0, XMM4; + addpd XMM1, XMM5; + addpd XMM2, XMM6; + addpd XMM3, XMM7; + movupd [ESI+ 0-64], XMM0; + movupd [ESI+16-64], XMM1; + movupd [ESI+32-64], XMM2; + movupd [ESI+48-64], XMM3; + cmp ESI, EDI; + jb startsseloopb; + + mov aptr, ESI; + mov bptr, ECX; + } + } + } + + while (aptr < aend) + *aptr++ += *bptr++; + + return a; +} + +unittest +{ + printf("_arraySliceSliceAddass_d unittest\n"); + for (cpuid = 0; cpuid < CPUID_MAX; cpuid++) + { + version (log) printf(" cpuid %d\n", cpuid); + + for (int j = 0; j < 2; j++) + { + const int dim = 67; + T[] a = new T[dim + j]; // aligned on 16 byte boundary + a = a[j .. dim + j]; // misalign for second iteration + T[] b = new T[dim + j]; + b = b[j .. dim + j]; + T[] c = new T[dim + j]; + c = c[j .. 
dim + j]; + + for (int i = 0; i < dim; i++) + { a[i] = cast(T)i; + b[i] = cast(T)(i + 7); + c[i] = cast(T)(i * 2); + } + + a[] = c[]; + c[] += b[]; + + for (int i = 0; i < dim; i++) + { + if (c[i] != cast(T)(a[i] + b[i])) + { + printf("[%d]: %g != %g + %g\n", i, c[i], a[i], b[i]); + assert(0); + } + } + } + } +} + +/* ======================================================================== */ + +/*********************** + * Computes: + * a[] = b[] - value + */ + +T[] _arraySliceExpMinSliceAssign_d(T[] a, T value, T[] b) +in +{ + assert (a.length == b.length); + assert (disjoint(a, b)); +} +body +{ + //printf("_arraySliceExpMinSliceAssign_d()\n"); + auto aptr = a.ptr; + auto aend = aptr + a.length; + auto bptr = b.ptr; + + version (D_InlineAsm_X86) + { + // SSE2 version is 305% faster + if (sse2() && a.length >= 8) + { + auto n = aptr + (a.length & ~7); + + // Unaligned case + asm + { + mov EAX, bptr; + mov ESI, aptr; + mov EDI, n; + movsd XMM4, value; + shufpd XMM4, XMM4, 0; + + align 8; + startsseloop: + add ESI, 64; + movupd XMM0, [EAX]; + movupd XMM1, [EAX+16]; + movupd XMM2, [EAX+32]; + movupd XMM3, [EAX+48]; + add EAX, 64; + subpd XMM0, XMM4; + subpd XMM1, XMM4; + subpd XMM2, XMM4; + subpd XMM3, XMM4; + movupd [ESI+ 0-64], XMM0; + movupd [ESI+16-64], XMM1; + movupd [ESI+32-64], XMM2; + movupd [ESI+48-64], XMM3; + cmp ESI, EDI; + jb startsseloop; + + mov aptr, ESI; + mov bptr, EAX; + } + } + } + + while (aptr < aend) + *aptr++ = *bptr++ - value; + + return a; +} + +unittest +{ + printf("_arraySliceExpMinSliceAssign_d unittest\n"); + for (cpuid = 0; cpuid < CPUID_MAX; cpuid++) + { + version (log) printf(" cpuid %d\n", cpuid); + + for (int j = 0; j < 2; j++) + { + const int dim = 67; + T[] a = new T[dim + j]; // aligned on 16 byte boundary + a = a[j .. dim + j]; // misalign for second iteration + T[] b = new T[dim + j]; + b = b[j .. dim + j]; + T[] c = new T[dim + j]; + c = c[j .. 
dim + j]; + + for (int i = 0; i < dim; i++) + { a[i] = cast(T)i; + b[i] = cast(T)(i + 7); + c[i] = cast(T)(i * 2); + } + + c[] = a[] - 6; + + for (int i = 0; i < dim; i++) + { + if (c[i] != cast(T)(a[i] - 6)) + { + printf("[%d]: %g != %g - 6\n", i, c[i], a[i]); + assert(0); + } + } + } + } +} + +/* ======================================================================== */ + +/*********************** + * Computes: + * a[] = value - b[] + */ + +T[] _arrayExpSliceMinSliceAssign_d(T[] a, T[] b, T value) +in +{ + assert (a.length == b.length); + assert (disjoint(a, b)); +} +body +{ + //printf("_arrayExpSliceMinSliceAssign_d()\n"); + auto aptr = a.ptr; + auto aend = aptr + a.length; + auto bptr = b.ptr; + + version (D_InlineAsm_X86) + { + // SSE2 version is 66% faster + if (sse2() && a.length >= 8) + { + auto n = aptr + (a.length & ~7); + + // Unaligned case + asm + { + mov EAX, bptr; + mov ESI, aptr; + mov EDI, n; + movsd XMM4, value; + shufpd XMM4, XMM4, 0; + + align 8; + startsseloop: + add ESI, 64; + movapd XMM5, XMM4; + movapd XMM6, XMM4; + movupd XMM0, [EAX]; + movupd XMM1, [EAX+16]; + movupd XMM2, [EAX+32]; + movupd XMM3, [EAX+48]; + add EAX, 64; + subpd XMM5, XMM0; + subpd XMM6, XMM1; + movupd [ESI+ 0-64], XMM5; + movupd [ESI+16-64], XMM6; + movapd XMM5, XMM4; + movapd XMM6, XMM4; + subpd XMM5, XMM2; + subpd XMM6, XMM3; + movupd [ESI+32-64], XMM5; + movupd [ESI+48-64], XMM6; + cmp ESI, EDI; + jb startsseloop; + + mov aptr, ESI; + mov bptr, EAX; + } + } + } + + while (aptr < aend) + *aptr++ = value - *bptr++; + + return a; +} + +unittest +{ + printf("_arrayExpSliceMinSliceAssign_d unittest\n"); + for (cpuid = 0; cpuid < CPUID_MAX; cpuid++) + { + version (log) printf(" cpuid %d\n", cpuid); + + for (int j = 0; j < 2; j++) + { + const int dim = 67; + T[] a = new T[dim + j]; // aligned on 16 byte boundary + a = a[j .. dim + j]; // misalign for second iteration + T[] b = new T[dim + j]; + b = b[j .. dim + j]; + T[] c = new T[dim + j]; + c = c[j .. 
dim + j]; + + for (int i = 0; i < dim; i++) + { a[i] = cast(T)i; + b[i] = cast(T)(i + 7); + c[i] = cast(T)(i * 2); + } + + c[] = 6 - a[]; + + for (int i = 0; i < dim; i++) + { + if (c[i] != cast(T)(6 - a[i])) + { + printf("[%d]: %g != 6 - %g\n", i, c[i], a[i]); + assert(0); + } + } + } + } +} + +/* ======================================================================== */ + +/*********************** + * Computes: + * a[] -= value + */ + +T[] _arrayExpSliceMinass_d(T[] a, T value) +{ + //printf("_arrayExpSliceMinass_d(a.length = %d, value = %Lg)\n", a.length, cast(real)value); + auto aptr = a.ptr; + auto aend = aptr + a.length; + + version (D_InlineAsm_X86) + { + // SSE2 version is 115% faster + if (sse2() && a.length >= 8) + { + auto n = cast(T*)((cast(uint)aend) & ~7); + if (aptr < n) + + // Unaligned case + asm + { + mov ESI, aptr; + mov EDI, n; + movsd XMM4, value; + shufpd XMM4, XMM4, 0; + + align 8; + startsseloopa: + movupd XMM0, [ESI]; + movupd XMM1, [ESI+16]; + movupd XMM2, [ESI+32]; + movupd XMM3, [ESI+48]; + add ESI, 64; + subpd XMM0, XMM4; + subpd XMM1, XMM4; + subpd XMM2, XMM4; + subpd XMM3, XMM4; + movupd [ESI+ 0-64], XMM0; + movupd [ESI+16-64], XMM1; + movupd [ESI+32-64], XMM2; + movupd [ESI+48-64], XMM3; + cmp ESI, EDI; + jb startsseloopa; + + mov aptr, ESI; + } + } + } + + while (aptr < aend) + *aptr++ -= value; + + return a; +} + +unittest +{ + printf("_arrayExpSliceMinass_d unittest\n"); + for (cpuid = 0; cpuid < CPUID_MAX; cpuid++) + { + version (log) printf(" cpuid %d\n", cpuid); + + for (int j = 0; j < 2; j++) + { + const int dim = 67; + T[] a = new T[dim + j]; // aligned on 16 byte boundary + a = a[j .. dim + j]; // misalign for second iteration + T[] b = new T[dim + j]; + b = b[j .. dim + j]; + T[] c = new T[dim + j]; + c = c[j .. 
dim + j]; + + for (int i = 0; i < dim; i++) + { a[i] = cast(T)i; + b[i] = cast(T)(i + 7); + c[i] = cast(T)(i * 2); + } + + a[] = c[]; + c[] -= 6; + + for (int i = 0; i < dim; i++) + { + if (c[i] != cast(T)(a[i] - 6)) + { + printf("[%d]: %g != %g - 6\n", i, c[i], a[i]); + assert(0); + } + } + } + } +} + +/* ======================================================================== */ + +/*********************** + * Computes: + * a[] -= b[] + */ + +T[] _arraySliceSliceMinass_d(T[] a, T[] b) +in +{ + assert (a.length == b.length); + assert (disjoint(a, b)); +} +body +{ + //printf("_arraySliceSliceMinass_d()\n"); + auto aptr = a.ptr; + auto aend = aptr + a.length; + auto bptr = b.ptr; + + version (D_InlineAsm_X86) + { + // SSE2 version is 183% faster + if (sse2() && a.length >= 8) + { + auto n = aptr + (a.length & ~7); + + // Unaligned case + asm + { + mov ECX, bptr; // right operand + mov ESI, aptr; // destination operand + mov EDI, n; // end comparison + + align 8; + startsseloopb: + movupd XMM0, [ESI]; + movupd XMM1, [ESI+16]; + movupd XMM2, [ESI+32]; + movupd XMM3, [ESI+48]; + add ESI, 64; + movupd XMM4, [ECX]; + movupd XMM5, [ECX+16]; + movupd XMM6, [ECX+32]; + movupd XMM7, [ECX+48]; + add ECX, 64; + subpd XMM0, XMM4; + subpd XMM1, XMM5; + subpd XMM2, XMM6; + subpd XMM3, XMM7; + movupd [ESI+ 0-64], XMM0; + movupd [ESI+16-64], XMM1; + movupd [ESI+32-64], XMM2; + movupd [ESI+48-64], XMM3; + cmp ESI, EDI; + jb startsseloopb; + + mov aptr, ESI; + mov bptr, ECX; + } + } + } + + while (aptr < aend) + *aptr++ -= *bptr++; + + return a; +} + +unittest +{ + printf("_arrayExpSliceMinass_d unittest\n"); + for (cpuid = 0; cpuid < CPUID_MAX; cpuid++) + { + version (log) printf(" cpuid %d\n", cpuid); + + for (int j = 0; j < 2; j++) + { + const int dim = 67; + T[] a = new T[dim + j]; // aligned on 16 byte boundary + a = a[j .. dim + j]; // misalign for second iteration + T[] b = new T[dim + j]; + b = b[j .. dim + j]; + T[] c = new T[dim + j]; + c = c[j .. 
dim + j]; + + for (int i = 0; i < dim; i++) + { a[i] = cast(T)i; + b[i] = cast(T)(i + 7); + c[i] = cast(T)(i * 2); + } + + a[] = c[]; + c[] -= 6; + + for (int i = 0; i < dim; i++) + { + if (c[i] != cast(T)(a[i] - 6)) + { + printf("[%d]: %g != %g - 6\n", i, c[i], a[i]); + assert(0); + } + } + } + } +} + +/* ======================================================================== */ + +/*********************** + * Computes: + * a[] = b[] * value + */ + +T[] _arraySliceExpMulSliceAssign_d(T[] a, T value, T[] b) +in +{ + assert(a.length == b.length); + assert(disjoint(a, b)); +} +body +{ + //printf("_arraySliceExpMulSliceAssign_d()\n"); + auto aptr = a.ptr; + auto aend = aptr + a.length; + auto bptr = b.ptr; + + version (D_InlineAsm_X86) + { + // SSE2 version is 304% faster + if (sse2() && a.length >= 8) + { + auto n = aptr + (a.length & ~7); + + // Unaligned case + asm + { + mov EAX, bptr; + mov ESI, aptr; + mov EDI, n; + movsd XMM4, value; + shufpd XMM4, XMM4, 0; + + align 8; + startsseloop: + add ESI, 64; + movupd XMM0, [EAX]; + movupd XMM1, [EAX+16]; + movupd XMM2, [EAX+32]; + movupd XMM3, [EAX+48]; + add EAX, 64; + mulpd XMM0, XMM4; + mulpd XMM1, XMM4; + mulpd XMM2, XMM4; + mulpd XMM3, XMM4; + movupd [ESI+ 0-64], XMM0; + movupd [ESI+16-64], XMM1; + movupd [ESI+32-64], XMM2; + movupd [ESI+48-64], XMM3; + cmp ESI, EDI; + jb startsseloop; + + mov aptr, ESI; + mov bptr, EAX; + } + } + } + + while (aptr < aend) + *aptr++ = *bptr++ * value; + + return a; +} + +unittest +{ + printf("_arraySliceExpMulSliceAssign_d unittest\n"); + for (cpuid = 0; cpuid < CPUID_MAX; cpuid++) + { + version (log) printf(" cpuid %d\n", cpuid); + + for (int j = 0; j < 2; j++) + { + const int dim = 67; + T[] a = new T[dim + j]; // aligned on 16 byte boundary + a = a[j .. dim + j]; // misalign for second iteration + T[] b = new T[dim + j]; + b = b[j .. dim + j]; + T[] c = new T[dim + j]; + c = c[j .. 
dim + j]; + + for (int i = 0; i < dim; i++) + { a[i] = cast(T)i; + b[i] = cast(T)(i + 7); + c[i] = cast(T)(i * 2); + } + + c[] = a[] * 6; + + for (int i = 0; i < dim; i++) + { + if (c[i] != cast(T)(a[i] * 6)) + { + printf("[%d]: %g != %g * 6\n", i, c[i], a[i]); + assert(0); + } + } + } + } +} + +/* ======================================================================== */ + +/*********************** + * Computes: + * a[] = b[] * c[] + */ + +T[] _arraySliceSliceMulSliceAssign_d(T[] a, T[] c, T[] b) +in +{ + assert(a.length == b.length && b.length == c.length); + assert(disjoint(a, b)); + assert(disjoint(a, c)); + assert(disjoint(b, c)); +} +body +{ + //printf("_arraySliceSliceMulSliceAssign_d()\n"); + auto aptr = a.ptr; + auto aend = aptr + a.length; + auto bptr = b.ptr; + auto cptr = c.ptr; + + version (D_InlineAsm_X86) + { + // SSE2 version is 329% faster + if (sse2() && a.length >= 8) + { + auto n = aptr + (a.length & ~7); + + // Unaligned case + asm + { + mov EAX, bptr; // left operand + mov ECX, cptr; // right operand + mov ESI, aptr; // destination operand + mov EDI, n; // end comparison + + align 8; + startsseloopb: + movupd XMM0, [EAX]; + movupd XMM1, [EAX+16]; + movupd XMM2, [EAX+32]; + movupd XMM3, [EAX+48]; + add ESI, 64; + movupd XMM4, [ECX]; + movupd XMM5, [ECX+16]; + movupd XMM6, [ECX+32]; + movupd XMM7, [ECX+48]; + add EAX, 64; + mulpd XMM0, XMM4; + mulpd XMM1, XMM5; + mulpd XMM2, XMM6; + mulpd XMM3, XMM7; + add ECX, 64; + movupd [ESI+ 0-64], XMM0; + movupd [ESI+16-64], XMM1; + movupd [ESI+32-64], XMM2; + movupd [ESI+48-64], XMM3; + cmp ESI, EDI; + jb startsseloopb; + + mov aptr, ESI; + mov bptr, EAX; + mov cptr, ECX; + } + } + } + + while (aptr < aend) + *aptr++ = *bptr++ * *cptr++; + + return a; +} + +unittest +{ + printf("_arraySliceSliceMulSliceAssign_d unittest\n"); + for (cpuid = 0; cpuid < CPUID_MAX; cpuid++) + { + version (log) printf(" cpuid %d\n", cpuid); + + for (int j = 0; j < 2; j++) + { + const int dim = 67; + T[] a = new T[dim + j]; // 
aligned on 16 byte boundary + a = a[j .. dim + j]; // misalign for second iteration + T[] b = new T[dim + j]; + b = b[j .. dim + j]; + T[] c = new T[dim + j]; + c = c[j .. dim + j]; + + for (int i = 0; i < dim; i++) + { a[i] = cast(T)i; + b[i] = cast(T)(i + 7); + c[i] = cast(T)(i * 2); + } + + c[] = a[] * b[]; + + for (int i = 0; i < dim; i++) + { + if (c[i] != cast(T)(a[i] * b[i])) + { + printf("[%d]: %g != %g * %g\n", i, c[i], a[i], b[i]); + assert(0); + } + } + } + } +} + +/* ======================================================================== */ + +/*********************** + * Computes: + * a[] *= value + */ + +T[] _arrayExpSliceMulass_d(T[] a, T value) +{ + //printf("_arrayExpSliceMulass_d(a.length = %d, value = %Lg)\n", a.length, cast(real)value); + auto aptr = a.ptr; + auto aend = aptr + a.length; + + version (D_InlineAsm_X86) + { + // SSE2 version is 109% faster + if (sse2() && a.length >= 8) + { + auto n = cast(T*)((cast(uint)aend) & ~7); + if (aptr < n) + + // Unaligned case + asm + { + mov ESI, aptr; + mov EDI, n; + movsd XMM4, value; + shufpd XMM4, XMM4, 0; + + align 8; + startsseloopa: + movupd XMM0, [ESI]; + movupd XMM1, [ESI+16]; + movupd XMM2, [ESI+32]; + movupd XMM3, [ESI+48]; + add ESI, 64; + mulpd XMM0, XMM4; + mulpd XMM1, XMM4; + mulpd XMM2, XMM4; + mulpd XMM3, XMM4; + movupd [ESI+ 0-64], XMM0; + movupd [ESI+16-64], XMM1; + movupd [ESI+32-64], XMM2; + movupd [ESI+48-64], XMM3; + cmp ESI, EDI; + jb startsseloopa; + + mov aptr, ESI; + } + } + } + + while (aptr < aend) + *aptr++ *= value; + + return a; +} + +unittest +{ + printf("_arrayExpSliceMulass_d unittest\n"); + for (cpuid = 0; cpuid < CPUID_MAX; cpuid++) + { + version (log) printf(" cpuid %d\n", cpuid); + + for (int j = 0; j < 2; j++) + { + const int dim = 67; + T[] a = new T[dim + j]; // aligned on 16 byte boundary + a = a[j .. dim + j]; // misalign for second iteration + T[] b = new T[dim + j]; + b = b[j .. dim + j]; + T[] c = new T[dim + j]; + c = c[j .. 
dim + j]; + + for (int i = 0; i < dim; i++) + { a[i] = cast(T)i; + b[i] = cast(T)(i + 7); + c[i] = cast(T)(i * 2); + } + + a[] = c[]; + c[] *= 6; + + for (int i = 0; i < dim; i++) + { + if (c[i] != cast(T)(a[i] * 6)) + { + printf("[%d]: %g != %g * 6\n", i, c[i], a[i]); + assert(0); + } + } + } + } +} + +/* ======================================================================== */ + +/*********************** + * Computes: + * a[] *= b[] + */ + +T[] _arraySliceSliceMulass_d(T[] a, T[] b) +in +{ + assert (a.length == b.length); + assert (disjoint(a, b)); +} +body +{ + //printf("_arraySliceSliceMulass_d()\n"); + auto aptr = a.ptr; + auto aend = aptr + a.length; + auto bptr = b.ptr; + + version (D_InlineAsm_X86) + { + // SSE2 version is 205% faster + if (sse2() && a.length >= 8) + { + auto n = aptr + (a.length & ~7); + + // Unaligned case + asm + { + mov ECX, bptr; // right operand + mov ESI, aptr; // destination operand + mov EDI, n; // end comparison + + align 8; + startsseloopb: + movupd XMM0, [ESI]; + movupd XMM1, [ESI+16]; + movupd XMM2, [ESI+32]; + movupd XMM3, [ESI+48]; + add ESI, 64; + movupd XMM4, [ECX]; + movupd XMM5, [ECX+16]; + movupd XMM6, [ECX+32]; + movupd XMM7, [ECX+48]; + add ECX, 64; + mulpd XMM0, XMM4; + mulpd XMM1, XMM5; + mulpd XMM2, XMM6; + mulpd XMM3, XMM7; + movupd [ESI+ 0-64], XMM0; + movupd [ESI+16-64], XMM1; + movupd [ESI+32-64], XMM2; + movupd [ESI+48-64], XMM3; + cmp ESI, EDI; + jb startsseloopb; + + mov aptr, ESI; + mov bptr, ECX; + } + } + } + + while (aptr < aend) + *aptr++ *= *bptr++; + + return a; +} + +unittest +{ + printf("_arrayExpSliceMulass_d unittest\n"); + for (cpuid = 0; cpuid < CPUID_MAX; cpuid++) + { + version (log) printf(" cpuid %d\n", cpuid); + + for (int j = 0; j < 2; j++) + { + const int dim = 67; + T[] a = new T[dim + j]; // aligned on 16 byte boundary + a = a[j .. dim + j]; // misalign for second iteration + T[] b = new T[dim + j]; + b = b[j .. dim + j]; + T[] c = new T[dim + j]; + c = c[j .. 
dim + j]; + + for (int i = 0; i < dim; i++) + { a[i] = cast(T)i; + b[i] = cast(T)(i + 7); + c[i] = cast(T)(i * 2); + } + + a[] = c[]; + c[] *= 6; + + for (int i = 0; i < dim; i++) + { + if (c[i] != cast(T)(a[i] * 6)) + { + printf("[%d]: %g != %g * 6\n", i, c[i], a[i]); + assert(0); + } + } + } + } +} + +/* ======================================================================== */ + +/*********************** + * Computes: + * a[] = b[] / value + */ + +T[] _arraySliceExpDivSliceAssign_d(T[] a, T value, T[] b) +in +{ + assert(a.length == b.length); + assert(disjoint(a, b)); +} +body +{ + //printf("_arraySliceExpDivSliceAssign_d()\n"); + auto aptr = a.ptr; + auto aend = aptr + a.length; + auto bptr = b.ptr; + + /* Multiplying by the reciprocal is faster, but does + * not produce as accurate an answer. + */ + T recip = cast(T)1 / value; + + version (D_InlineAsm_X86) + { + // SSE2 version is 299% faster + if (sse2() && a.length >= 8) + { + auto n = aptr + (a.length & ~7); + + // Unaligned case + asm + { + mov EAX, bptr; + mov ESI, aptr; + mov EDI, n; + movsd XMM4, recip; + //movsd XMM4, value + //rcpsd XMM4, XMM4 + shufpd XMM4, XMM4, 0; + + align 8; + startsseloop: + add ESI, 64; + movupd XMM0, [EAX]; + movupd XMM1, [EAX+16]; + movupd XMM2, [EAX+32]; + movupd XMM3, [EAX+48]; + add EAX, 64; + mulpd XMM0, XMM4; + mulpd XMM1, XMM4; + mulpd XMM2, XMM4; + mulpd XMM3, XMM4; + //divpd XMM0, XMM4; + //divpd XMM1, XMM4; + //divpd XMM2, XMM4; + //divpd XMM3, XMM4; + movupd [ESI+ 0-64], XMM0; + movupd [ESI+16-64], XMM1; + movupd [ESI+32-64], XMM2; + movupd [ESI+48-64], XMM3; + cmp ESI, EDI; + jb startsseloop; + + mov aptr, ESI; + mov bptr, EAX; + } + } + } + + while (aptr < aend) + { + *aptr++ = *bptr++ / value; + //*aptr++ = *bptr++ * recip; + } + + return a; +} + +unittest +{ + printf("_arraySliceExpDivSliceAssign_d unittest\n"); + for (cpuid = 0; cpuid < CPUID_MAX; cpuid++) + { + version (log) printf(" cpuid %d\n", cpuid); + + for (int j = 0; j < 2; j++) + { + const int dim = 
67; + T[] a = new T[dim + j]; // aligned on 16 byte boundary + a = a[j .. dim + j]; // misalign for second iteration + T[] b = new T[dim + j]; + b = b[j .. dim + j]; + T[] c = new T[dim + j]; + c = c[j .. dim + j]; + + for (int i = 0; i < dim; i++) + { a[i] = cast(T)i; + b[i] = cast(T)(i + 7); + c[i] = cast(T)(i * 2); + } + + c[] = a[] / 8; + + for (int i = 0; i < dim; i++) + { + //printf("[%d]: %g ?= %g / 8\n", i, c[i], a[i]); + if (c[i] != cast(T)(a[i] / 8)) + { + printf("[%d]: %g != %g / 8\n", i, c[i], a[i]); + assert(0); + } + } + } + } +} + +/* ======================================================================== */ + +/*********************** + * Computes: + * a[] /= value + */ + +T[] _arrayExpSliceDivass_d(T[] a, T value) +{ + //printf("_arrayExpSliceDivass_d(a.length = %d, value = %Lg)\n", a.length, cast(real)value); + auto aptr = a.ptr; + auto aend = aptr + a.length; + + /* Multiplying by the reciprocal is faster, but does + * not produce as accurate an answer. + */ + T recip = cast(T)1 / value; + + version (D_InlineAsm_X86) + { + // SSE2 version is 65% faster + if (sse2() && a.length >= 8) + { + auto n = aptr + (a.length & ~7); + + // Unaligned case + asm + { + mov ESI, aptr; + mov EDI, n; + movsd XMM4, recip; + //movsd XMM4, value + //rcpsd XMM4, XMM4 + shufpd XMM4, XMM4, 0; + + align 8; + startsseloopa: + movupd XMM0, [ESI]; + movupd XMM1, [ESI+16]; + movupd XMM2, [ESI+32]; + movupd XMM3, [ESI+48]; + add ESI, 64; + mulpd XMM0, XMM4; + mulpd XMM1, XMM4; + mulpd XMM2, XMM4; + mulpd XMM3, XMM4; + //divpd XMM0, XMM4; + //divpd XMM1, XMM4; + //divpd XMM2, XMM4; + //divpd XMM3, XMM4; + movupd [ESI+ 0-64], XMM0; + movupd [ESI+16-64], XMM1; + movupd [ESI+32-64], XMM2; + movupd [ESI+48-64], XMM3; + cmp ESI, EDI; + jb startsseloopa; + + mov aptr, ESI; + } + } + } + + while (aptr < aend) + *aptr++ *= recip; + + return a; +} + + +unittest +{ + printf("_arrayExpSliceDivass_d unittest\n"); + for (cpuid = 0; cpuid < CPUID_MAX; cpuid++) + { + version (log) printf(" 
cpuid %d\n", cpuid); + + for (int j = 0; j < 2; j++) + { + const int dim = 67; + T[] a = new T[dim + j]; // aligned on 16 byte boundary + a = a[j .. dim + j]; // misalign for second iteration + T[] b = new T[dim + j]; + b = b[j .. dim + j]; + T[] c = new T[dim + j]; + c = c[j .. dim + j]; + + for (int i = 0; i < dim; i++) + { a[i] = cast(T)i; + b[i] = cast(T)(i + 7); + c[i] = cast(T)(i * 2); + } + + a[] = c[]; + c[] /= 8; + + for (int i = 0; i < dim; i++) + { + if (c[i] != cast(T)(a[i] / 8)) + { + printf("[%d]: %g != %g / 8\n", i, c[i], a[i]); + assert(0); + } + } + } + } +} + + +/* ======================================================================== */ + +/*********************** + * Computes: + * a[] -= b[] * value + */ + +T[] _arraySliceExpMulSliceMinass_d(T[] a, T value, T[] b) +{ + return _arraySliceExpMulSliceAddass_d(a, -value, b); +} + +/*********************** + * Computes: + * a[] += b[] * value + */ + +T[] _arraySliceExpMulSliceAddass_d(T[] a, T value, T[] b) +in +{ + assert(a.length == b.length); + assert(disjoint(a, b)); +} +body +{ + auto aptr = a.ptr; + auto aend = aptr + a.length; + auto bptr = b.ptr; + + // Handle remainder + while (aptr < aend) + *aptr++ += *bptr++ * value; + + return a; +} + +unittest +{ + printf("_arraySliceExpMulSliceAddass_d unittest\n"); + + cpuid = 1; + { + version (log) printf(" cpuid %d\n", cpuid); + + for (int j = 0; j < 1; j++) + { + const int dim = 67; + T[] a = new T[dim + j]; // aligned on 16 byte boundary + a = a[j .. dim + j]; // misalign for second iteration + T[] b = new T[dim + j]; + b = b[j .. dim + j]; + T[] c = new T[dim + j]; + c = c[j .. 
dim + j]; + + for (int i = 0; i < dim; i++) + { a[i] = cast(T)i; + b[i] = cast(T)(i + 7); + c[i] = cast(T)(i * 2); + } + + b[] = c[]; + c[] += a[] * 6; + + for (int i = 0; i < dim; i++) + { + //printf("[%d]: %g ?= %g + %g * 6\n", i, c[i], b[i], a[i]); + if (c[i] != cast(T)(b[i] + a[i] * 6)) + { + printf("[%d]: %g ?= %g + %g * 6\n", i, c[i], b[i], a[i]); + assert(0); + } + } + } + } +} diff --git a/druntime/src/compiler/dmd/arrayfloat.d b/druntime/src/compiler/dmd/arrayfloat.d new file mode 100644 index 00000000..fa73050c --- /dev/null +++ b/druntime/src/compiler/dmd/arrayfloat.d @@ -0,0 +1,2309 @@ +/** + * Contains SSE2 and MMX versions of certain operations for float. + * + * Copyright: Copyright Digital Mars 2008 - 2009. + * License: = 16) + { + version (log) printf("\tsse unaligned\n"); + auto n = aptr + (b.length & ~15); + + // Unaligned case + asm + { + mov EAX, bptr; // left operand + mov ECX, cptr; // right operand + mov ESI, aptr; // destination operand + mov EDI, n; // end comparison + + align 8; + startsseloopb: + movups XMM0, [EAX]; + movups XMM1, [EAX+16]; + movups XMM2, [EAX+32]; + movups XMM3, [EAX+48]; + add EAX, 64; + movups XMM4, [ECX]; + movups XMM5, [ECX+16]; + movups XMM6, [ECX+32]; + movups XMM7, [ECX+48]; + add ESI, 64; + addps XMM0, XMM4; + addps XMM1, XMM5; + addps XMM2, XMM6; + addps XMM3, XMM7; + add ECX, 64; + movups [ESI+ 0-64], XMM0; + movups [ESI+16-64], XMM1; + movups [ESI+32-64], XMM2; + movups [ESI+48-64], XMM3; + cmp ESI, EDI; + jb startsseloopb; + + mov aptr, ESI; + mov bptr, EAX; + mov cptr, ECX; + } + } + else + // 3DNow! 
version is only 13% faster + if (amd3dnow() && b.length >= 8) + { + version (log) printf("\tamd3dnow\n"); + auto n = aptr + (b.length & ~7); + + asm + { + mov ESI, aptr; // destination operand + mov EDI, n; // end comparison + mov EAX, bptr; // left operand + mov ECX, cptr; // right operand + + align 4; + start3dnow: + movq MM0, [EAX]; + movq MM1, [EAX+8]; + movq MM2, [EAX+16]; + movq MM3, [EAX+24]; + pfadd MM0, [ECX]; + pfadd MM1, [ECX+8]; + pfadd MM2, [ECX+16]; + pfadd MM3, [ECX+24]; + movq [ESI], MM0; + movq [ESI+8], MM1; + movq [ESI+16], MM2; + movq [ESI+24], MM3; + add ECX, 32; + add ESI, 32; + add EAX, 32; + cmp ESI, EDI; + jb start3dnow; + + emms; + mov aptr, ESI; + mov bptr, EAX; + mov cptr, ECX; + } + } + } + + // Handle remainder + version (log) if (aptr < aend) printf("\tbase\n"); + while (aptr < aend) + *aptr++ = *bptr++ + *cptr++; + + return a; +} + + +unittest +{ + printf("_arraySliceSliceAddSliceAssign_f unittest\n"); + for (cpuid = 0; cpuid < CPUID_MAX; cpuid++) + { + version (log) printf(" cpuid %d\n", cpuid); + + for (int j = 0; j < 2; j++) + { + const int dim = 67; + T[] a = new T[dim + j]; // aligned on 16 byte boundary + a = a[j .. dim + j]; // misalign for second iteration + T[] b = new T[dim + j]; + b = b[j .. dim + j]; + T[] c = new T[dim + j]; + c = c[j .. 
dim + j]; + + for (int i = 0; i < dim; i++) + { a[i] = cast(T)i; + b[i] = cast(T)(i + 7); + c[i] = cast(T)(i * 2); + } + + c[] = a[] + b[]; + + for (int i = 0; i < dim; i++) + { + if (c[i] != cast(T)(a[i] + b[i])) + { + printf("[%d]: %g != %g + %g\n", i, c[i], a[i], b[i]); + assert(0); + } + } + } + } +} + +/* ======================================================================== */ + +/*********************** + * Computes: + * a[] = b[] - c[] + */ + +T[] _arraySliceSliceMinSliceAssign_f(T[] a, T[] c, T[] b) +in +{ + assert(a.length == b.length && b.length == c.length); + assert(disjoint(a, b)); + assert(disjoint(a, c)); + assert(disjoint(b, c)); +} +body +{ + auto aptr = a.ptr; + auto aend = aptr + a.length; + auto bptr = b.ptr; + auto cptr = c.ptr; + + version (D_InlineAsm_X86) + { + // SSE version is 834% faster + if (sse() && b.length >= 16) + { + auto n = aptr + (b.length & ~15); + + // Unaligned case + asm + { + mov EAX, bptr; // left operand + mov ECX, cptr; // right operand + mov ESI, aptr; // destination operand + mov EDI, n; // end comparison + + align 8; + startsseloopb: + movups XMM0, [EAX]; + movups XMM1, [EAX+16]; + movups XMM2, [EAX+32]; + movups XMM3, [EAX+48]; + add EAX, 64; + movups XMM4, [ECX]; + movups XMM5, [ECX+16]; + movups XMM6, [ECX+32]; + movups XMM7, [ECX+48]; + add ESI, 64; + subps XMM0, XMM4; + subps XMM1, XMM5; + subps XMM2, XMM6; + subps XMM3, XMM7; + add ECX, 64; + movups [ESI+ 0-64], XMM0; + movups [ESI+16-64], XMM1; + movups [ESI+32-64], XMM2; + movups [ESI+48-64], XMM3; + cmp ESI, EDI; + jb startsseloopb; + + mov aptr, ESI; + mov bptr, EAX; + mov cptr, ECX; + } + } + else + // 3DNow! 
version is only 13% faster + if (amd3dnow() && b.length >= 8) + { + auto n = aptr + (b.length & ~7); + + asm + { + mov ESI, aptr; // destination operand + mov EDI, n; // end comparison + mov EAX, bptr; // left operand + mov ECX, cptr; // right operand + + align 4; + start3dnow: + movq MM0, [EAX]; + movq MM1, [EAX+8]; + movq MM2, [EAX+16]; + movq MM3, [EAX+24]; + pfsub MM0, [ECX]; + pfsub MM1, [ECX+8]; + pfsub MM2, [ECX+16]; + pfsub MM3, [ECX+24]; + movq [ESI], MM0; + movq [ESI+8], MM1; + movq [ESI+16], MM2; + movq [ESI+24], MM3; + add ECX, 32; + add ESI, 32; + add EAX, 32; + cmp ESI, EDI; + jb start3dnow; + + emms; + mov aptr, ESI; + mov bptr, EAX; + mov cptr, ECX; + } + } + } + + // Handle remainder + while (aptr < aend) + *aptr++ = *bptr++ - *cptr++; + + return a; +} + + +unittest +{ + printf("_arraySliceSliceMinSliceAssign_f unittest\n"); + for (cpuid = 0; cpuid < CPUID_MAX; cpuid++) + { + version (log) printf(" cpuid %d\n", cpuid); + + for (int j = 0; j < 2; j++) + { + const int dim = 67; + T[] a = new T[dim + j]; // aligned on 16 byte boundary + a = a[j .. dim + j]; // misalign for second iteration + T[] b = new T[dim + j]; + b = b[j .. dim + j]; + T[] c = new T[dim + j]; + c = c[j .. 
dim + j]; + + for (int i = 0; i < dim; i++) + { a[i] = cast(T)i; + b[i] = cast(T)(i + 7); + c[i] = cast(T)(i * 2); + } + + c[] = a[] - b[]; + + for (int i = 0; i < dim; i++) + { + if (c[i] != cast(T)(a[i] - b[i])) + { + printf("[%d]: %g != %gd - %g\n", i, c[i], a[i], b[i]); + assert(0); + } + } + } + } +} + +/* ======================================================================== */ + +/*********************** + * Computes: + * a[] = b[] + value + */ + +T[] _arraySliceExpAddSliceAssign_f(T[] a, T value, T[] b) +in +{ + assert(a.length == b.length); + assert(disjoint(a, b)); +} +body +{ + //printf("_arraySliceExpAddSliceAssign_f()\n"); + auto aptr = a.ptr; + auto aend = aptr + a.length; + auto bptr = b.ptr; + + version (D_InlineAsm_X86) + { + // SSE version is 665% faster + if (sse() && a.length >= 16) + { + auto n = aptr + (a.length & ~15); + + // Unaligned case + asm + { + mov EAX, bptr; + mov ESI, aptr; + mov EDI, n; + movss XMM4, value; + shufps XMM4, XMM4, 0; + + align 8; + startsseloop: + add ESI, 64; + movups XMM0, [EAX]; + movups XMM1, [EAX+16]; + movups XMM2, [EAX+32]; + movups XMM3, [EAX+48]; + add EAX, 64; + addps XMM0, XMM4; + addps XMM1, XMM4; + addps XMM2, XMM4; + addps XMM3, XMM4; + movups [ESI+ 0-64], XMM0; + movups [ESI+16-64], XMM1; + movups [ESI+32-64], XMM2; + movups [ESI+48-64], XMM3; + cmp ESI, EDI; + jb startsseloop; + + mov aptr, ESI; + mov bptr, EAX; + } + } + else + // 3DNow! 
version is 69% faster + if (amd3dnow() && a.length >= 8) + { + auto n = aptr + (a.length & ~7); + + ulong w = *cast(uint *) &value; + ulong v = w | (w << 32L); + + asm + { + mov ESI, aptr; + mov EDI, n; + mov EAX, bptr; + movq MM4, qword ptr [v]; + + align 8; + start3dnow: + movq MM0, [EAX]; + movq MM1, [EAX+8]; + movq MM2, [EAX+16]; + movq MM3, [EAX+24]; + pfadd MM0, MM4; + pfadd MM1, MM4; + pfadd MM2, MM4; + pfadd MM3, MM4; + movq [ESI], MM0; + movq [ESI+8], MM1; + movq [ESI+16], MM2; + movq [ESI+24], MM3; + add ESI, 32; + add EAX, 32; + cmp ESI, EDI; + jb start3dnow; + + emms; + mov aptr, ESI; + mov bptr, EAX; + } + } + } + + while (aptr < aend) + *aptr++ = *bptr++ + value; + + return a; +} + +unittest +{ + printf("_arraySliceExpAddSliceAssign_f unittest\n"); + for (cpuid = 0; cpuid < CPUID_MAX; cpuid++) + { + version (log) printf(" cpuid %d\n", cpuid); + + for (int j = 0; j < 2; j++) + { + const int dim = 67; + T[] a = new T[dim + j]; // aligned on 16 byte boundary + a = a[j .. dim + j]; // misalign for second iteration + T[] b = new T[dim + j]; + b = b[j .. dim + j]; + T[] c = new T[dim + j]; + c = c[j .. 
dim + j];
+
+            for (int i = 0; i < dim; i++)
+            {   a[i] = cast(T)i;
+                b[i] = cast(T)(i + 7);
+                c[i] = cast(T)(i * 2);
+            }
+
+            c[] = a[] + 6;
+
+            for (int i = 0; i < dim; i++)
+            {
+                if (c[i] != cast(T)(a[i] + 6))
+                {
+                    printf("[%d]: %g != %g + 6\n", i, c[i], a[i]);
+                    assert(0);
+                }
+            }
+        }
+    }
+}
+
+/* ======================================================================== */
+
+/***********************
+ * Computes:
+ *      a[] += value
+ *
+ * Params:
+ *      a     = slice updated in place
+ *      value = scalar added to every element of a
+ * Returns:
+ *      a
+ *
+ * Dispatch: an SSE loop for lengths >= 16 when sse() reports support,
+ * otherwise a 3DNow! loop for lengths >= 8 when amd3dnow() reports
+ * support; whatever is left over is finished by the scalar loop at the end.
+ */
+
+T[] _arrayExpSliceAddass_f(T[] a, T value)
+{
+    //printf("_arrayExpSliceAddass_f(a.length = %d, value = %Lg)\n", a.length, cast(real)value);
+    auto aptr = a.ptr;
+    auto aend = aptr + a.length;
+
+    version (D_InlineAsm_X86)
+    {
+        // SSE version is 302% faster
+        if (sse() && a.length >= 16)
+        {
+            // Scalar prologue: step aptr up to the next 16-byte boundary so
+            // the movaps accesses in the loop below are aligned.
+            auto n = cast(T*)((cast(uint)aptr + 15) & ~15);
+            while (aptr < n)
+                *aptr++ += value;
+
+            // The loop consumes 16 elements (64 bytes) per pass and tests its
+            // bound only after each pass, so the bound has to sit a whole
+            // number of passes beyond aptr.  Rounding aend down to a 16-byte
+            // boundary (the previous computation) could leave a 16/32/48
+            // byte span, making the last pass write past the end of a.
+            n = aptr + ((aend - aptr) & ~15);
+            if (aptr < n)
+
+            // Aligned case
+            asm
+            {
+                mov ESI, aptr;
+                mov EDI, n;
+                movss XMM4, value;
+                shufps XMM4, XMM4, 0;       // broadcast value to all four lanes
+
+                align 8;
+            startsseloopa:
+                movaps XMM0, [ESI];
+                movaps XMM1, [ESI+16];
+                movaps XMM2, [ESI+32];
+                movaps XMM3, [ESI+48];
+                add ESI, 64;
+                addps XMM0, XMM4;
+                addps XMM1, XMM4;
+                addps XMM2, XMM4;
+                addps XMM3, XMM4;
+                movaps [ESI+ 0-64], XMM0;
+                movaps [ESI+16-64], XMM1;
+                movaps [ESI+32-64], XMM2;
+                movaps [ESI+48-64], XMM3;
+                cmp ESI, EDI;
+                jb startsseloopa;
+
+                mov aptr, ESI;              // hand the position back to the scalar tail
+            }
+        }
+        else
+        // 3DNow! version is 63% faster
+        if (amd3dnow() && a.length >= 8)
+        {
+            // 8 elements (32 bytes) per pass; n is a whole number of passes.
+            auto n = aptr + (a.length & ~7);
+
+            // Replicate the bit pattern of value into both halves of a qword.
+            ulong w = *cast(uint *) &value;
+            ulong v = w | (w << 32L);
+
+            asm
+            {
+                mov ESI, dword ptr [aptr];
+                mov EDI, dword ptr [n];
+                movq MM4, qword ptr [v];
+
+                align 8;
+            start3dnow:
+                movq MM0, [ESI];
+                movq MM1, [ESI+8];
+                movq MM2, [ESI+16];
+                movq MM3, [ESI+24];
+                pfadd MM0, MM4;
+                pfadd MM1, MM4;
+                pfadd MM2, MM4;
+                pfadd MM3, MM4;
+                movq [ESI], MM0;
+                movq [ESI+8], MM1;
+                movq [ESI+16], MM2;
+                movq [ESI+24], MM3;
+                add ESI, 32;
+                cmp ESI, EDI;
+                jb start3dnow;
+
+                emms;
+                mov dword ptr [aptr], ESI;
+            }
+        }
+    }
+
+    // Scalar tail (and the whole job when no SIMD path was taken)
+    while (aptr < aend)
+        *aptr++ += value;
+
+    return a;
+}
+
+unittest
+{
+    printf("_arrayExpSliceAddass_f unittest\n");
+    for (cpuid = 0; cpuid < CPUID_MAX; cpuid++)
+    {
+        version (log) printf(" cpuid %d\n", cpuid);
+
+        for (int j = 0; j < 2; j++)
+        {
+            const int dim = 67;
+            T[] a = new T[dim + j];     // aligned on 16 byte boundary
+            a = a[j .. dim + j];        // misalign for second iteration
+            T[] b = new T[dim + j];
+            b = b[j .. dim + j];
+            T[] c = new T[dim + j];
+            c = c[j .. 
dim + j]; + + for (int i = 0; i < dim; i++) + { a[i] = cast(T)i; + b[i] = cast(T)(i + 7); + c[i] = cast(T)(i * 2); + } + + a[] = c[]; + c[] += 6; + + for (int i = 0; i < dim; i++) + { + if (c[i] != cast(T)(a[i] + 6)) + { + printf("[%d]: %g != %g + 6\n", i, c[i], a[i]); + assert(0); + } + } + } + } +} + +/* ======================================================================== */ + +/*********************** + * Computes: + * a[] += b[] + */ + +T[] _arraySliceSliceAddass_f(T[] a, T[] b) +in +{ + assert (a.length == b.length); + assert (disjoint(a, b)); +} +body +{ + //printf("_arraySliceSliceAddass_f()\n"); + auto aptr = a.ptr; + auto aend = aptr + a.length; + auto bptr = b.ptr; + + version (D_InlineAsm_X86) + { + // SSE version is 468% faster + if (sse() && a.length >= 16) + { + auto n = aptr + (a.length & ~15); + + // Unaligned case + asm + { + mov ECX, bptr; // right operand + mov ESI, aptr; // destination operand + mov EDI, n; // end comparison + + align 8; + startsseloopb: + movups XMM0, [ESI]; + movups XMM1, [ESI+16]; + movups XMM2, [ESI+32]; + movups XMM3, [ESI+48]; + add ESI, 64; + movups XMM4, [ECX]; + movups XMM5, [ECX+16]; + movups XMM6, [ECX+32]; + movups XMM7, [ECX+48]; + add ECX, 64; + addps XMM0, XMM4; + addps XMM1, XMM5; + addps XMM2, XMM6; + addps XMM3, XMM7; + movups [ESI+ 0-64], XMM0; + movups [ESI+16-64], XMM1; + movups [ESI+32-64], XMM2; + movups [ESI+48-64], XMM3; + cmp ESI, EDI; + jb startsseloopb; + + mov aptr, ESI; + mov bptr, ECX; + } + } + else + // 3DNow! 
version is 57% faster + if (amd3dnow() && a.length >= 8) + { + auto n = aptr + (a.length & ~7); + + asm + { + mov ESI, dword ptr [aptr]; // destination operand + mov EDI, dword ptr [n]; // end comparison + mov ECX, dword ptr [bptr]; // right operand + + align 4; + start3dnow: + movq MM0, [ESI]; + movq MM1, [ESI+8]; + movq MM2, [ESI+16]; + movq MM3, [ESI+24]; + pfadd MM0, [ECX]; + pfadd MM1, [ECX+8]; + pfadd MM2, [ECX+16]; + pfadd MM3, [ECX+24]; + movq [ESI], MM0; + movq [ESI+8], MM1; + movq [ESI+16], MM2; + movq [ESI+24], MM3; + add ESI, 32; + add ECX, 32; + cmp ESI, EDI; + jb start3dnow; + + emms; + mov dword ptr [aptr], ESI; + mov dword ptr [bptr], ECX; + } + } + } + + while (aptr < aend) + *aptr++ += *bptr++; + + return a; +} + +unittest +{ + printf("_arraySliceSliceAddass_f unittest\n"); + for (cpuid = 0; cpuid < CPUID_MAX; cpuid++) + { + version (log) printf(" cpuid %d\n", cpuid); + + for (int j = 0; j < 2; j++) + { + const int dim = 67; + T[] a = new T[dim + j]; // aligned on 16 byte boundary + a = a[j .. dim + j]; // misalign for second iteration + T[] b = new T[dim + j]; + b = b[j .. dim + j]; + T[] c = new T[dim + j]; + c = c[j .. 
dim + j]; + + for (int i = 0; i < dim; i++) + { a[i] = cast(T)i; + b[i] = cast(T)(i + 7); + c[i] = cast(T)(i * 2); + } + + a[] = c[]; + c[] += b[]; + + for (int i = 0; i < dim; i++) + { + if (c[i] != cast(T)(a[i] + b[i])) + { + printf("[%d]: %g != %g + %g\n", i, c[i], a[i], b[i]); + assert(0); + } + } + } + } +} + +/* ======================================================================== */ + +/*********************** + * Computes: + * a[] = b[] - value + */ + +T[] _arraySliceExpMinSliceAssign_f(T[] a, T value, T[] b) +in +{ + assert (a.length == b.length); + assert (disjoint(a, b)); +} +body +{ + //printf("_arraySliceExpMinSliceAssign_f()\n"); + auto aptr = a.ptr; + auto aend = aptr + a.length; + auto bptr = b.ptr; + + version (D_InlineAsm_X86) + { + // SSE version is 622% faster + if (sse() && a.length >= 16) + { + auto n = aptr + (a.length & ~15); + + // Unaligned case + asm + { + mov EAX, bptr; + mov ESI, aptr; + mov EDI, n; + movss XMM4, value; + shufps XMM4, XMM4, 0; + + align 8; + startsseloop: + add ESI, 64; + movups XMM0, [EAX]; + movups XMM1, [EAX+16]; + movups XMM2, [EAX+32]; + movups XMM3, [EAX+48]; + add EAX, 64; + subps XMM0, XMM4; + subps XMM1, XMM4; + subps XMM2, XMM4; + subps XMM3, XMM4; + movups [ESI+ 0-64], XMM0; + movups [ESI+16-64], XMM1; + movups [ESI+32-64], XMM2; + movups [ESI+48-64], XMM3; + cmp ESI, EDI; + jb startsseloop; + + mov aptr, ESI; + mov bptr, EAX; + } + } + else + // 3DNow! 
version is 67% faster + if (amd3dnow() && a.length >= 8) + { + auto n = aptr + (a.length & ~7); + + T[2] w; + + w[0] = w[1] = value; + + asm + { + mov ESI, dword ptr [aptr]; + mov EDI, dword ptr [n]; + mov EAX, dword ptr [bptr]; + movq MM4, qword ptr [w]; + + align 8; + start3dnow: + movq MM0, [EAX]; + movq MM1, [EAX+8]; + movq MM2, [EAX+16]; + movq MM3, [EAX+24]; + pfsub MM0, MM4; + pfsub MM1, MM4; + pfsub MM2, MM4; + pfsub MM3, MM4; + movq [ESI], MM0; + movq [ESI+8], MM1; + movq [ESI+16], MM2; + movq [ESI+24], MM3; + add ESI, 32; + add EAX, 32; + cmp ESI, EDI; + jb start3dnow; + + emms; + mov dword ptr [aptr], ESI; + mov dword ptr [bptr], EAX; + } + } + } + + while (aptr < aend) + *aptr++ = *bptr++ - value; + + return a; +} + +unittest +{ + printf("_arraySliceExpMinSliceAssign_f unittest\n"); + for (cpuid = 0; cpuid < CPUID_MAX; cpuid++) + { + version (log) printf(" cpuid %d\n", cpuid); + + for (int j = 0; j < 2; j++) + { + const int dim = 67; + T[] a = new T[dim + j]; // aligned on 16 byte boundary + a = a[j .. dim + j]; // misalign for second iteration + T[] b = new T[dim + j]; + b = b[j .. dim + j]; + T[] c = new T[dim + j]; + c = c[j .. 
dim + j]; + + for (int i = 0; i < dim; i++) + { a[i] = cast(T)i; + b[i] = cast(T)(i + 7); + c[i] = cast(T)(i * 2); + } + + c[] = a[] - 6; + + for (int i = 0; i < dim; i++) + { + if (c[i] != cast(T)(a[i] - 6)) + { + printf("[%d]: %g != %g - 6\n", i, c[i], a[i]); + assert(0); + } + } + } + } +} + +/* ======================================================================== */ + +/*********************** + * Computes: + * a[] = value - b[] + */ + +T[] _arrayExpSliceMinSliceAssign_f(T[] a, T[] b, T value) +in +{ + assert (a.length == b.length); + assert (disjoint(a, b)); +} +body +{ + //printf("_arrayExpSliceMinSliceAssign_f()\n"); + auto aptr = a.ptr; + auto aend = aptr + a.length; + auto bptr = b.ptr; + + version (D_InlineAsm_X86) + { + // SSE version is 690% faster + if (sse() && a.length >= 16) + { + auto n = aptr + (a.length & ~15); + + // Unaligned case + asm + { + mov EAX, bptr; + mov ESI, aptr; + mov EDI, n; + movss XMM4, value; + shufps XMM4, XMM4, 0; + + align 8; + startsseloop: + add ESI, 64; + movaps XMM5, XMM4; + movaps XMM6, XMM4; + movups XMM0, [EAX]; + movups XMM1, [EAX+16]; + movups XMM2, [EAX+32]; + movups XMM3, [EAX+48]; + add EAX, 64; + subps XMM5, XMM0; + subps XMM6, XMM1; + movups [ESI+ 0-64], XMM5; + movups [ESI+16-64], XMM6; + movaps XMM5, XMM4; + movaps XMM6, XMM4; + subps XMM5, XMM2; + subps XMM6, XMM3; + movups [ESI+32-64], XMM5; + movups [ESI+48-64], XMM6; + cmp ESI, EDI; + jb startsseloop; + + mov aptr, ESI; + mov bptr, EAX; + } + } + else + // 3DNow! 
version is 67% faster + if (amd3dnow() && a.length >= 8) + { + auto n = aptr + (a.length & ~7); + + ulong w = *cast(uint *) &value; + ulong v = w | (w << 32L); + + asm + { + mov ESI, aptr; + mov EDI, n; + mov EAX, bptr; + movq MM4, qword ptr [v]; + + align 8; + start3dnow: + movq MM0, [EAX]; + movq MM1, [EAX+8]; + movq MM2, [EAX+16]; + movq MM3, [EAX+24]; + pfsubr MM0, MM4; + pfsubr MM1, MM4; + pfsubr MM2, MM4; + pfsubr MM3, MM4; + movq [ESI], MM0; + movq [ESI+8], MM1; + movq [ESI+16], MM2; + movq [ESI+24], MM3; + add ESI, 32; + add EAX, 32; + cmp ESI, EDI; + jb start3dnow; + + emms; + mov aptr, ESI; + mov bptr, EAX; + } + } + } + + while (aptr < aend) + *aptr++ = value - *bptr++; + + return a; +} + +unittest +{ + printf("_arrayExpSliceMinSliceAssign_f unittest\n"); + for (cpuid = 0; cpuid < CPUID_MAX; cpuid++) + { + version (log) printf(" cpuid %d\n", cpuid); + + for (int j = 0; j < 2; j++) + { + const int dim = 67; + T[] a = new T[dim + j]; // aligned on 16 byte boundary + a = a[j .. dim + j]; // misalign for second iteration + T[] b = new T[dim + j]; + b = b[j .. dim + j]; + T[] c = new T[dim + j]; + c = c[j .. 
dim + j];
+
+            for (int i = 0; i < dim; i++)
+            {   a[i] = cast(T)i;
+                b[i] = cast(T)(i + 7);
+                c[i] = cast(T)(i * 2);
+            }
+
+            c[] = 6 - a[];
+
+            for (int i = 0; i < dim; i++)
+            {
+                if (c[i] != cast(T)(6 - a[i]))
+                {
+                    printf("[%d]: %g != 6 - %g\n", i, c[i], a[i]);
+                    assert(0);
+                }
+            }
+        }
+    }
+}
+
+/* ======================================================================== */
+
+/***********************
+ * Computes:
+ *      a[] -= value
+ *
+ * Params:
+ *      a     = slice updated in place
+ *      value = scalar subtracted from every element of a
+ * Returns:
+ *      a
+ *
+ * Dispatch: an SSE loop for lengths >= 16 when sse() reports support,
+ * otherwise a 3DNow! loop for lengths >= 8 when amd3dnow() reports
+ * support; whatever is left over is finished by the scalar loop at the end.
+ */
+
+T[] _arrayExpSliceMinass_f(T[] a, T value)
+{
+    //printf("_arrayExpSliceMinass_f(a.length = %d, value = %Lg)\n", a.length, cast(real)value);
+    auto aptr = a.ptr;
+    auto aend = aptr + a.length;
+
+    version (D_InlineAsm_X86)
+    {
+        // SSE version is 304% faster
+        if (sse() && a.length >= 16)
+        {
+            // Scalar prologue: step aptr up to the next 16-byte boundary so
+            // the movaps accesses in the loop below are aligned.
+            auto n = cast(T*)((cast(uint)aptr + 15) & ~15);
+            while (aptr < n)
+                *aptr++ -= value;
+
+            // The loop consumes 16 elements (64 bytes) per pass and tests its
+            // bound only after each pass, so the bound has to sit a whole
+            // number of passes beyond aptr.  Rounding aend down to a 16-byte
+            // boundary (the previous computation) could leave a 16/32/48
+            // byte span, making the last pass write past the end of a.
+            n = aptr + ((aend - aptr) & ~15);
+            if (aptr < n)
+
+            // Aligned case
+            asm
+            {
+                mov ESI, aptr;
+                mov EDI, n;
+                movss XMM4, value;
+                shufps XMM4, XMM4, 0;       // broadcast value to all four lanes
+
+                align 8;
+            startsseloopa:
+                movaps XMM0, [ESI];
+                movaps XMM1, [ESI+16];
+                movaps XMM2, [ESI+32];
+                movaps XMM3, [ESI+48];
+                add ESI, 64;
+                subps XMM0, XMM4;
+                subps XMM1, XMM4;
+                subps XMM2, XMM4;
+                subps XMM3, XMM4;
+                movaps [ESI+ 0-64], XMM0;
+                movaps [ESI+16-64], XMM1;
+                movaps [ESI+32-64], XMM2;
+                movaps [ESI+48-64], XMM3;
+                cmp ESI, EDI;
+                jb startsseloopa;
+
+                mov aptr, ESI;              // hand the position back to the scalar tail
+            }
+        }
+        else
+        // 3DNow! version is 63% faster
+        if (amd3dnow() && a.length >= 8)
+        {
+            // 8 elements (32 bytes) per pass; n is a whole number of passes.
+            auto n = aptr + (a.length & ~7);
+
+            // Replicate the bit pattern of value into both halves of a qword.
+            ulong w = *cast(uint *) &value;
+            ulong v = w | (w << 32L);
+
+            asm
+            {
+                mov ESI, dword ptr [aptr];
+                mov EDI, dword ptr [n];
+                movq MM4, qword ptr [v];
+
+                align 8;
+            start:
+                movq MM0, [ESI];
+                movq MM1, [ESI+8];
+                movq MM2, [ESI+16];
+                movq MM3, [ESI+24];
+                pfsub MM0, MM4;
+                pfsub MM1, MM4;
+                pfsub MM2, MM4;
+                pfsub MM3, MM4;
+                movq [ESI], MM0;
+                movq [ESI+8], MM1;
+                movq [ESI+16], MM2;
+                movq [ESI+24], MM3;
+                add ESI, 32;
+                cmp ESI, EDI;
+                jb start;
+
+                emms;
+                mov dword ptr [aptr], ESI;
+            }
+        }
+    }
+
+    // Scalar tail (and the whole job when no SIMD path was taken)
+    while (aptr < aend)
+        *aptr++ -= value;
+
+    return a;
+}
+
+unittest
+{
+    printf("_arrayExpSliceMinass_f unittest\n");
+    for (cpuid = 0; cpuid < CPUID_MAX; cpuid++)
+    {
+        version (log) printf(" cpuid %d\n", cpuid);
+
+        for (int j = 0; j < 2; j++)
+        {
+            const int dim = 67;
+            T[] a = new T[dim + j];     // aligned on 16 byte boundary
+            a = a[j .. dim + j];        // misalign for second iteration
+            T[] b = new T[dim + j];
+            b = b[j .. dim + j];
+            T[] c = new T[dim + j];
+            c = c[j .. 
dim + j]; + + for (int i = 0; i < dim; i++) + { a[i] = cast(T)i; + b[i] = cast(T)(i + 7); + c[i] = cast(T)(i * 2); + } + + a[] = c[]; + c[] -= 6; + + for (int i = 0; i < dim; i++) + { + if (c[i] != cast(T)(a[i] - 6)) + { + printf("[%d]: %g != %g - 6\n", i, c[i], a[i]); + assert(0); + } + } + } + } +} + +/* ======================================================================== */ + +/*********************** + * Computes: + * a[] -= b[] + */ + +T[] _arraySliceSliceMinass_f(T[] a, T[] b) +in +{ + assert (a.length == b.length); + assert (disjoint(a, b)); +} +body +{ + //printf("_arraySliceSliceMinass_f()\n"); + auto aptr = a.ptr; + auto aend = aptr + a.length; + auto bptr = b.ptr; + + version (D_InlineAsm_X86) + { + // SSE version is 468% faster + if (sse() && a.length >= 16) + { + auto n = aptr + (a.length & ~15); + + // Unaligned case + asm + { + mov ECX, bptr; // right operand + mov ESI, aptr; // destination operand + mov EDI, n; // end comparison + + align 8; + startsseloopb: + movups XMM0, [ESI]; + movups XMM1, [ESI+16]; + movups XMM2, [ESI+32]; + movups XMM3, [ESI+48]; + add ESI, 64; + movups XMM4, [ECX]; + movups XMM5, [ECX+16]; + movups XMM6, [ECX+32]; + movups XMM7, [ECX+48]; + add ECX, 64; + subps XMM0, XMM4; + subps XMM1, XMM5; + subps XMM2, XMM6; + subps XMM3, XMM7; + movups [ESI+ 0-64], XMM0; + movups [ESI+16-64], XMM1; + movups [ESI+32-64], XMM2; + movups [ESI+48-64], XMM3; + cmp ESI, EDI; + jb startsseloopb; + + mov aptr, ESI; + mov bptr, ECX; + } + } + else + // 3DNow! 
version is 57% faster
+        if (amd3dnow() && a.length >= 8)
+        {
+            auto n = aptr + (a.length & ~7);
+
+            asm
+            {
+                mov ESI, dword ptr [aptr]; // destination operand
+                mov EDI, dword ptr [n];    // end comparison
+                mov ECX, dword ptr [bptr]; // right operand
+
+                align 4;
+            start:
+                movq MM0, [ESI];
+                movq MM1, [ESI+8];
+                movq MM2, [ESI+16];
+                movq MM3, [ESI+24];
+                pfsub MM0, [ECX];
+                pfsub MM1, [ECX+8];
+                pfsub MM2, [ECX+16];
+                pfsub MM3, [ECX+24];
+                movq [ESI], MM0;
+                movq [ESI+8], MM1;
+                movq [ESI+16], MM2;
+                movq [ESI+24], MM3;
+                add ESI, 32;
+                add ECX, 32;
+                cmp ESI, EDI;
+                jb start;
+
+                emms;
+                mov aptr, ESI;
+                mov bptr, ECX;
+            }
+        }
+    }
+
+    while (aptr < aend)
+        *aptr++ -= *bptr++;
+
+    return a;
+}
+
+// Exercises a[] -= b[] (slice minus-assign slice).  The previous version of
+// this test was a copy of the scalar test: it printed the scalar routine's
+// name and ran c[] -= 6, so the slice/slice routine was never checked.
+unittest
+{
+    printf("_arraySliceSliceMinass_f unittest\n");
+    for (cpuid = 0; cpuid < CPUID_MAX; cpuid++)
+    {
+        version (log) printf(" cpuid %d\n", cpuid);
+
+        for (int j = 0; j < 2; j++)
+        {
+            const int dim = 67;
+            T[] a = new T[dim + j];     // aligned on 16 byte boundary
+            a = a[j .. dim + j];        // misalign for second iteration
+            T[] b = new T[dim + j];
+            b = b[j .. dim + j];
+            T[] c = new T[dim + j];
+            c = c[j .. dim + j];
+
+            for (int i = 0; i < dim; i++)
+            {   a[i] = cast(T)i;
+                b[i] = cast(T)(i + 7);
+                c[i] = cast(T)(i * 2);
+            }
+
+            a[] = c[];                  // keep the pre-update values for the check
+            c[] -= b[];
+
+            for (int i = 0; i < dim; i++)
+            {
+                if (c[i] != cast(T)(a[i] - b[i]))
+                {
+                    printf("[%d]: %g != %g - %g\n", i, c[i], a[i], b[i]);
+                    assert(0);
+                }
+            }
+        }
+    }
+}
+
+/* ======================================================================== */
+
+/***********************
+ * Computes:
+ *      a[] = b[] * value
+ */
+
+T[] _arraySliceExpMulSliceAssign_f(T[] a, T value, T[] b)
+in
+{
+    assert(a.length == b.length);
+    assert(disjoint(a, b));
+}
+body
+{
+    //printf("_arraySliceExpMulSliceAssign_f()\n");
+    auto aptr = a.ptr;
+    auto aend = aptr + a.length;
+    auto bptr = b.ptr;
+
+    version (D_InlineAsm_X86)
+    {
+        // SSE version is 607% faster
+        if (sse() && a.length >= 16)
+        {
+            auto n = aptr + (a.length & ~15);
+
+            // Unaligned case
+            asm
+            {
+                mov EAX, bptr;
+                mov ESI, aptr;
+                mov EDI, n;
+                movss XMM4, value;
+                shufps XMM4, XMM4, 0;
+
+                align 8;
+            startsseloop:
+                add ESI, 64;
+                movups XMM0, [EAX];
+                movups XMM1, [EAX+16];
+                movups XMM2, [EAX+32];
+                movups XMM3, [EAX+48];
+                add EAX, 64;
+                mulps XMM0, XMM4;
+                mulps XMM1, XMM4;
+                mulps XMM2, XMM4;
+                mulps XMM3, XMM4;
+                movups [ESI+ 0-64], XMM0;
+                movups [ESI+16-64], XMM1;
+                movups [ESI+32-64], XMM2;
+                movups [ESI+48-64], XMM3;
+                cmp ESI, EDI;
+                jb startsseloop;
+
+                mov aptr, ESI;
+                mov bptr, EAX;
+            }
+        }
+        else
+        // 3DNow! 
version is 69% faster + if (amd3dnow() && a.length >= 8) + { + auto n = aptr + (a.length & ~7); + + ulong w = *cast(uint *) &value; + ulong v = w | (w << 32L); + + asm + { + mov ESI, dword ptr [aptr]; + mov EDI, dword ptr [n]; + mov EAX, dword ptr [bptr]; + movq MM4, qword ptr [v]; + + align 8; + start: + movq MM0, [EAX]; + movq MM1, [EAX+8]; + movq MM2, [EAX+16]; + movq MM3, [EAX+24]; + pfmul MM0, MM4; + pfmul MM1, MM4; + pfmul MM2, MM4; + pfmul MM3, MM4; + movq [ESI], MM0; + movq [ESI+8], MM1; + movq [ESI+16], MM2; + movq [ESI+24], MM3; + add ESI, 32; + add EAX, 32; + cmp ESI, EDI; + jb start; + + emms; + mov aptr, ESI; + mov bptr, EAX; + } + } + } + + while (aptr < aend) + *aptr++ = *bptr++ * value; + + return a; +} + +unittest +{ + printf("_arraySliceExpMulSliceAssign_f unittest\n"); + for (cpuid = 0; cpuid < CPUID_MAX; cpuid++) + { + version (log) printf(" cpuid %d\n", cpuid); + + for (int j = 0; j < 2; j++) + { + const int dim = 67; + T[] a = new T[dim + j]; // aligned on 16 byte boundary + a = a[j .. dim + j]; // misalign for second iteration + T[] b = new T[dim + j]; + b = b[j .. dim + j]; + T[] c = new T[dim + j]; + c = c[j .. 
dim + j]; + + for (int i = 0; i < dim; i++) + { a[i] = cast(T)i; + b[i] = cast(T)(i + 7); + c[i] = cast(T)(i * 2); + } + + c[] = a[] * 6; + + for (int i = 0; i < dim; i++) + { + if (c[i] != cast(T)(a[i] * 6)) + { + printf("[%d]: %g != %g * 6\n", i, c[i], a[i]); + assert(0); + } + } + } + } +} + +/* ======================================================================== */ + +/*********************** + * Computes: + * a[] = b[] * c[] + */ + +T[] _arraySliceSliceMulSliceAssign_f(T[] a, T[] c, T[] b) +in +{ + assert(a.length == b.length && b.length == c.length); + assert(disjoint(a, b)); + assert(disjoint(a, c)); + assert(disjoint(b, c)); +} +body +{ + //printf("_arraySliceSliceMulSliceAssign_f()\n"); + auto aptr = a.ptr; + auto aend = aptr + a.length; + auto bptr = b.ptr; + auto cptr = c.ptr; + + version (D_InlineAsm_X86) + { + // SSE version is 833% faster + if (sse() && a.length >= 16) + { + auto n = aptr + (a.length & ~15); + + // Unaligned case + asm + { + mov EAX, bptr; // left operand + mov ECX, cptr; // right operand + mov ESI, aptr; // destination operand + mov EDI, n; // end comparison + + align 8; + startsseloopb: + movups XMM0, [EAX]; + movups XMM1, [EAX+16]; + movups XMM2, [EAX+32]; + movups XMM3, [EAX+48]; + add ESI, 64; + movups XMM4, [ECX]; + movups XMM5, [ECX+16]; + movups XMM6, [ECX+32]; + movups XMM7, [ECX+48]; + add EAX, 64; + mulps XMM0, XMM4; + mulps XMM1, XMM5; + mulps XMM2, XMM6; + mulps XMM3, XMM7; + add ECX, 64; + movups [ESI+ 0-64], XMM0; + movups [ESI+16-64], XMM1; + movups [ESI+32-64], XMM2; + movups [ESI+48-64], XMM3; + cmp ESI, EDI; + jb startsseloopb; + + mov aptr, ESI; + mov bptr, EAX; + mov cptr, ECX; + } + } + else + // 3DNow! 
version is only 13% faster + if (amd3dnow() && a.length >= 8) + { + auto n = aptr + (a.length & ~7); + + asm + { + mov ESI, dword ptr [aptr]; // destination operand + mov EDI, dword ptr [n]; // end comparison + mov EAX, dword ptr [bptr]; // left operand + mov ECX, dword ptr [cptr]; // right operand + + align 4; + start: + movq MM0, [EAX]; + movq MM1, [EAX+8]; + movq MM2, [EAX+16]; + movq MM3, [EAX+24]; + pfmul MM0, [ECX]; + pfmul MM1, [ECX+8]; + pfmul MM2, [ECX+16]; + pfmul MM3, [ECX+24]; + movq [ESI], MM0; + movq [ESI+8], MM1; + movq [ESI+16], MM2; + movq [ESI+24], MM3; + add ECX, 32; + add ESI, 32; + add EAX, 32; + cmp ESI, EDI; + jb start; + + emms; + mov aptr, ESI; + mov bptr, EAX; + mov cptr, ECX; + } + } + } + + while (aptr < aend) + *aptr++ = *bptr++ * *cptr++; + + return a; +} + +unittest +{ + printf("_arraySliceSliceMulSliceAssign_f unittest\n"); + for (cpuid = 0; cpuid < CPUID_MAX; cpuid++) + { + version (log) printf(" cpuid %d\n", cpuid); + + for (int j = 0; j < 2; j++) + { + const int dim = 67; + T[] a = new T[dim + j]; // aligned on 16 byte boundary + a = a[j .. dim + j]; // misalign for second iteration + T[] b = new T[dim + j]; + b = b[j .. dim + j]; + T[] c = new T[dim + j]; + c = c[j .. 
dim + j];
+
+            for (int i = 0; i < dim; i++)
+            {   a[i] = cast(T)i;
+                b[i] = cast(T)(i + 7);
+                c[i] = cast(T)(i * 2);
+            }
+
+            c[] = a[] * b[];
+
+            for (int i = 0; i < dim; i++)
+            {
+                if (c[i] != cast(T)(a[i] * b[i]))
+                {
+                    printf("[%d]: %g != %g * %g\n", i, c[i], a[i], b[i]);
+                    assert(0);
+                }
+            }
+        }
+    }
+}
+
+/* ======================================================================== */
+
+/***********************
+ * Computes:
+ *      a[] *= value
+ *
+ * Params:
+ *      a     = slice updated in place
+ *      value = scalar every element of a is multiplied by
+ * Returns:
+ *      a
+ *
+ * Dispatch: an SSE loop for lengths >= 16 when sse() reports support,
+ * otherwise a 3DNow! loop for lengths >= 8 when amd3dnow() reports
+ * support; whatever is left over is finished by the scalar loop at the end.
+ */
+
+T[] _arrayExpSliceMulass_f(T[] a, T value)
+{
+    //printf("_arrayExpSliceMulass_f(a.length = %d, value = %Lg)\n", a.length, cast(real)value);
+    auto aptr = a.ptr;
+    auto aend = aptr + a.length;
+
+    version (D_InlineAsm_X86)
+    {
+        // SSE version is 303% faster
+        if (sse() && a.length >= 16)
+        {
+            // Scalar prologue: step aptr up to the next 16-byte boundary so
+            // the movaps accesses in the loop below are aligned.
+            auto n = cast(T*)((cast(uint)aptr + 15) & ~15);
+            while (aptr < n)
+                *aptr++ *= value;
+
+            // The loop consumes 16 elements (64 bytes) per pass and tests its
+            // bound only after each pass, so the bound has to sit a whole
+            // number of passes beyond aptr.  Rounding aend down to a 16-byte
+            // boundary (the previous computation) could leave a 16/32/48
+            // byte span, making the last pass write past the end of a.
+            n = aptr + ((aend - aptr) & ~15);
+            if (aptr < n)
+
+            // Aligned case
+            asm
+            {
+                mov ESI, aptr;
+                mov EDI, n;
+                movss XMM4, value;
+                shufps XMM4, XMM4, 0;       // broadcast value to all four lanes
+
+                align 8;
+            startsseloopa:
+                movaps XMM0, [ESI];
+                movaps XMM1, [ESI+16];
+                movaps XMM2, [ESI+32];
+                movaps XMM3, [ESI+48];
+                add ESI, 64;
+                mulps XMM0, XMM4;
+                mulps XMM1, XMM4;
+                mulps XMM2, XMM4;
+                mulps XMM3, XMM4;
+                movaps [ESI+ 0-64], XMM0;
+                movaps [ESI+16-64], XMM1;
+                movaps [ESI+32-64], XMM2;
+                movaps [ESI+48-64], XMM3;
+                cmp ESI, EDI;
+                jb startsseloopa;
+
+                mov aptr, ESI;              // hand the position back to the scalar tail
+            }
+        }
+        else
+        // 3DNow! version is 63% faster
+        if (amd3dnow() && a.length >= 8)
+        {
+            // 8 elements (32 bytes) per pass; n is a whole number of passes.
+            auto n = aptr + (a.length & ~7);
+
+            // Replicate the bit pattern of value into both halves of a qword.
+            ulong w = *cast(uint *) &value;
+            ulong v = w | (w << 32L);
+
+            asm
+            {
+                mov ESI, dword ptr [aptr];
+                mov EDI, dword ptr [n];
+                movq MM4, qword ptr [v];
+
+                align 8;
+            start:
+                movq MM0, [ESI];
+                movq MM1, [ESI+8];
+                movq MM2, [ESI+16];
+                movq MM3, [ESI+24];
+                pfmul MM0, MM4;
+                pfmul MM1, MM4;
+                pfmul MM2, MM4;
+                pfmul MM3, MM4;
+                movq [ESI], MM0;
+                movq [ESI+8], MM1;
+                movq [ESI+16], MM2;
+                movq [ESI+24], MM3;
+                add ESI, 32;
+                cmp ESI, EDI;
+                jb start;
+
+                emms;
+                mov dword ptr [aptr], ESI;
+            }
+        }
+    }
+
+    // Scalar tail (and the whole job when no SIMD path was taken)
+    while (aptr < aend)
+        *aptr++ *= value;
+
+    return a;
+}
+
+unittest
+{
+    printf("_arrayExpSliceMulass_f unittest\n");
+    for (cpuid = 0; cpuid < CPUID_MAX; cpuid++)
+    {
+        version (log) printf(" cpuid %d\n", cpuid);
+
+        for (int j = 0; j < 2; j++)
+        {
+            const int dim = 67;
+            T[] a = new T[dim + j];     // aligned on 16 byte boundary
+            a = a[j .. dim + j];        // misalign for second iteration
+            T[] b = new T[dim + j];
+            b = b[j .. dim + j];
+            T[] c = new T[dim + j];
+            c = c[j .. 
dim + j]; + + for (int i = 0; i < dim; i++) + { a[i] = cast(T)i; + b[i] = cast(T)(i + 7); + c[i] = cast(T)(i * 2); + } + + a[] = c[]; + c[] *= 6; + + for (int i = 0; i < dim; i++) + { + if (c[i] != cast(T)(a[i] * 6)) + { + printf("[%d]: %g != %g * 6\n", i, c[i], a[i]); + assert(0); + } + } + } + } +} + +/* ======================================================================== */ + +/*********************** + * Computes: + * a[] *= b[] + */ + +T[] _arraySliceSliceMulass_f(T[] a, T[] b) +in +{ + assert (a.length == b.length); + assert (disjoint(a, b)); +} +body +{ + //printf("_arraySliceSliceMulass_f()\n"); + auto aptr = a.ptr; + auto aend = aptr + a.length; + auto bptr = b.ptr; + + version (D_InlineAsm_X86) + { + // SSE version is 525% faster + if (sse() && a.length >= 16) + { + auto n = aptr + (a.length & ~15); + + // Unaligned case + asm + { + mov ECX, bptr; // right operand + mov ESI, aptr; // destination operand + mov EDI, n; // end comparison + + align 8; + startsseloopb: + movups XMM0, [ESI]; + movups XMM1, [ESI+16]; + movups XMM2, [ESI+32]; + movups XMM3, [ESI+48]; + add ESI, 64; + movups XMM4, [ECX]; + movups XMM5, [ECX+16]; + movups XMM6, [ECX+32]; + movups XMM7, [ECX+48]; + add ECX, 64; + mulps XMM0, XMM4; + mulps XMM1, XMM5; + mulps XMM2, XMM6; + mulps XMM3, XMM7; + movups [ESI+ 0-64], XMM0; + movups [ESI+16-64], XMM1; + movups [ESI+32-64], XMM2; + movups [ESI+48-64], XMM3; + cmp ESI, EDI; + jb startsseloopb; + + mov aptr, ESI; + mov bptr, ECX; + } + } + else + // 3DNow! 
version is 57% faster
+        if (amd3dnow() && a.length >= 8)
+        {
+            auto n = aptr + (a.length & ~7);
+
+            asm
+            {
+                mov ESI, dword ptr [aptr]; // destination operand
+                mov EDI, dword ptr [n];    // end comparison
+                mov ECX, dword ptr [bptr]; // right operand
+
+                align 4;
+            start:
+                movq MM0, [ESI];
+                movq MM1, [ESI+8];
+                movq MM2, [ESI+16];
+                movq MM3, [ESI+24];
+                pfmul MM0, [ECX];
+                pfmul MM1, [ECX+8];
+                pfmul MM2, [ECX+16];
+                pfmul MM3, [ECX+24];
+                movq [ESI], MM0;
+                movq [ESI+8], MM1;
+                movq [ESI+16], MM2;
+                movq [ESI+24], MM3;
+                add ESI, 32;
+                add ECX, 32;
+                cmp ESI, EDI;
+                jb start;
+
+                emms;
+                mov dword ptr [aptr], ESI;
+                mov dword ptr [bptr], ECX;
+            }
+        }
+    }
+
+    while (aptr < aend)
+        *aptr++ *= *bptr++;
+
+    return a;
+}
+
+// Exercises a[] *= b[] (slice times-assign slice).  The previous version of
+// this test was a copy of the scalar test: it printed the scalar routine's
+// name and ran c[] *= 6, so the slice/slice routine was never checked.
+unittest
+{
+    printf("_arraySliceSliceMulass_f unittest\n");
+    for (cpuid = 0; cpuid < CPUID_MAX; cpuid++)
+    {
+        version (log) printf(" cpuid %d\n", cpuid);
+
+        for (int j = 0; j < 2; j++)
+        {
+            const int dim = 67;
+            T[] a = new T[dim + j];     // aligned on 16 byte boundary
+            a = a[j .. dim + j];        // misalign for second iteration
+            T[] b = new T[dim + j];
+            b = b[j .. dim + j];
+            T[] c = new T[dim + j];
+            c = c[j .. dim + j];
+
+            for (int i = 0; i < dim; i++)
+            {   a[i] = cast(T)i;
+                b[i] = cast(T)(i + 7);
+                c[i] = cast(T)(i * 2);
+            }
+
+            a[] = c[];                  // keep the pre-update values for the check
+            c[] *= b[];
+
+            for (int i = 0; i < dim; i++)
+            {
+                if (c[i] != cast(T)(a[i] * b[i]))
+                {
+                    printf("[%d]: %g != %g * %g\n", i, c[i], a[i], b[i]);
+                    assert(0);
+                }
+            }
+        }
+    }
+}
+
+/* ======================================================================== */
+
+/***********************
+ * Computes:
+ *      a[] = b[] / value
+ */
+
+T[] _arraySliceExpDivSliceAssign_f(T[] a, T value, T[] b)
+in
+{
+    assert(a.length == b.length);
+    assert(disjoint(a, b));
+}
+body
+{
+    //printf("_arraySliceExpDivSliceAssign_f()\n");
+    auto aptr = a.ptr;
+    auto aend = aptr + a.length;
+    auto bptr = b.ptr;
+
+    /* Multiplying by the reciprocal is faster, but does
+     * not produce as accurate an answer. 
+ */ + T recip = cast(T)1 / value; + + version (D_InlineAsm_X86) + { + // SSE version is 587% faster + if (sse() && a.length >= 16) + { + auto n = aptr + (a.length & ~15); + + // Unaligned case + asm + { + mov EAX, bptr; + mov ESI, aptr; + mov EDI, n; + movss XMM4, recip; + //movss XMM4, value + //rcpss XMM4, XMM4 + shufps XMM4, XMM4, 0; + + align 8; + startsseloop: + add ESI, 64; + movups XMM0, [EAX]; + movups XMM1, [EAX+16]; + movups XMM2, [EAX+32]; + movups XMM3, [EAX+48]; + add EAX, 64; + mulps XMM0, XMM4; + mulps XMM1, XMM4; + mulps XMM2, XMM4; + mulps XMM3, XMM4; + //divps XMM0, XMM4; + //divps XMM1, XMM4; + //divps XMM2, XMM4; + //divps XMM3, XMM4; + movups [ESI+ 0-64], XMM0; + movups [ESI+16-64], XMM1; + movups [ESI+32-64], XMM2; + movups [ESI+48-64], XMM3; + cmp ESI, EDI; + jb startsseloop; + + mov aptr, ESI; + mov bptr, EAX; + } + } + else + // 3DNow! version is 72% faster + if (amd3dnow() && a.length >= 8) + { + auto n = aptr + (a.length & ~7); + + T[2] w = void; + + w[0] = recip; + w[1] = recip; + + asm + { + mov ESI, dword ptr [aptr]; + mov EDI, dword ptr [n]; + mov EAX, dword ptr [bptr]; + movq MM4, qword ptr [w]; + + align 8; + start: + movq MM0, [EAX]; + movq MM1, [EAX+8]; + movq MM2, [EAX+16]; + movq MM3, [EAX+24]; + pfmul MM0, MM4; + pfmul MM1, MM4; + pfmul MM2, MM4; + pfmul MM3, MM4; + movq [ESI], MM0; + movq [ESI+8], MM1; + movq [ESI+16], MM2; + movq [ESI+24], MM3; + add ESI, 32; + add EAX, 32; + cmp ESI, EDI; + jb start; + + emms; + mov dword ptr [aptr], ESI; + mov dword ptr [bptr], EAX; + } + } + } + + while (aptr < aend) + *aptr++ = *bptr++ * recip; + + return a; +} + +unittest +{ + printf("_arraySliceExpDivSliceAssign_f unittest\n"); + for (cpuid = 0; cpuid < CPUID_MAX; cpuid++) + { + version (log) printf(" cpuid %d\n", cpuid); + + for (int j = 0; j < 2; j++) + { + const int dim = 67; + T[] a = new T[dim + j]; // aligned on 16 byte boundary + a = a[j .. dim + j]; // misalign for second iteration + T[] b = new T[dim + j]; + b = b[j .. 
dim + j];
+            T[] c = new T[dim + j];
+            c = c[j .. dim + j];
+
+            for (int i = 0; i < dim; i++)
+            {   a[i] = cast(T)i;
+                b[i] = cast(T)(i + 7);
+                c[i] = cast(T)(i * 2);
+            }
+
+            c[] = a[] / 8;
+
+            for (int i = 0; i < dim; i++)
+            {
+                if (c[i] != cast(T)(a[i] / 8))
+                {
+                    printf("[%d]: %g != %g / 8\n", i, c[i], a[i]);
+                    assert(0);
+                }
+            }
+        }
+    }
+}
+
+/* ======================================================================== */
+
+/***********************
+ * Computes:
+ *      a[] /= value
+ *
+ * Params:
+ *      a     = slice updated in place
+ *      value = scalar divisor
+ * Returns:
+ *      a
+ *
+ * Every path multiplies by 1 / value rather than dividing.
+ *
+ * Dispatch: an SSE loop for lengths >= 16 when sse() reports support,
+ * otherwise a 3DNow! loop for lengths >= 8 when amd3dnow() reports
+ * support; whatever is left over is finished by the scalar loop at the end.
+ */
+
+T[] _arrayExpSliceDivass_f(T[] a, T value)
+{
+    //printf("_arrayExpSliceDivass_f(a.length = %d, value = %Lg)\n", a.length, cast(real)value);
+    auto aptr = a.ptr;
+    auto aend = aptr + a.length;
+
+    /* Multiplying by the reciprocal is faster, but does
+     * not produce as accurate an answer.
+     */
+    T recip = cast(T)1 / value;
+
+    version (D_InlineAsm_X86)
+    {
+        // SSE version is 245% faster
+        if (sse() && a.length >= 16)
+        {
+            // Scalar prologue: step aptr up to the next 16-byte boundary so
+            // the movaps accesses in the loop below are aligned.
+            auto n = cast(T*)((cast(uint)aptr + 15) & ~15);
+            while (aptr < n)
+                *aptr++ *= recip;
+
+            // The loop consumes 16 elements (64 bytes) per pass and tests its
+            // bound only after each pass, so the bound has to sit a whole
+            // number of passes beyond aptr.  Rounding aend down to a 16-byte
+            // boundary (the previous computation) could leave a 16/32/48
+            // byte span, making the last pass write past the end of a.
+            n = aptr + ((aend - aptr) & ~15);
+            if (aptr < n)
+
+            // Aligned case
+            asm
+            {
+                mov ESI, aptr;
+                mov EDI, n;
+                movss XMM4, recip;
+                //movss XMM4, value
+                //rcpss XMM4, XMM4
+                shufps XMM4, XMM4, 0;       // broadcast recip to all four lanes
+
+                align 8;
+            startsseloopa:
+                movaps XMM0, [ESI];
+                movaps XMM1, [ESI+16];
+                movaps XMM2, [ESI+32];
+                movaps XMM3, [ESI+48];
+                add ESI, 64;
+                mulps XMM0, XMM4;
+                mulps XMM1, XMM4;
+                mulps XMM2, XMM4;
+                mulps XMM3, XMM4;
+                //divps XMM0, XMM4;
+                //divps XMM1, XMM4;
+                //divps XMM2, XMM4;
+                //divps XMM3, XMM4;
+                movaps [ESI+ 0-64], XMM0;
+                movaps [ESI+16-64], XMM1;
+                movaps [ESI+32-64], XMM2;
+                movaps [ESI+48-64], XMM3;
+                cmp ESI, EDI;
+                jb startsseloopa;
+
+                mov aptr, ESI;              // hand the position back to the scalar tail
+            }
+        }
+        else
+        // 3DNow! version is 57% faster
+        if (amd3dnow() && a.length >= 8)
+        {
+            // 8 elements (32 bytes) per pass; n is a whole number of passes.
+            auto n = aptr + (a.length & ~7);
+
+            // Two copies of recip side by side, loaded as one qword below.
+            T[2] w = void;
+
+            w[0] = w[1] = recip;
+
+            asm
+            {
+                mov ESI, dword ptr [aptr];
+                mov EDI, dword ptr [n];
+                movq MM4, qword ptr [w];
+
+                align 8;
+            start:
+                movq MM0, [ESI];
+                movq MM1, [ESI+8];
+                movq MM2, [ESI+16];
+                movq MM3, [ESI+24];
+                pfmul MM0, MM4;
+                pfmul MM1, MM4;
+                pfmul MM2, MM4;
+                pfmul MM3, MM4;
+                movq [ESI], MM0;
+                movq [ESI+8], MM1;
+                movq [ESI+16], MM2;
+                movq [ESI+24], MM3;
+                add ESI, 32;
+                cmp ESI, EDI;
+                jb start;
+
+                emms;
+                mov dword ptr [aptr], ESI;
+            }
+        }
+    }
+
+    // Scalar tail (and the whole job when no SIMD path was taken)
+    while (aptr < aend)
+        *aptr++ *= recip;
+
+    return a;
+}
+
+unittest
+{
+    printf("_arrayExpSliceDivass_f unittest\n");
+    for (cpuid = 0; cpuid < CPUID_MAX; cpuid++)
+    {
+        version (log) printf(" cpuid %d\n", cpuid);
+
+        for (int j = 0; j < 2; j++)
+        {
+            const int dim = 67;
+            T[] a = new T[dim + j];     // aligned on 16 byte boundary
+            a = a[j .. dim + j];        // misalign for second iteration
+            T[] b = new T[dim + j];
+            b = b[j .. dim + j];
+            T[] c = new T[dim + j];
+            c = c[j .. 
dim + j];
+
+            for (int i = 0; i < dim; i++)
+            {   a[i] = cast(T)i;
+                b[i] = cast(T)(i + 7);
+                c[i] = cast(T)(i * 2);
+            }
+
+            a[] = c[];
+            c[] /= 8;
+
+            for (int i = 0; i < dim; i++)
+            {
+                if (c[i] != cast(T)(a[i] / 8))
+                {
+                    printf("[%d]: %g != %g / 8\n", i, c[i], a[i]);
+                    assert(0);
+                }
+            }
+        }
+    }
+}
+
+
+/* ======================================================================== */
+
+/***********************
+ * Computes:
+ *      a[] -= b[] * value
+ *
+ * Forwards to _arraySliceExpMulSliceAddass_f with the sign of value
+ * flipped, i.e. a[] += b[] * (-value).
+ */
+
+T[] _arraySliceExpMulSliceMinass_f(T[] a, T value, T[] b)
+{
+    T negated = -value;
+    return _arraySliceExpMulSliceAddass_f(a, negated, b);
+}
+
+/***********************
+ * Computes:
+ *      a[] += b[] * value
+ *
+ * Plain element-by-element loop.  The in contract requires a and b to have
+ * the same length and to pass disjoint(a, b).
+ * Returns:
+ *      a
+ */
+
+T[] _arraySliceExpMulSliceAddass_f(T[] a, T value, T[] b)
+in
+{
+    assert(a.length == b.length);
+    assert(disjoint(a, b));
+}
+body
+{
+    auto dst = a.ptr;
+    auto src = b.ptr;
+    auto count = a.length;
+
+    // One multiply-add per element, walking both slices in step
+    for (size_t i = 0; i < count; i++)
+        dst[i] += src[i] * value;
+
+    return a;
+}
+
+unittest
+{
+    printf("_arraySliceExpMulSliceAddass_f unittest\n");
+
+    cpuid = 1;
+    {
+        version (log) printf(" cpuid %d\n", cpuid);
+
+        for (int j = 0; j < 1; j++)
+        {
+            const int dim = 67;
+            T[] a = new T[dim + j];     // aligned on 16 byte boundary
+            a = a[j .. dim + j];        // misalign for second iteration
+            T[] b = new T[dim + j];
+            b = b[j .. dim + j];
+            T[] c = new T[dim + j];
+            c = c[j .. 
dim + j]; + + for (int i = 0; i < dim; i++) + { a[i] = cast(T)i; + b[i] = cast(T)(i + 7); + c[i] = cast(T)(i * 2); + } + + b[] = c[]; + c[] += a[] * 6; + + for (int i = 0; i < dim; i++) + { + //printf("[%d]: %g ?= %g + %g * 6\n", i, c[i], b[i], a[i]); + if (c[i] != cast(T)(b[i] + a[i] * 6)) + { + printf("[%d]: %g ?= %g + %g * 6\n", i, c[i], b[i], a[i]); + assert(0); + } + } + } + } +} diff --git a/druntime/src/compiler/dmd/arrayint.d b/druntime/src/compiler/dmd/arrayint.d new file mode 100644 index 00000000..2df945c5 --- /dev/null +++ b/druntime/src/compiler/dmd/arrayint.d @@ -0,0 +1,2430 @@ +/** + * Contains MMX versions of certain operations for dchar, int, and uint ('w', + * 'i' and 'k' suffixes). + * + * Copyright: Copyright Digital Mars 2008 - 2009. + * License: = 8) + { + auto n = aptr + (a.length & ~7); + + uint l = value; + + if (((cast(uint) aptr | cast(uint) bptr) & 15) != 0) + { + asm // unaligned case + { + mov ESI, aptr; + mov EDI, n; + mov EAX, bptr; + movd XMM2, l; + pshufd XMM2, XMM2, 0; + + align 4; + startaddsse2u: + add ESI, 32; + movdqu XMM0, [EAX]; + movdqu XMM1, [EAX+16]; + add EAX, 32; + paddd XMM0, XMM2; + paddd XMM1, XMM2; + movdqu [ESI -32], XMM0; + movdqu [ESI+16-32], XMM1; + cmp ESI, EDI; + jb startaddsse2u; + + mov aptr, ESI; + mov bptr, EAX; + } + } + else + { + asm // aligned case + { + mov ESI, aptr; + mov EDI, n; + mov EAX, bptr; + movd XMM2, l; + pshufd XMM2, XMM2, 0; + + align 4; + startaddsse2a: + add ESI, 32; + movdqa XMM0, [EAX]; + movdqa XMM1, [EAX+16]; + add EAX, 32; + paddd XMM0, XMM2; + paddd XMM1, XMM2; + movdqa [ESI -32], XMM0; + movdqa [ESI+16-32], XMM1; + cmp ESI, EDI; + jb startaddsse2a; + + mov aptr, ESI; + mov bptr, EAX; + } + } + } + else + // MMX version is 298% faster + if (mmx() && a.length >= 4) + { + auto n = aptr + (a.length & ~3); + + ulong l = cast(uint) value | (cast(ulong)cast(uint) value << 32); + + asm + { + mov ESI, aptr; + mov EDI, n; + mov EAX, bptr; + movq MM2, l; + + align 4; + startmmx: + add ESI, 
16; + movq MM0, [EAX]; + movq MM1, [EAX+8]; + add EAX, 16; + paddd MM0, MM2; + paddd MM1, MM2; + movq [ESI -16], MM0; + movq [ESI+8-16], MM1; + cmp ESI, EDI; + jb startmmx; + + emms; + mov aptr, ESI; + mov bptr, EAX; + } + } + else + if (a.length >= 2) + { + auto n = aptr + (a.length & ~1); + + asm + { + mov ESI, aptr; + mov EDI, n; + mov EAX, bptr; + mov EDX, value; + + align 4; + start386: + add ESI, 8; + mov EBX, [EAX]; + mov ECX, [EAX+4]; + add EAX, 8; + add EBX, EDX; + add ECX, EDX; + mov [ESI -8], EBX; + mov [ESI+4-8], ECX; + cmp ESI, EDI; + jb start386; + + mov aptr, ESI; + mov bptr, EAX; + } + } + } + + while (aptr < aend) + *aptr++ = *bptr++ + value; + + return a; +} + +unittest +{ + printf("_arraySliceExpAddSliceAssign_i unittest\n"); + + for (cpuid = 0; cpuid < CPUID_MAX; cpuid++) + { + version (log) printf(" cpuid %d\n", cpuid); + + for (int j = 0; j < 2; j++) + { + const int dim = 67; + T[] a = new T[dim + j]; // aligned on 16 byte boundary + a = a[j .. dim + j]; // misalign for second iteration + T[] b = new T[dim + j]; + b = b[j .. dim + j]; + T[] c = new T[dim + j]; + c = c[j .. 
dim + j]; + + for (int i = 0; i < dim; i++) + { a[i] = cast(T)i; + b[i] = cast(T)(i + 7); + c[i] = cast(T)(i * 2); + } + + c[] = a[] + 6; + + for (int i = 0; i < dim; i++) + { + if (c[i] != cast(T)(a[i] + 6)) + { + printf("[%d]: %d != %d + 6\n", i, c[i], a[i]); + assert(0); + } + } + } + } +} + + +/* ======================================================================== */ + +/*********************** + * Computes: + * a[] = b[] + c[] + */ + +T[] _arraySliceSliceAddSliceAssign_w(T[] a, T[] c, T[] b) +{ + return _arraySliceSliceAddSliceAssign_i(a, c, b); +} + +T[] _arraySliceSliceAddSliceAssign_k(T[] a, T[] c, T[] b) +{ + return _arraySliceSliceAddSliceAssign_i(a, c, b); +} + +T[] _arraySliceSliceAddSliceAssign_i(T[] a, T[] c, T[] b) +in +{ + assert(a.length == b.length && b.length == c.length); + assert(disjoint(a, b)); + assert(disjoint(a, c)); + assert(disjoint(b, c)); +} +body +{ + //printf("_arraySliceSliceAddSliceAssign_i()\n"); + auto aptr = a.ptr; + auto aend = aptr + a.length; + auto bptr = b.ptr; + auto cptr = c.ptr; + + version (D_InlineAsm_X86) + { + // SSE2 aligned version is 1710% faster + if (sse2() && a.length >= 8) + { + auto n = aptr + (a.length & ~7); + + if (((cast(uint) aptr | cast(uint) bptr | cast(uint) cptr) & 15) != 0) + { + asm // unaligned case + { + mov ESI, aptr; + mov EDI, n; + mov EAX, bptr; + mov ECX, cptr; + + align 4; + startsse2u: + add ESI, 32; + movdqu XMM0, [EAX]; + movdqu XMM2, [ECX]; + movdqu XMM1, [EAX+16]; + movdqu XMM3, [ECX+16]; + add EAX, 32; + add ECX, 32; + paddd XMM0, XMM2; + paddd XMM1, XMM3; + movdqu [ESI -32], XMM0; + movdqu [ESI+16-32], XMM1; + cmp ESI, EDI; + jb startsse2u; + + mov aptr, ESI; + mov bptr, EAX; + mov cptr, ECX; + } + } + else + { + asm // aligned case + { + mov ESI, aptr; + mov EDI, n; + mov EAX, bptr; + mov ECX, cptr; + + align 4; + startsse2a: + add ESI, 32; + movdqa XMM0, [EAX]; + movdqa XMM2, [ECX]; + movdqa XMM1, [EAX+16]; + movdqa XMM3, [ECX+16]; + add EAX, 32; + add ECX, 32; + paddd XMM0, 
XMM2; + paddd XMM1, XMM3; + movdqa [ESI -32], XMM0; + movdqa [ESI+16-32], XMM1; + cmp ESI, EDI; + jb startsse2a; + + mov aptr, ESI; + mov bptr, EAX; + mov cptr, ECX; + } + } + } + else + // MMX version is 995% faster + if (mmx() && a.length >= 4) + { + auto n = aptr + (a.length & ~3); + + asm + { + mov ESI, aptr; + mov EDI, n; + mov EAX, bptr; + mov ECX, cptr; + + align 4; + startmmx: + add ESI, 16; + movq MM0, [EAX]; + movq MM2, [ECX]; + movq MM1, [EAX+8]; + movq MM3, [ECX+8]; + add EAX, 16; + add ECX, 16; + paddd MM0, MM2; + paddd MM1, MM3; + movq [ESI -16], MM0; + movq [ESI+8-16], MM1; + cmp ESI, EDI; + jb startmmx; + + emms; + mov aptr, ESI; + mov bptr, EAX; + mov cptr, ECX; + } + } + } + +normal: + while (aptr < aend) + *aptr++ = *bptr++ + *cptr++; + + return a; +} + +unittest +{ + printf("_arraySliceSliceAddSliceAssign_i unittest\n"); + + for (cpuid = 0; cpuid < CPUID_MAX; cpuid++) + { + version (log) printf(" cpuid %d\n", cpuid); + + for (int j = 0; j < 2; j++) + { + const int dim = 67; + T[] a = new T[dim + j]; // aligned on 16 byte boundary + a = a[j .. dim + j]; // misalign for second iteration + T[] b = new T[dim + j]; + b = b[j .. dim + j]; + T[] c = new T[dim + j]; + c = c[j .. 
dim + j]; + + for (int i = 0; i < dim; i++) + { a[i] = cast(T)i; + b[i] = cast(T)(i + 7); + c[i] = cast(T)(i * 2); + } + + c[] = a[] + b[]; + + for (int i = 0; i < dim; i++) + { + if (c[i] != cast(T)(a[i] + b[i])) + { + printf("[%d]: %d != %d + %d\n", i, c[i], a[i], b[i]); + assert(0); + } + } + } + } +} + + +/* ======================================================================== */ + +/*********************** + * Computes: + * a[] += value + */ + +T[] _arrayExpSliceAddass_w(T[] a, T value) +{ + return _arrayExpSliceAddass_i(a, value); +} + +T[] _arrayExpSliceAddass_k(T[] a, T value) +{ + return _arrayExpSliceAddass_i(a, value); +} + +T[] _arrayExpSliceAddass_i(T[] a, T value) +{ + //printf("_arrayExpSliceAddass_i(a.length = %d, value = %Lg)\n", a.length, cast(real)value); + auto aptr = a.ptr; + auto aend = aptr + a.length; + + version (D_InlineAsm_X86) + { + // SSE2 aligned version is 83% faster + if (sse2() && a.length >= 8) + { + auto n = aptr + (a.length & ~7); + + uint l = value; + + if (((cast(uint) aptr) & 15) != 0) + { + asm // unaligned case + { + mov ESI, aptr; + mov EDI, n; + movd XMM2, l; + pshufd XMM2, XMM2, 0; + + align 4; + startaddsse2u: + movdqu XMM0, [ESI]; + movdqu XMM1, [ESI+16]; + add ESI, 32; + paddd XMM0, XMM2; + paddd XMM1, XMM2; + movdqu [ESI -32], XMM0; + movdqu [ESI+16-32], XMM1; + cmp ESI, EDI; + jb startaddsse2u; + + mov aptr, ESI; + } + } + else + { + asm // aligned case + { + mov ESI, aptr; + mov EDI, n; + movd XMM2, l; + pshufd XMM2, XMM2, 0; + + align 4; + startaddsse2a: + movdqa XMM0, [ESI]; + movdqa XMM1, [ESI+16]; + add ESI, 32; + paddd XMM0, XMM2; + paddd XMM1, XMM2; + movdqa [ESI -32], XMM0; + movdqa [ESI+16-32], XMM1; + cmp ESI, EDI; + jb startaddsse2a; + + mov aptr, ESI; + } + } + } + else + // MMX version is 81% faster + if (mmx() && a.length >= 4) + { + auto n = aptr + (a.length & ~3); + + ulong l = cast(uint) value | (cast(ulong)cast(uint) value << 32); + + asm + { + mov ESI, aptr; + mov EDI, n; + movq MM2, l; + + 
align 4; + startmmx: + movq MM0, [ESI]; + movq MM1, [ESI+8]; + add ESI, 16; + paddd MM0, MM2; + paddd MM1, MM2; + movq [ESI -16], MM0; + movq [ESI+8-16], MM1; + cmp ESI, EDI; + jb startmmx; + + emms; + mov aptr, ESI; + } + } + else + if (a.length >= 2) + { + auto n = aptr + (a.length & ~1); + + asm + { + mov ESI, aptr; + mov EDI, n; + mov EDX, value; + + align 4; + start386: + mov EBX, [ESI]; + mov ECX, [ESI+4]; + add ESI, 8; + add EBX, EDX; + add ECX, EDX; + mov [ESI -8], EBX; + mov [ESI+4-8], ECX; + cmp ESI, EDI; + jb start386; + + mov aptr, ESI; + } + } + } + + while (aptr < aend) + *aptr++ += value; + + return a; +} + +unittest +{ + printf("_arrayExpSliceAddass_i unittest\n"); + + for (cpuid = 0; cpuid < CPUID_MAX; cpuid++) + { + version (log) printf(" cpuid %d\n", cpuid); + + for (int j = 0; j < 2; j++) + { + const int dim = 67; + T[] a = new T[dim + j]; // aligned on 16 byte boundary + a = a[j .. dim + j]; // misalign for second iteration + T[] b = new T[dim + j]; + b = b[j .. dim + j]; + T[] c = new T[dim + j]; + c = c[j .. 
dim + j]; + + for (int i = 0; i < dim; i++) + { a[i] = cast(T)i; + b[i] = cast(T)(i + 7); + c[i] = cast(T)(i * 2); + } + + a[] = c[]; + a[] += 6; + + for (int i = 0; i < dim; i++) + { + if (a[i] != cast(T)(c[i] + 6)) + { + printf("[%d]: %d != %d + 6\n", i, a[i], c[i]); + assert(0); + } + } + } + } +} + + +/* ======================================================================== */ + +/*********************** + * Computes: + * a[] += b[] + */ + +T[] _arraySliceSliceAddass_w(T[] a, T[] b) +{ + return _arraySliceSliceAddass_i(a, b); +} + +T[] _arraySliceSliceAddass_k(T[] a, T[] b) +{ + return _arraySliceSliceAddass_i(a, b); +} + +T[] _arraySliceSliceAddass_i(T[] a, T[] b) +in +{ + assert (a.length == b.length); + assert (disjoint(a, b)); +} +body +{ + //printf("_arraySliceSliceAddass_i()\n"); + auto aptr = a.ptr; + auto aend = aptr + a.length; + auto bptr = b.ptr; + + version (D_InlineAsm_X86) + { + // SSE2 aligned version is 695% faster + if (sse2() && a.length >= 8) + { + auto n = aptr + (a.length & ~7); + + if (((cast(uint) aptr | cast(uint) bptr) & 15) != 0) + { + asm // unaligned case + { + mov ESI, aptr; + mov EDI, n; + mov ECX, bptr; + + align 4; + startsse2u: + movdqu XMM0, [ESI]; + movdqu XMM2, [ECX]; + movdqu XMM1, [ESI+16]; + movdqu XMM3, [ECX+16]; + add ESI, 32; + add ECX, 32; + paddd XMM0, XMM2; + paddd XMM1, XMM3; + movdqu [ESI -32], XMM0; + movdqu [ESI+16-32], XMM1; + cmp ESI, EDI; + jb startsse2u; + + mov aptr, ESI; + mov bptr, ECX; + } + } + else + { + asm // aligned case + { + mov ESI, aptr; + mov EDI, n; + mov ECX, bptr; + + align 4; + startsse2a: + movdqa XMM0, [ESI]; + movdqa XMM2, [ECX]; + movdqa XMM1, [ESI+16]; + movdqa XMM3, [ECX+16]; + add ESI, 32; + add ECX, 32; + paddd XMM0, XMM2; + paddd XMM1, XMM3; + movdqa [ESI -32], XMM0; + movdqa [ESI+16-32], XMM1; + cmp ESI, EDI; + jb startsse2a; + + mov aptr, ESI; + mov bptr, ECX; + } + } + } + else + // MMX version is 471% faster + if (mmx() && a.length >= 4) + { + auto n = aptr + (a.length & ~3); 
+ + asm + { + mov ESI, aptr; + mov EDI, n; + mov ECX, bptr; + + align 4; + startmmx: + movq MM0, [ESI]; + movq MM2, [ECX]; + movq MM1, [ESI+8]; + movq MM3, [ECX+8]; + add ESI, 16; + add ECX, 16; + paddd MM0, MM2; + paddd MM1, MM3; + movq [ESI -16], MM0; + movq [ESI+8-16], MM1; + cmp ESI, EDI; + jb startmmx; + + emms; + mov aptr, ESI; + mov bptr, ECX; + } + } + } + +normal: + while (aptr < aend) + *aptr++ += *bptr++; + + return a; +} + +unittest +{ + printf("_arraySliceSliceAddass_i unittest\n"); + + for (cpuid = 0; cpuid < CPUID_MAX; cpuid++) + { + version (log) printf(" cpuid %d\n", cpuid); + + for (int j = 0; j < 2; j++) + { + const int dim = 67; + T[] a = new T[dim + j]; // aligned on 16 byte boundary + a = a[j .. dim + j]; // misalign for second iteration + T[] b = new T[dim + j]; + b = b[j .. dim + j]; + T[] c = new T[dim + j]; + c = c[j .. dim + j]; + + for (int i = 0; i < dim; i++) + { a[i] = cast(T)i; + b[i] = cast(T)(i + 7); + c[i] = cast(T)(i * 2); + } + + b[] = c[]; + c[] += a[]; + + for (int i = 0; i < dim; i++) + { + if (c[i] != cast(T)(b[i] + a[i])) + { + printf("[%d]: %d != %d + %d\n", i, c[i], b[i], a[i]); + assert(0); + } + } + } + } +} + + +/* ======================================================================== */ + +/*********************** + * Computes: + * a[] = b[] - value + */ + +T[] _arraySliceExpMinSliceAssign_w(T[] a, T value, T[] b) +{ + return _arraySliceExpMinSliceAssign_i(a, value, b); +} + +T[] _arraySliceExpMinSliceAssign_k(T[] a, T value, T[] b) +{ + return _arraySliceExpMinSliceAssign_i(a, value, b); +} + +T[] _arraySliceExpMinSliceAssign_i(T[] a, T value, T[] b) +in +{ + assert(a.length == b.length); + assert(disjoint(a, b)); +} +body +{ + //printf("_arraySliceExpMinSliceAssign_i()\n"); + auto aptr = a.ptr; + auto aend = aptr + a.length; + auto bptr = b.ptr; + + version (D_InlineAsm_X86) + { + // SSE2 aligned version is 400% faster + if (sse2() && a.length >= 8) + { + auto n = aptr + (a.length & ~7); + + uint l = value; + + if 
(((cast(uint) aptr | cast(uint) bptr) & 15) != 0) + { + asm // unaligned case + { + mov ESI, aptr; + mov EDI, n; + mov EAX, bptr; + movd XMM2, l; + pshufd XMM2, XMM2, 0; + + align 4; + startaddsse2u: + add ESI, 32; + movdqu XMM0, [EAX]; + movdqu XMM1, [EAX+16]; + add EAX, 32; + psubd XMM0, XMM2; + psubd XMM1, XMM2; + movdqu [ESI -32], XMM0; + movdqu [ESI+16-32], XMM1; + cmp ESI, EDI; + jb startaddsse2u; + + mov aptr, ESI; + mov bptr, EAX; + } + } + else + { + asm // aligned case + { + mov ESI, aptr; + mov EDI, n; + mov EAX, bptr; + movd XMM2, l; + pshufd XMM2, XMM2, 0; + + align 4; + startaddsse2a: + add ESI, 32; + movdqa XMM0, [EAX]; + movdqa XMM1, [EAX+16]; + add EAX, 32; + psubd XMM0, XMM2; + psubd XMM1, XMM2; + movdqa [ESI -32], XMM0; + movdqa [ESI+16-32], XMM1; + cmp ESI, EDI; + jb startaddsse2a; + + mov aptr, ESI; + mov bptr, EAX; + } + } + } + else + // MMX version is 315% faster + if (mmx() && a.length >= 4) + { + auto n = aptr + (a.length & ~3); + + ulong l = cast(uint) value | (cast(ulong)cast(uint) value << 32); + + asm + { + mov ESI, aptr; + mov EDI, n; + mov EAX, bptr; + movq MM2, l; + + align 4; + startmmx: + add ESI, 16; + movq MM0, [EAX]; + movq MM1, [EAX+8]; + add EAX, 16; + psubd MM0, MM2; + psubd MM1, MM2; + movq [ESI -16], MM0; + movq [ESI+8-16], MM1; + cmp ESI, EDI; + jb startmmx; + + emms; + mov aptr, ESI; + mov bptr, EAX; + } + } + else + if (a.length >= 2) + { + auto n = aptr + (a.length & ~1); + + asm + { + mov ESI, aptr; + mov EDI, n; + mov EAX, bptr; + mov EDX, value; + + align 4; + start386: + add ESI, 8; + mov EBX, [EAX]; + mov ECX, [EAX+4]; + add EAX, 8; + sub EBX, EDX; + sub ECX, EDX; + mov [ESI -8], EBX; + mov [ESI+4-8], ECX; + cmp ESI, EDI; + jb start386; + + mov aptr, ESI; + mov bptr, EAX; + } + } + } + + while (aptr < aend) + *aptr++ = *bptr++ - value; + + return a; +} + +unittest +{ + printf("_arraySliceExpMinSliceAssign_i unittest\n"); + + for (cpuid = 0; cpuid < CPUID_MAX; cpuid++) + { + version (log) printf(" cpuid %d\n", 
cpuid); + + for (int j = 0; j < 2; j++) + { + const int dim = 67; + T[] a = new T[dim + j]; // aligned on 16 byte boundary + a = a[j .. dim + j]; // misalign for second iteration + T[] b = new T[dim + j]; + b = b[j .. dim + j]; + T[] c = new T[dim + j]; + c = c[j .. dim + j]; + + for (int i = 0; i < dim; i++) + { a[i] = cast(T)i; + b[i] = cast(T)(i + 7); + c[i] = cast(T)(i * 2); + } + + c[] = a[] - 6; + + for (int i = 0; i < dim; i++) + { + if (c[i] != cast(T)(a[i] - 6)) + { + printf("[%d]: %d != %d - 6\n", i, c[i], a[i]); + assert(0); + } + } + } + } +} + + +/* ======================================================================== */ + +/*********************** + * Computes: + * a[] = value - b[] + */ + +T[] _arrayExpSliceMinSliceAssign_w(T[] a, T[] b, T value) +{ + return _arrayExpSliceMinSliceAssign_i(a, b, value); +} + +T[] _arrayExpSliceMinSliceAssign_k(T[] a, T[] b, T value) +{ + return _arrayExpSliceMinSliceAssign_i(a, b, value); +} + +T[] _arrayExpSliceMinSliceAssign_i(T[] a, T[] b, T value) +in +{ + assert(a.length == b.length); + assert(disjoint(a, b)); +} +body +{ + //printf("_arrayExpSliceMinSliceAssign_i()\n"); + auto aptr = a.ptr; + auto aend = aptr + a.length; + auto bptr = b.ptr; + + version (D_InlineAsm_X86) + { + // SSE2 aligned version is 1812% faster + if (sse2() && a.length >= 8) + { + auto n = aptr + (a.length & ~7); + + uint l = value; + + if (((cast(uint) aptr | cast(uint) bptr) & 15) != 0) + { + asm // unaligned case + { + mov ESI, aptr; + mov EDI, n; + mov EAX, bptr; + movd XMM4, l; + pshufd XMM4, XMM4, 0; + + align 4; + startaddsse2u: + add ESI, 32; + movdqu XMM2, [EAX]; + movdqu XMM3, [EAX+16]; + movdqa XMM0, XMM4; + movdqa XMM1, XMM4; + add EAX, 32; + psubd XMM0, XMM2; + psubd XMM1, XMM3; + movdqu [ESI -32], XMM0; + movdqu [ESI+16-32], XMM1; + cmp ESI, EDI; + jb startaddsse2u; + + mov aptr, ESI; + mov bptr, EAX; + } + } + else + { + asm // aligned case + { + mov ESI, aptr; + mov EDI, n; + mov EAX, bptr; + movd XMM4, l; + pshufd XMM4, 
XMM4, 0; + + align 4; + startaddsse2a: + add ESI, 32; + movdqa XMM2, [EAX]; + movdqa XMM3, [EAX+16]; + movdqa XMM0, XMM4; + movdqa XMM1, XMM4; + add EAX, 32; + psubd XMM0, XMM2; + psubd XMM1, XMM3; + movdqa [ESI -32], XMM0; + movdqa [ESI+16-32], XMM1; + cmp ESI, EDI; + jb startaddsse2a; + + mov aptr, ESI; + mov bptr, EAX; + } + } + } + else + // MMX version is 1077% faster + if (mmx() && a.length >= 4) + { + auto n = aptr + (a.length & ~3); + + ulong l = cast(uint) value | (cast(ulong)cast(uint) value << 32); + + asm + { + mov ESI, aptr; + mov EDI, n; + mov EAX, bptr; + movq MM4, l; + + align 4; + startmmx: + add ESI, 16; + movq MM2, [EAX]; + movq MM3, [EAX+8]; + movq MM0, MM4; + movq MM1, MM4; + add EAX, 16; + psubd MM0, MM2; + psubd MM1, MM3; + movq [ESI -16], MM0; + movq [ESI+8-16], MM1; + cmp ESI, EDI; + jb startmmx; + + emms; + mov aptr, ESI; + mov bptr, EAX; + } + } + } + + while (aptr < aend) + *aptr++ = value - *bptr++; + + return a; +} + +unittest +{ + printf("_arrayExpSliceMinSliceAssign_i unittest\n"); + + for (cpuid = 0; cpuid < CPUID_MAX; cpuid++) + { + version (log) printf(" cpuid %d\n", cpuid); + + for (int j = 0; j < 2; j++) + { + const int dim = 67; + T[] a = new T[dim + j]; // aligned on 16 byte boundary + a = a[j .. dim + j]; // misalign for second iteration + T[] b = new T[dim + j]; + b = b[j .. dim + j]; + T[] c = new T[dim + j]; + c = c[j .. 
dim + j]; + + for (int i = 0; i < dim; i++) + { a[i] = cast(T)i; + b[i] = cast(T)(i + 7); + c[i] = cast(T)(i * 2); + } + + c[] = 6 - a[]; + + for (int i = 0; i < dim; i++) + { + if (c[i] != cast(T)(6 - a[i])) + { + printf("[%d]: %d != 6 - %d\n", i, c[i], a[i]); + assert(0); + } + } + } + } +} + + +/* ======================================================================== */ + +/*********************** + * Computes: + * a[] = b[] - c[] + */ + +T[] _arraySliceSliceMinSliceAssign_w(T[] a, T[] c, T[] b) +{ + return _arraySliceSliceMinSliceAssign_i(a, c, b); +} + +T[] _arraySliceSliceMinSliceAssign_k(T[] a, T[] c, T[] b) +{ + return _arraySliceSliceMinSliceAssign_i(a, c, b); +} + +T[] _arraySliceSliceMinSliceAssign_i(T[] a, T[] c, T[] b) +in +{ + assert(a.length == b.length && b.length == c.length); + assert(disjoint(a, b)); + assert(disjoint(a, c)); + assert(disjoint(b, c)); +} +body +{ + auto aptr = a.ptr; + auto aend = aptr + a.length; + auto bptr = b.ptr; + auto cptr = c.ptr; + + version (D_InlineAsm_X86) + { + // SSE2 aligned version is 1721% faster + if (sse2() && a.length >= 8) + { + auto n = aptr + (a.length & ~7); + + if (((cast(uint) aptr | cast(uint) bptr | cast(uint) cptr) & 15) != 0) + { + asm // unaligned case + { + mov ESI, aptr; + mov EDI, n; + mov EAX, bptr; + mov ECX, cptr; + + align 4; + startsse2u: + add ESI, 32; + movdqu XMM0, [EAX]; + movdqu XMM2, [ECX]; + movdqu XMM1, [EAX+16]; + movdqu XMM3, [ECX+16]; + add EAX, 32; + add ECX, 32; + psubd XMM0, XMM2; + psubd XMM1, XMM3; + movdqu [ESI -32], XMM0; + movdqu [ESI+16-32], XMM1; + cmp ESI, EDI; + jb startsse2u; + + mov aptr, ESI; + mov bptr, EAX; + mov cptr, ECX; + } + } + else + { + asm // aligned case + { + mov ESI, aptr; + mov EDI, n; + mov EAX, bptr; + mov ECX, cptr; + + align 4; + startsse2a: + add ESI, 32; + movdqa XMM0, [EAX]; + movdqa XMM2, [ECX]; + movdqa XMM1, [EAX+16]; + movdqa XMM3, [ECX+16]; + add EAX, 32; + add ECX, 32; + psubd XMM0, XMM2; + psubd XMM1, XMM3; + movdqa [ESI -32], XMM0; + 
movdqa [ESI+16-32], XMM1; + cmp ESI, EDI; + jb startsse2a; + + mov aptr, ESI; + mov bptr, EAX; + mov cptr, ECX; + } + } + } + else + // MMX version is 1002% faster + if (mmx() && a.length >= 4) + { + auto n = aptr + (a.length & ~3); + + asm + { + mov ESI, aptr; + mov EDI, n; + mov EAX, bptr; + mov ECX, cptr; + + align 4; + startmmx: + add ESI, 16; + movq MM0, [EAX]; + movq MM2, [ECX]; + movq MM1, [EAX+8]; + movq MM3, [ECX+8]; + add EAX, 16; + add ECX, 16; + psubd MM0, MM2; + psubd MM1, MM3; + movq [ESI -16], MM0; + movq [ESI+8-16], MM1; + cmp ESI, EDI; + jb startmmx; + + emms; + mov aptr, ESI; + mov bptr, EAX; + mov cptr, ECX; + } + } + } + + while (aptr < aend) + *aptr++ = *bptr++ - *cptr++; + + return a; +} + +unittest +{ + printf("_arraySliceSliceMinSliceAssign_i unittest\n"); + + for (cpuid = 0; cpuid < CPUID_MAX; cpuid++) + { + version (log) printf(" cpuid %d\n", cpuid); + + for (int j = 0; j < 2; j++) + { + const int dim = 67; + T[] a = new T[dim + j]; // aligned on 16 byte boundary + a = a[j .. dim + j]; // misalign for second iteration + T[] b = new T[dim + j]; + b = b[j .. dim + j]; + T[] c = new T[dim + j]; + c = c[j .. 
dim + j]; + + for (int i = 0; i < dim; i++) + { a[i] = cast(T)i; + b[i] = cast(T)(i + 7); + c[i] = cast(T)(i * 2); + } + + c[] = a[] - b[]; + + for (int i = 0; i < dim; i++) + { + if (c[i] != cast(T)(a[i] - b[i])) + { + printf("[%d]: %d != %d - %d\n", i, c[i], a[i], b[i]); + assert(0); + } + } + } + } +} + + +/* ======================================================================== */ + +/*********************** + * Computes: + * a[] -= value + */ + +T[] _arrayExpSliceMinass_w(T[] a, T value) +{ + return _arrayExpSliceMinass_i(a, value); +} + +T[] _arrayExpSliceMinass_k(T[] a, T value) +{ + return _arrayExpSliceMinass_i(a, value); +} + +T[] _arrayExpSliceMinass_i(T[] a, T value) +{ + //printf("_arrayExpSliceMinass_i(a.length = %d, value = %Lg)\n", a.length, cast(real)value); + auto aptr = a.ptr; + auto aend = aptr + a.length; + + version (D_InlineAsm_X86) + { + // SSE2 aligned version is 81% faster + if (sse2() && a.length >= 8) + { + auto n = aptr + (a.length & ~7); + + uint l = value; + + if (((cast(uint) aptr) & 15) != 0) + { + asm // unaligned case + { + mov ESI, aptr; + mov EDI, n; + movd XMM2, l; + pshufd XMM2, XMM2, 0; + + align 4; + startaddsse2u: + movdqu XMM0, [ESI]; + movdqu XMM1, [ESI+16]; + add ESI, 32; + psubd XMM0, XMM2; + psubd XMM1, XMM2; + movdqu [ESI -32], XMM0; + movdqu [ESI+16-32], XMM1; + cmp ESI, EDI; + jb startaddsse2u; + + mov aptr, ESI; + } + } + else + { + asm // aligned case + { + mov ESI, aptr; + mov EDI, n; + movd XMM2, l; + pshufd XMM2, XMM2, 0; + + align 4; + startaddsse2a: + movdqa XMM0, [ESI]; + movdqa XMM1, [ESI+16]; + add ESI, 32; + psubd XMM0, XMM2; + psubd XMM1, XMM2; + movdqa [ESI -32], XMM0; + movdqa [ESI+16-32], XMM1; + cmp ESI, EDI; + jb startaddsse2a; + + mov aptr, ESI; + } + } + } + else + // MMX version is 81% faster + if (mmx() && a.length >= 4) + { + auto n = aptr + (a.length & ~3); + + ulong l = cast(uint) value | (cast(ulong)cast(uint) value << 32); + + asm + { + mov ESI, aptr; + mov EDI, n; + movq MM2, l; + + 
align 4; + startmmx: + movq MM0, [ESI]; + movq MM1, [ESI+8]; + add ESI, 16; + psubd MM0, MM2; + psubd MM1, MM2; + movq [ESI -16], MM0; + movq [ESI+8-16], MM1; + cmp ESI, EDI; + jb startmmx; + + emms; + mov aptr, ESI; + } + } + else + if (a.length >= 2) + { + auto n = aptr + (a.length & ~1); + + asm + { + mov ESI, aptr; + mov EDI, n; + mov EDX, value; + + align 4; + start386: + mov EBX, [ESI]; + mov ECX, [ESI+4]; + add ESI, 8; + sub EBX, EDX; + sub ECX, EDX; + mov [ESI -8], EBX; + mov [ESI+4-8], ECX; + cmp ESI, EDI; + jb start386; + + mov aptr, ESI; + } + } + } + + while (aptr < aend) + *aptr++ -= value; + + return a; +} + +unittest +{ + printf("_arrayExpSliceMinass_i unittest\n"); + + for (cpuid = 0; cpuid < CPUID_MAX; cpuid++) + { + version (log) printf(" cpuid %d\n", cpuid); + + for (int j = 0; j < 2; j++) + { + const int dim = 67; + T[] a = new T[dim + j]; // aligned on 16 byte boundary + a = a[j .. dim + j]; // misalign for second iteration + T[] b = new T[dim + j]; + b = b[j .. dim + j]; + T[] c = new T[dim + j]; + c = c[j .. 
dim + j]; + + for (int i = 0; i < dim; i++) + { a[i] = cast(T)i; + b[i] = cast(T)(i + 7); + c[i] = cast(T)(i * 2); + } + + a[] = c[]; + a[] -= 6; + + for (int i = 0; i < dim; i++) + { + if (a[i] != cast(T)(c[i] - 6)) + { + printf("[%d]: %d != %d - 6\n", i, a[i], c[i]); + assert(0); + } + } + } + } +} + + +/* ======================================================================== */ + +/*********************** + * Computes: + * a[] -= b[] + */ + +T[] _arraySliceSliceMinass_w(T[] a, T[] b) +{ + return _arraySliceSliceMinass_i(a, b); +} + +T[] _arraySliceSliceMinass_k(T[] a, T[] b) +{ + return _arraySliceSliceMinass_i(a, b); +} + +T[] _arraySliceSliceMinass_i(T[] a, T[] b) +in +{ + assert (a.length == b.length); + assert (disjoint(a, b)); +} +body +{ + //printf("_arraySliceSliceMinass_i()\n"); + auto aptr = a.ptr; + auto aend = aptr + a.length; + auto bptr = b.ptr; + + version (D_InlineAsm_X86) + { + // SSE2 aligned version is 731% faster + if (sse2() && a.length >= 8) + { + auto n = aptr + (a.length & ~7); + + if (((cast(uint) aptr | cast(uint) bptr) & 15) != 0) + { + asm // unaligned case + { + mov ESI, aptr; + mov EDI, n; + mov ECX, bptr; + + align 4; + startsse2u: + movdqu XMM0, [ESI]; + movdqu XMM2, [ECX]; + movdqu XMM1, [ESI+16]; + movdqu XMM3, [ECX+16]; + add ESI, 32; + add ECX, 32; + psubd XMM0, XMM2; + psubd XMM1, XMM3; + movdqu [ESI -32], XMM0; + movdqu [ESI+16-32], XMM1; + cmp ESI, EDI; + jb startsse2u; + + mov aptr, ESI; + mov bptr, ECX; + } + } + else + { + asm // aligned case + { + mov ESI, aptr; + mov EDI, n; + mov ECX, bptr; + + align 4; + startsse2a: + movdqa XMM0, [ESI]; + movdqa XMM2, [ECX]; + movdqa XMM1, [ESI+16]; + movdqa XMM3, [ECX+16]; + add ESI, 32; + add ECX, 32; + psubd XMM0, XMM2; + psubd XMM1, XMM3; + movdqa [ESI -32], XMM0; + movdqa [ESI+16-32], XMM1; + cmp ESI, EDI; + jb startsse2a; + + mov aptr, ESI; + mov bptr, ECX; + } + } + } + else + // MMX version is 441% faster + if (mmx() && a.length >= 4) + { + auto n = aptr + (a.length & ~3); 
+ + asm + { + mov ESI, aptr; + mov EDI, n; + mov ECX, bptr; + + align 4; + startmmx: + movq MM0, [ESI]; + movq MM2, [ECX]; + movq MM1, [ESI+8]; + movq MM3, [ECX+8]; + add ESI, 16; + add ECX, 16; + psubd MM0, MM2; + psubd MM1, MM3; + movq [ESI -16], MM0; + movq [ESI+8-16], MM1; + cmp ESI, EDI; + jb startmmx; + + emms; + mov aptr, ESI; + mov bptr, ECX; + } + } + } + + while (aptr < aend) + *aptr++ -= *bptr++; + + return a; +} + +unittest +{ + printf("_arraySliceSliceMinass_i unittest\n"); + + for (cpuid = 0; cpuid < CPUID_MAX; cpuid++) + { + version (log) printf(" cpuid %d\n", cpuid); + + for (int j = 0; j < 2; j++) + { + const int dim = 67; + T[] a = new T[dim + j]; // aligned on 16 byte boundary + a = a[j .. dim + j]; // misalign for second iteration + T[] b = new T[dim + j]; + b = b[j .. dim + j]; + T[] c = new T[dim + j]; + c = c[j .. dim + j]; + + for (int i = 0; i < dim; i++) + { a[i] = cast(T)i; + b[i] = cast(T)(i + 7); + c[i] = cast(T)(i * 2); + } + + b[] = c[]; + c[] -= a[]; + + for (int i = 0; i < dim; i++) + { + if (c[i] != cast(T)(b[i] - a[i])) + { + printf("[%d]: %d != %d - %d\n", i, c[i], b[i], a[i]); + assert(0); + } + } + } + } +} + + +/* ======================================================================== */ + +/*********************** + * Computes: + * a[] = b[] * value + */ + +T[] _arraySliceExpMulSliceAssign_w(T[] a, T value, T[] b) +{ + return _arraySliceExpMulSliceAssign_i(a, value, b); +} + +T[] _arraySliceExpMulSliceAssign_k(T[] a, T value, T[] b) +{ + return _arraySliceExpMulSliceAssign_i(a, value, b); +} + +T[] _arraySliceExpMulSliceAssign_i(T[] a, T value, T[] b) +in +{ + assert(a.length == b.length); + assert(disjoint(a, b)); +} +body +{ + //printf("_arraySliceExpMulSliceAssign_i()\n"); + auto aptr = a.ptr; + auto aend = aptr + a.length; + auto bptr = b.ptr; + + version (none) // multiplying a pair is not supported by MMX + { + version (D_InlineAsm_X86) + { + // SSE2 aligned version is 1380% faster + if (sse2() && a.length >= 8) + { + 
auto n = aptr + (a.length & ~7); + + uint l = value; + + if (((cast(uint) aptr | cast(uint) bptr) & 15) != 0) + { + asm + { + mov ESI, aptr; + mov EDI, n; + mov EAX, bptr; + movd XMM2, l; + pshufd XMM2, XMM2, 0; + + align 4; + startsse2u: + add ESI, 32; + movdqu XMM0, [EAX]; + movdqu XMM1, [EAX+16]; + add EAX, 32; + pmuludq XMM0, XMM2; + pmuludq XMM1, XMM2; + movdqu [ESI -32], XMM0; + movdqu [ESI+16-32], XMM1; + cmp ESI, EDI; + jb startsse2u; + + mov aptr, ESI; + mov bptr, EAX; + } + } + else + { + asm + { + mov ESI, aptr; + mov EDI, n; + mov EAX, bptr; + movd XMM2, l; + pshufd XMM2, XMM2, 0; + + align 4; + startsse2a: + add ESI, 32; + movdqa XMM0, [EAX]; + movdqa XMM1, [EAX+16]; + add EAX, 32; + pmuludq XMM0, XMM2; + pmuludq XMM1, XMM2; + movdqa [ESI -32], XMM0; + movdqa [ESI+16-32], XMM1; + cmp ESI, EDI; + jb startsse2a; + + mov aptr, ESI; + mov bptr, EAX; + } + } + } + else + { + // MMX version is 1380% faster + if (mmx() && a.length >= 4) + { + auto n = aptr + (a.length & ~3); + + ulong l = cast(uint) value | (cast(ulong)cast(uint) value << 32); + + asm + { + mov ESI, aptr; + mov EDI, n; + mov EAX, bptr; + movq MM2, l; + + align 4; + startmmx: + add ESI, 16; + movq MM0, [EAX]; + movq MM1, [EAX+8]; + add EAX, 16; + pmuludq MM0, MM2; // only multiplies low 32 bits + pmuludq MM1, MM2; + movq [ESI -16], MM0; + movq [ESI+8-16], MM1; + cmp ESI, EDI; + jb startmmx; + + emms; + mov aptr, ESI; + mov bptr, EAX; + } + } + } + } + } + + while (aptr < aend) + *aptr++ = *bptr++ * value; + + return a; +} + +unittest +{ + printf("_arraySliceExpMulSliceAssign_s unittest\n"); + + for (cpuid = 0; cpuid < CPUID_MAX; cpuid++) + { + version (log) printf(" cpuid %d\n", cpuid); + + for (int j = 0; j < 2; j++) + { + const int dim = 67; + T[] a = new T[dim + j]; // aligned on 16 byte boundary + a = a[j .. dim + j]; // misalign for second iteration + T[] b = new T[dim + j]; + b = b[j .. dim + j]; + T[] c = new T[dim + j]; + c = c[j .. 
dim + j]; + + for (int i = 0; i < dim; i++) + { a[i] = cast(T)i; + b[i] = cast(T)(i + 7); + c[i] = cast(T)(i * 2); + } + + c[] = a[] * 6; + + for (int i = 0; i < dim; i++) + { + //printf("[%d]: %d ?= %d * 6\n", i, c[i], a[i]); + if (c[i] != cast(T)(a[i] * 6)) + { + printf("[%d]: %d != %d * 6\n", i, c[i], a[i]); + assert(0); + } + } + } + } +} + + +/* ======================================================================== */ + +/*********************** + * Computes: + * a[] = b[] * c[] + */ + +T[] _arraySliceSliceMulSliceAssign_w(T[] a, T[] c, T[] b) +{ + return _arraySliceSliceMulSliceAssign_i(a, c, b); +} + +T[] _arraySliceSliceMulSliceAssign_k(T[] a, T[] c, T[] b) +{ + return _arraySliceSliceMulSliceAssign_i(a, c, b); +} + +T[] _arraySliceSliceMulSliceAssign_i(T[] a, T[] c, T[] b) +in +{ + assert(a.length == b.length && b.length == c.length); + assert(disjoint(a, b)); + assert(disjoint(a, c)); + assert(disjoint(b, c)); +} +body +{ + //printf("_arraySliceSliceMulSliceAssign_i()\n"); + auto aptr = a.ptr; + auto aend = aptr + a.length; + auto bptr = b.ptr; + auto cptr = c.ptr; + + version (none) + { + version (D_InlineAsm_X86) + { + // SSE2 aligned version is 1407% faster + if (sse2() && a.length >= 8) + { + auto n = aptr + (a.length & ~7); + + if (((cast(uint) aptr | cast(uint) bptr | cast(uint) cptr) & 15) != 0) + { + asm + { + mov ESI, aptr; + mov EDI, n; + mov EAX, bptr; + mov ECX, cptr; + + align 4; + startsse2u: + add ESI, 32; + movdqu XMM0, [EAX]; + movdqu XMM2, [ECX]; + movdqu XMM1, [EAX+16]; + movdqu XMM3, [ECX+16]; + add EAX, 32; + add ECX, 32; + pmuludq XMM0, XMM2; + pmuludq XMM1, XMM3; + movdqu [ESI -32], XMM0; + movdqu [ESI+16-32], XMM1; + cmp ESI, EDI; + jb startsse2u; + + mov aptr, ESI; + mov bptr, EAX; + mov cptr, ECX; + } + } + else + { + asm + { + mov ESI, aptr; + mov EDI, n; + mov EAX, bptr; + mov ECX, cptr; + + align 4; + startsse2a: + add ESI, 32; + movdqa XMM0, [EAX]; + movdqa XMM2, [ECX]; + movdqa XMM1, [EAX+16]; + movdqa XMM3, [ECX+16]; + 
add EAX, 32; + add ECX, 32; + pmuludq XMM0, XMM2; + pmuludq XMM1, XMM3; + movdqa [ESI -32], XMM0; + movdqa [ESI+16-32], XMM1; + cmp ESI, EDI; + jb startsse2a; + + mov aptr, ESI; + mov bptr, EAX; + mov cptr, ECX; + } + } + } + else + // MMX version is 1029% faster + if (mmx() && a.length >= 4) + { + auto n = aptr + (a.length & ~3); + + asm + { + mov ESI, aptr; + mov EDI, n; + mov EAX, bptr; + mov ECX, cptr; + + align 4; + startmmx: + add ESI, 16; + movq MM0, [EAX]; + movq MM2, [ECX]; + movq MM1, [EAX+8]; + movq MM3, [ECX+8]; + add EAX, 16; + add ECX, 16; + pmuludq MM0, MM2; + pmuludq MM1, MM3; + movq [ESI -16], MM0; + movq [ESI+8-16], MM1; + cmp ESI, EDI; + jb startmmx; + + emms; + mov aptr, ESI; + mov bptr, EAX; + mov cptr, ECX; + } + } + } + } + + while (aptr < aend) + *aptr++ = *bptr++ * *cptr++; + + return a; +} + +unittest +{ + printf("_arraySliceSliceMulSliceAssign_i unittest\n"); + + for (cpuid = 0; cpuid < CPUID_MAX; cpuid++) + { + version (log) printf(" cpuid %d\n", cpuid); + + for (int j = 0; j < 2; j++) + { + const int dim = 67; + T[] a = new T[dim + j]; // aligned on 16 byte boundary + a = a[j .. dim + j]; // misalign for second iteration + T[] b = new T[dim + j]; + b = b[j .. dim + j]; + T[] c = new T[dim + j]; + c = c[j .. 
dim + j]; + + for (int i = 0; i < dim; i++) + { a[i] = cast(T)i; + b[i] = cast(T)(i + 7); + c[i] = cast(T)(i * 2); + } + + c[] = a[] * b[]; + + for (int i = 0; i < dim; i++) + { + if (c[i] != cast(T)(a[i] * b[i])) + { + printf("[%d]: %d != %d * %d\n", i, c[i], a[i], b[i]); + assert(0); + } + } + } + } +} + + +/* ======================================================================== */ + +/*********************** + * Computes: + * a[] *= value + */ + +T[] _arrayExpSliceMulass_w(T[] a, T value) +{ + return _arrayExpSliceMulass_i(a, value); +} + +T[] _arrayExpSliceMulass_k(T[] a, T value) +{ + return _arrayExpSliceMulass_i(a, value); +} + +T[] _arrayExpSliceMulass_i(T[] a, T value) +{ + //printf("_arrayExpSliceMulass_i(a.length = %d, value = %Lg)\n", a.length, cast(real)value); + auto aptr = a.ptr; + auto aend = aptr + a.length; + + version (none) + { + version (D_InlineAsm_X86) + { + // SSE2 aligned version is 400% faster + if (sse2() && a.length >= 8) + { + auto n = aptr + (a.length & ~7); + + uint l = value; + + if (((cast(uint) aptr) & 15) != 0) + { + asm + { + mov ESI, aptr; + mov EDI, n; + movd XMM2, l; + pshufd XMM2, XMM2, 0; + + align 4; + startsse2u: + movdqu XMM0, [ESI]; + movdqu XMM1, [ESI+16]; + add ESI, 32; + pmuludq XMM0, XMM2; + pmuludq XMM1, XMM2; + movdqu [ESI -32], XMM0; + movdqu [ESI+16-32], XMM1; + cmp ESI, EDI; + jb startsse2u; + + mov aptr, ESI; + } + } + else + { + asm + { + mov ESI, aptr; + mov EDI, n; + movd XMM2, l; + pshufd XMM2, XMM2, 0; + + align 4; + startsse2a: + movdqa XMM0, [ESI]; + movdqa XMM1, [ESI+16]; + add ESI, 32; + pmuludq XMM0, XMM2; + pmuludq XMM1, XMM2; + movdqa [ESI -32], XMM0; + movdqa [ESI+16-32], XMM1; + cmp ESI, EDI; + jb startsse2a; + + mov aptr, ESI; + } + } + } + else + // MMX version is 402% faster + if (mmx() && a.length >= 4) + { + auto n = aptr + (a.length & ~3); + + ulong l = cast(uint) value | (cast(ulong)cast(uint) value << 32); + + asm + { + mov ESI, aptr; + mov EDI, n; + movq MM2, l; + + align 4; + 
startmmx: + movq MM0, [ESI]; + movq MM1, [ESI+8]; + add ESI, 16; + pmuludq MM0, MM2; + pmuludq MM1, MM2; + movq [ESI -16], MM0; + movq [ESI+8-16], MM1; + cmp ESI, EDI; + jb startmmx; + + emms; + mov aptr, ESI; + } + } + } + } + + while (aptr < aend) + *aptr++ *= value; + + return a; +} + +unittest +{ + printf("_arrayExpSliceMulass_i unittest\n"); + + for (cpuid = 0; cpuid < CPUID_MAX; cpuid++) + { + version (log) printf(" cpuid %d\n", cpuid); + + for (int j = 0; j < 2; j++) + { + const int dim = 67; + T[] a = new T[dim + j]; // aligned on 16 byte boundary + a = a[j .. dim + j]; // misalign for second iteration + T[] b = new T[dim + j]; + b = b[j .. dim + j]; + T[] c = new T[dim + j]; + c = c[j .. dim + j]; + + for (int i = 0; i < dim; i++) + { a[i] = cast(T)i; + b[i] = cast(T)(i + 7); + c[i] = cast(T)(i * 2); + } + + b[] = a[]; + a[] *= 6; + + for (int i = 0; i < dim; i++) + { + if (a[i] != cast(T)(b[i] * 6)) + { + printf("[%d]: %d != %d * 6\n", i, a[i], b[i]); + assert(0); + } + } + } + } +} + + +/* ======================================================================== */ + +/*********************** + * Computes: + * a[] *= b[] + */ + +T[] _arraySliceSliceMulass_w(T[] a, T[] b) +{ + return _arraySliceSliceMulass_i(a, b); +} + +T[] _arraySliceSliceMulass_k(T[] a, T[] b) +{ + return _arraySliceSliceMulass_i(a, b); +} + +T[] _arraySliceSliceMulass_i(T[] a, T[] b) +in +{ + assert (a.length == b.length); + assert (disjoint(a, b)); +} +body +{ + //printf("_arraySliceSliceMulass_i()\n"); + auto aptr = a.ptr; + auto aend = aptr + a.length; + auto bptr = b.ptr; + + version (none) + { + version (D_InlineAsm_X86) + { + // SSE2 aligned version is 873% faster + if (sse2() && a.length >= 8) + { + auto n = aptr + (a.length & ~7); + + if (((cast(uint) aptr | cast(uint) bptr) & 15) != 0) + { + asm + { + mov ESI, aptr; + mov EDI, n; + mov ECX, bptr; + + align 4; + startsse2u: + movdqu XMM0, [ESI]; + movdqu XMM2, [ECX]; + movdqu XMM1, [ESI+16]; + movdqu XMM3, [ECX+16]; + add ESI, 
32; + add ECX, 32; + pmuludq XMM0, XMM2; + pmuludq XMM1, XMM3; + movdqu [ESI -32], XMM0; + movdqu [ESI+16-32], XMM1; + cmp ESI, EDI; + jb startsse2u; + + mov aptr, ESI; + mov bptr, ECX; + } + } + else + { + asm + { + mov ESI, aptr; + mov EDI, n; + mov ECX, bptr; + + align 4; + startsse2a: + movdqa XMM0, [ESI]; + movdqa XMM2, [ECX]; + movdqa XMM1, [ESI+16]; + movdqa XMM3, [ECX+16]; + add ESI, 32; + add ECX, 32; + pmuludq XMM0, XMM2; + pmuludq XMM1, XMM3; + movdqa [ESI -32], XMM0; + movdqa [ESI+16-32], XMM1; + cmp ESI, EDI; + jb startsse2a; + + mov aptr, ESI; + mov bptr, ECX; + } + } + } +/+ BUG: comment out this section until we figure out what is going + wrong with the invalid pshufd instructions. + + else + // MMX version is 573% faster + if (mmx() && a.length >= 4) + { + auto n = aptr + (a.length & ~3); + + asm + { + mov ESI, aptr; + mov EDI, n; + mov ECX, bptr; + + align 4; + startmmx: + movq MM0, [ESI]; + movq MM2, [ECX]; + movq MM1, [ESI+8]; + movq MM3, [ECX+8]; + pxor MM4, MM4; + pxor MM5, MM5; + punpckldq MM4, MM0; + punpckldq MM5, MM2; + add ESI, 16; + add ECX, 16; + pmuludq MM4, MM5; + pshufd MM4, MM4, 8; // ? + movq [ESI -16], MM4; + pxor MM4, MM4; + pxor MM5, MM5; + punpckldq MM4, MM1; + punpckldq MM5, MM3; + pmuludq MM4, MM5; + pshufd MM4, MM4, 8; // ? + movq [ESI+8-16], MM4; + cmp ESI, EDI; + jb startmmx; + + emms; + mov aptr, ESI; + mov bptr, ECX; + } + } ++/ + } + } + + while (aptr < aend) + *aptr++ *= *bptr++; + + return a; +} + +unittest +{ + printf("_arraySliceSliceMulass_i unittest\n"); + + for (cpuid = 0; cpuid < CPUID_MAX; cpuid++) + { + version (log) printf(" cpuid %d\n", cpuid); + + for (int j = 0; j < 2; j++) + { + const int dim = 67; + T[] a = new T[dim + j]; // aligned on 16 byte boundary + a = a[j .. dim + j]; // misalign for second iteration + T[] b = new T[dim + j]; + b = b[j .. dim + j]; + T[] c = new T[dim + j]; + c = c[j .. 
dim + j]; + + for (int i = 0; i < dim; i++) + { a[i] = cast(T)i; + b[i] = cast(T)(i + 7); + c[i] = cast(T)(i * 2); + } + + b[] = a[]; + a[] *= c[]; + + for (int i = 0; i < dim; i++) + { + if (a[i] != cast(T)(b[i] * c[i])) + { + printf("[%d]: %d != %d * %d\n", i, a[i], b[i], c[i]); + assert(0); + } + } + } + } +} diff --git a/druntime/src/compiler/dmd/arrayreal.d b/druntime/src/compiler/dmd/arrayreal.d new file mode 100644 index 00000000..6e4e07e8 --- /dev/null +++ b/druntime/src/compiler/dmd/arrayreal.d @@ -0,0 +1,241 @@ +/** + * Contains SSE2 and MMX versions of certain operations for real. + * + * Copyright: Copyright Digital Mars 2008 - 2009. + * License: = 16) + { + auto n = aptr + (a.length & ~15); + + uint l = cast(ushort) value; + l |= (l << 16); + + if (((cast(uint) aptr | cast(uint) bptr) & 15) != 0) + { + asm // unaligned case + { + mov ESI, aptr; + mov EDI, n; + mov EAX, bptr; + movd XMM2, l; + pshufd XMM2, XMM2, 0; + + align 4; + startaddsse2u: + add ESI, 32; + movdqu XMM0, [EAX]; + movdqu XMM1, [EAX+16]; + add EAX, 32; + paddw XMM0, XMM2; + paddw XMM1, XMM2; + movdqu [ESI -32], XMM0; + movdqu [ESI+16-32], XMM1; + cmp ESI, EDI; + jb startaddsse2u; + + mov aptr, ESI; + mov bptr, EAX; + } + } + else + { + asm // aligned case + { + mov ESI, aptr; + mov EDI, n; + mov EAX, bptr; + movd XMM2, l; + pshufd XMM2, XMM2, 0; + + align 4; + startaddsse2a: + add ESI, 32; + movdqa XMM0, [EAX]; + movdqa XMM1, [EAX+16]; + add EAX, 32; + paddw XMM0, XMM2; + paddw XMM1, XMM2; + movdqa [ESI -32], XMM0; + movdqa [ESI+16-32], XMM1; + cmp ESI, EDI; + jb startaddsse2a; + + mov aptr, ESI; + mov bptr, EAX; + } + } + } + else + // MMX version is 3343% faster + if (mmx() && a.length >= 8) + { + auto n = aptr + (a.length & ~7); + + uint l = cast(ushort) value; + + asm + { + mov ESI, aptr; + mov EDI, n; + mov EAX, bptr; + movd MM2, l; + pshufw MM2, MM2, 0; + + align 4; + startmmx: + add ESI, 16; + movq MM0, [EAX]; + movq MM1, [EAX+8]; + add EAX, 16; + paddw MM0, MM2; + paddw MM1, 
MM2; + movq [ESI -16], MM0; + movq [ESI+8-16], MM1; + cmp ESI, EDI; + jb startmmx; + + emms; + mov aptr, ESI; + mov bptr, EAX; + } + } + } + + while (aptr < aend) + *aptr++ = cast(T)(*bptr++ + value); + + return a; +} + +unittest +{ + printf("_arraySliceExpAddSliceAssign_s unittest\n"); + + for (cpuid = 0; cpuid < CPUID_MAX; cpuid++) + { + version (log) printf(" cpuid %d\n", cpuid); + + for (int j = 0; j < 2; j++) + { + const int dim = 67; + T[] a = new T[dim + j]; // aligned on 16 byte boundary + a = a[j .. dim + j]; // misalign for second iteration + T[] b = new T[dim + j]; + b = b[j .. dim + j]; + T[] c = new T[dim + j]; + c = c[j .. dim + j]; + + for (int i = 0; i < dim; i++) + { a[i] = cast(T)i; + b[i] = cast(T)(i + 7); + c[i] = cast(T)(i * 2); + } + + c[] = a[] + 6; + + for (int i = 0; i < dim; i++) + { + if (c[i] != cast(T)(a[i] + 6)) + { + printf("[%d]: %d != %d + 6\n", i, c[i], a[i]); + assert(0); + } + } + } + } +} + + +/* ======================================================================== */ + +/*********************** + * Computes: + * a[] = b[] + c[] + */ + +T[] _arraySliceSliceAddSliceAssign_u(T[] a, T[] c, T[] b) +{ + return _arraySliceSliceAddSliceAssign_s(a, c, b); +} + +T[] _arraySliceSliceAddSliceAssign_t(T[] a, T[] c, T[] b) +{ + return _arraySliceSliceAddSliceAssign_s(a, c, b); +} + +T[] _arraySliceSliceAddSliceAssign_s(T[] a, T[] c, T[] b) +in +{ + assert(a.length == b.length && b.length == c.length); + assert(disjoint(a, b)); + assert(disjoint(a, c)); + assert(disjoint(b, c)); +} +body +{ + //printf("_arraySliceSliceAddSliceAssign_s()\n"); + auto aptr = a.ptr; + auto aend = aptr + a.length; + auto bptr = b.ptr; + auto cptr = c.ptr; + + version (D_InlineAsm_X86) + { + // SSE2 aligned version is 3777% faster + if (sse2() && a.length >= 16) + { + auto n = aptr + (a.length & ~15); + + if (((cast(uint) aptr | cast(uint) bptr | cast(uint) cptr) & 15) != 0) + { + asm // unaligned case + { + mov ESI, aptr; + mov EDI, n; + mov EAX, bptr; + mov 
ECX, cptr; + + align 4; + startsse2u: + add ESI, 32; + movdqu XMM0, [EAX]; + movdqu XMM1, [EAX+16]; + add EAX, 32; + movdqu XMM2, [ECX]; + movdqu XMM3, [ECX+16]; + add ECX, 32; + paddw XMM0, XMM2; + paddw XMM1, XMM3; + movdqu [ESI -32], XMM0; + movdqu [ESI+16-32], XMM1; + cmp ESI, EDI; + jb startsse2u; + + mov aptr, ESI; + mov bptr, EAX; + mov cptr, ECX; + } + } + else + { + asm // aligned case + { + mov ESI, aptr; + mov EDI, n; + mov EAX, bptr; + mov ECX, cptr; + + align 4; + startsse2a: + add ESI, 32; + movdqa XMM0, [EAX]; + movdqa XMM1, [EAX+16]; + add EAX, 32; + movdqa XMM2, [ECX]; + movdqa XMM3, [ECX+16]; + add ECX, 32; + paddw XMM0, XMM2; + paddw XMM1, XMM3; + movdqa [ESI -32], XMM0; + movdqa [ESI+16-32], XMM1; + cmp ESI, EDI; + jb startsse2a; + + mov aptr, ESI; + mov bptr, EAX; + mov cptr, ECX; + } + } + } + else + // MMX version is 2068% faster + if (mmx() && a.length >= 8) + { + auto n = aptr + (a.length & ~7); + + asm + { + mov ESI, aptr; + mov EDI, n; + mov EAX, bptr; + mov ECX, cptr; + + align 4; + startmmx: + add ESI, 16; + movq MM0, [EAX]; + movq MM1, [EAX+8]; + add EAX, 16; + movq MM2, [ECX]; + movq MM3, [ECX+8]; + add ECX, 16; + paddw MM0, MM2; + paddw MM1, MM3; + movq [ESI -16], MM0; + movq [ESI+8-16], MM1; + cmp ESI, EDI; + jb startmmx; + + emms; + mov aptr, ESI; + mov bptr, EAX; + mov cptr, ECX; + } + } + } + + while (aptr < aend) + *aptr++ = cast(T)(*bptr++ + *cptr++); + + return a; +} + +unittest +{ + printf("_arraySliceSliceAddSliceAssign_s unittest\n"); + + for (cpuid = 0; cpuid < CPUID_MAX; cpuid++) + { + version (log) printf(" cpuid %d\n", cpuid); + + for (int j = 0; j < 2; j++) + { + const int dim = 67; + T[] a = new T[dim + j]; // aligned on 16 byte boundary + a = a[j .. dim + j]; // misalign for second iteration + T[] b = new T[dim + j]; + b = b[j .. dim + j]; + T[] c = new T[dim + j]; + c = c[j .. 
dim + j]; + + for (int i = 0; i < dim; i++) + { a[i] = cast(T)i; + b[i] = cast(T)(i + 7); + c[i] = cast(T)(i * 2); + } + + c[] = a[] + b[]; + + for (int i = 0; i < dim; i++) + { + if (c[i] != cast(T)(a[i] + b[i])) + { + printf("[%d]: %d != %d + %d\n", i, c[i], a[i], b[i]); + assert(0); + } + } + } + } +} + + +/* ======================================================================== */ + +/*********************** + * Computes: + * a[] += value + */ + +T[] _arrayExpSliceAddass_u(T[] a, T value) +{ + return _arrayExpSliceAddass_s(a, value); +} + +T[] _arrayExpSliceAddass_t(T[] a, T value) +{ + return _arrayExpSliceAddass_s(a, value); +} + +T[] _arrayExpSliceAddass_s(T[] a, T value) +{ + //printf("_arrayExpSliceAddass_s(a.length = %d, value = %Lg)\n", a.length, cast(real)value); + auto aptr = a.ptr; + auto aend = aptr + a.length; + + version (D_InlineAsm_X86) + { + // SSE2 aligned version is 832% faster + if (sse2() && a.length >= 16) + { + auto n = aptr + (a.length & ~15); + + uint l = cast(ushort) value; + l |= (l << 16); + + if (((cast(uint) aptr) & 15) != 0) + { + asm // unaligned case + { + mov ESI, aptr; + mov EDI, n; + movd XMM2, l; + pshufd XMM2, XMM2, 0; + + align 4; + startaddsse2u: + movdqu XMM0, [ESI]; + movdqu XMM1, [ESI+16]; + add ESI, 32; + paddw XMM0, XMM2; + paddw XMM1, XMM2; + movdqu [ESI -32], XMM0; + movdqu [ESI+16-32], XMM1; + cmp ESI, EDI; + jb startaddsse2u; + + mov aptr, ESI; + } + } + else + { + asm // aligned case + { + mov ESI, aptr; + mov EDI, n; + movd XMM2, l; + pshufd XMM2, XMM2, 0; + + align 4; + startaddsse2a: + movdqa XMM0, [ESI]; + movdqa XMM1, [ESI+16]; + add ESI, 32; + paddw XMM0, XMM2; + paddw XMM1, XMM2; + movdqa [ESI -32], XMM0; + movdqa [ESI+16-32], XMM1; + cmp ESI, EDI; + jb startaddsse2a; + + mov aptr, ESI; + } + } + } + else + // MMX version is 826% faster + if (mmx() && a.length >= 8) + { + auto n = aptr + (a.length & ~7); + + uint l = cast(ushort) value; + + asm + { + mov ESI, aptr; + mov EDI, n; + movd MM2, l; + pshufw 
MM2, MM2, 0; + + align 4; + startmmx: + movq MM0, [ESI]; + movq MM1, [ESI+8]; + add ESI, 16; + paddw MM0, MM2; + paddw MM1, MM2; + movq [ESI -16], MM0; + movq [ESI+8-16], MM1; + cmp ESI, EDI; + jb startmmx; + + emms; + mov aptr, ESI; + } + } + } + + while (aptr < aend) + *aptr++ += value; + + return a; +} + +unittest +{ + printf("_arrayExpSliceAddass_s unittest\n"); + + for (cpuid = 0; cpuid < CPUID_MAX; cpuid++) + { + version (log) printf(" cpuid %d\n", cpuid); + + for (int j = 0; j < 2; j++) + { + const int dim = 67; + T[] a = new T[dim + j]; // aligned on 16 byte boundary + a = a[j .. dim + j]; // misalign for second iteration + T[] b = new T[dim + j]; + b = b[j .. dim + j]; + T[] c = new T[dim + j]; + c = c[j .. dim + j]; + + for (int i = 0; i < dim; i++) + { a[i] = cast(T)i; + b[i] = cast(T)(i + 7); + c[i] = cast(T)(i * 2); + } + + a[] = c[]; + a[] += 6; + + for (int i = 0; i < dim; i++) + { + if (a[i] != cast(T)(c[i] + 6)) + { + printf("[%d]: %d != %d + 6\n", i, a[i], c[i]); + assert(0); + } + } + } + } +} + + +/* ======================================================================== */ + +/*********************** + * Computes: + * a[] += b[] + */ + +T[] _arraySliceSliceAddass_u(T[] a, T[] b) +{ + return _arraySliceSliceAddass_s(a, b); +} + +T[] _arraySliceSliceAddass_t(T[] a, T[] b) +{ + return _arraySliceSliceAddass_s(a, b); +} + +T[] _arraySliceSliceAddass_s(T[] a, T[] b) +in +{ + assert (a.length == b.length); + assert (disjoint(a, b)); +} +body +{ + //printf("_arraySliceSliceAddass_s()\n"); + auto aptr = a.ptr; + auto aend = aptr + a.length; + auto bptr = b.ptr; + + version (D_InlineAsm_X86) + { + // SSE2 aligned version is 2085% faster + if (sse2() && a.length >= 16) + { + auto n = aptr + (a.length & ~15); + + if (((cast(uint) aptr | cast(uint) bptr) & 15) != 0) + { + asm // unaligned case + { + mov ESI, aptr; + mov EDI, n; + mov ECX, bptr; + + align 4; + startsse2u: + movdqu XMM0, [ESI]; + movdqu XMM1, [ESI+16]; + add ESI, 32; + movdqu XMM2, [ECX]; + 
movdqu XMM3, [ECX+16]; + add ECX, 32; + paddw XMM0, XMM2; + paddw XMM1, XMM3; + movdqu [ESI -32], XMM0; + movdqu [ESI+16-32], XMM1; + cmp ESI, EDI; + jb startsse2u; + + mov aptr, ESI; + mov bptr, ECX; + } + } + else + { + asm // aligned case + { + mov ESI, aptr; + mov EDI, n; + mov ECX, bptr; + + align 4; + startsse2a: + movdqa XMM0, [ESI]; + movdqa XMM1, [ESI+16]; + add ESI, 32; + movdqa XMM2, [ECX]; + movdqa XMM3, [ECX+16]; + add ECX, 32; + paddw XMM0, XMM2; + paddw XMM1, XMM3; + movdqa [ESI -32], XMM0; + movdqa [ESI+16-32], XMM1; + cmp ESI, EDI; + jb startsse2a; + + mov aptr, ESI; + mov bptr, ECX; + } + } + } + else + // MMX version is 1022% faster + if (mmx() && a.length >= 8) + { + auto n = aptr + (a.length & ~7); + + asm + { + mov ESI, aptr; + mov EDI, n; + mov ECX, bptr; + + align 4; + start: + movq MM0, [ESI]; + movq MM1, [ESI+8]; + add ESI, 16; + movq MM2, [ECX]; + movq MM3, [ECX+8]; + add ECX, 16; + paddw MM0, MM2; + paddw MM1, MM3; + movq [ESI -16], MM0; + movq [ESI+8-16], MM1; + cmp ESI, EDI; + jb start; + + emms; + mov aptr, ESI; + mov bptr, ECX; + } + } + } + + while (aptr < aend) + *aptr++ += *bptr++; + + return a; +} + +unittest +{ + printf("_arraySliceSliceAddass_s unittest\n"); + + for (cpuid = 0; cpuid < CPUID_MAX; cpuid++) + { + version (log) printf(" cpuid %d\n", cpuid); + + for (int j = 0; j < 2; j++) + { + const int dim = 67; + T[] a = new T[dim + j]; // aligned on 16 byte boundary + a = a[j .. dim + j]; // misalign for second iteration + T[] b = new T[dim + j]; + b = b[j .. dim + j]; + T[] c = new T[dim + j]; + c = c[j .. 
dim + j]; + + for (int i = 0; i < dim; i++) + { a[i] = cast(T)i; + b[i] = cast(T)(i + 7); + c[i] = cast(T)(i * 2); + } + + b[] = c[]; + c[] += a[]; + + for (int i = 0; i < dim; i++) + { + if (c[i] != cast(T)(b[i] + a[i])) + { + printf("[%d]: %d != %d + %d\n", i, c[i], b[i], a[i]); + assert(0); + } + } + } + } +} + + +/* ======================================================================== */ + +/*********************** + * Computes: + * a[] = b[] - value + */ + +T[] _arraySliceExpMinSliceAssign_u(T[] a, T value, T[] b) +{ + return _arraySliceExpMinSliceAssign_s(a, value, b); +} + +T[] _arraySliceExpMinSliceAssign_t(T[] a, T value, T[] b) +{ + return _arraySliceExpMinSliceAssign_s(a, value, b); +} + +T[] _arraySliceExpMinSliceAssign_s(T[] a, T value, T[] b) +in +{ + assert(a.length == b.length); + assert(disjoint(a, b)); +} +body +{ + //printf("_arraySliceExpMinSliceAssign_s()\n"); + auto aptr = a.ptr; + auto aend = aptr + a.length; + auto bptr = b.ptr; + + version (D_InlineAsm_X86) + { + // SSE2 aligned version is 3695% faster + if (sse2() && a.length >= 16) + { + auto n = aptr + (a.length & ~15); + + uint l = cast(ushort) value; + l |= (l << 16); + + if (((cast(uint) aptr | cast(uint) bptr) & 15) != 0) + { + asm // unaligned case + { + mov ESI, aptr; + mov EDI, n; + mov EAX, bptr; + movd XMM2, l; + pshufd XMM2, XMM2, 0; + + align 4; + startaddsse2u: + add ESI, 32; + movdqu XMM0, [EAX]; + movdqu XMM1, [EAX+16]; + add EAX, 32; + psubw XMM0, XMM2; + psubw XMM1, XMM2; + movdqu [ESI -32], XMM0; + movdqu [ESI+16-32], XMM1; + cmp ESI, EDI; + jb startaddsse2u; + + mov aptr, ESI; + mov bptr, EAX; + } + } + else + { + asm // aligned case + { + mov ESI, aptr; + mov EDI, n; + mov EAX, bptr; + movd XMM2, l; + pshufd XMM2, XMM2, 0; + + align 4; + startaddsse2a: + add ESI, 32; + movdqa XMM0, [EAX]; + movdqa XMM1, [EAX+16]; + add EAX, 32; + psubw XMM0, XMM2; + psubw XMM1, XMM2; + movdqa [ESI -32], XMM0; + movdqa [ESI+16-32], XMM1; + cmp ESI, EDI; + jb startaddsse2a; + + mov 
aptr, ESI; + mov bptr, EAX; + } + } + } + else + // MMX version is 3049% faster + if (mmx() && a.length >= 8) + { + auto n = aptr + (a.length & ~7); + + uint l = cast(ushort) value; + + asm + { + mov ESI, aptr; + mov EDI, n; + mov EAX, bptr; + movd MM2, l; + pshufw MM2, MM2, 0; + + align 4; + startmmx: + add ESI, 16; + movq MM0, [EAX]; + movq MM1, [EAX+8]; + add EAX, 16; + psubw MM0, MM2; + psubw MM1, MM2; + movq [ESI -16], MM0; + movq [ESI+8-16], MM1; + cmp ESI, EDI; + jb startmmx; + + emms; + mov aptr, ESI; + mov bptr, EAX; + } + } + } + + while (aptr < aend) + *aptr++ = cast(T)(*bptr++ - value); + + return a; +} + +unittest +{ + printf("_arraySliceExpMinSliceAssign_s unittest\n"); + + for (cpuid = 0; cpuid < CPUID_MAX; cpuid++) + { + version (log) printf(" cpuid %d\n", cpuid); + + for (int j = 0; j < 2; j++) + { + const int dim = 67; + T[] a = new T[dim + j]; // aligned on 16 byte boundary + a = a[j .. dim + j]; // misalign for second iteration + T[] b = new T[dim + j]; + b = b[j .. dim + j]; + T[] c = new T[dim + j]; + c = c[j .. 
dim + j]; + + for (int i = 0; i < dim; i++) + { a[i] = cast(T)i; + b[i] = cast(T)(i + 7); + c[i] = cast(T)(i * 2); + } + + c[] = a[] - 6; + + for (int i = 0; i < dim; i++) + { + if (c[i] != cast(T)(a[i] - 6)) + { + printf("[%d]: %d != %d - 6\n", i, c[i], a[i]); + assert(0); + } + } + } + } +} + + +/* ======================================================================== */ + +/*********************** + * Computes: + * a[] = value - b[] + */ + +T[] _arrayExpSliceMinSliceAssign_u(T[] a, T[] b, T value) +{ + return _arrayExpSliceMinSliceAssign_s(a, b, value); +} + +T[] _arrayExpSliceMinSliceAssign_t(T[] a, T[] b, T value) +{ + return _arrayExpSliceMinSliceAssign_s(a, b, value); +} + +T[] _arrayExpSliceMinSliceAssign_s(T[] a, T[] b, T value) +in +{ + assert(a.length == b.length); + assert(disjoint(a, b)); +} +body +{ + //printf("_arrayExpSliceMinSliceAssign_s()\n"); + auto aptr = a.ptr; + auto aend = aptr + a.length; + auto bptr = b.ptr; + + version (D_InlineAsm_X86) + { + // SSE2 aligned version is 4995% faster + if (sse2() && a.length >= 16) + { + auto n = aptr + (a.length & ~15); + + uint l = cast(ushort) value; + l |= (l << 16); + + if (((cast(uint) aptr | cast(uint) bptr) & 15) != 0) + { + asm // unaligned case + { + mov ESI, aptr; + mov EDI, n; + mov EAX, bptr; + + align 4; + startaddsse2u: + movd XMM2, l; + pshufd XMM2, XMM2, 0; + movd XMM3, l; + pshufd XMM3, XMM3, 0; + add ESI, 32; + movdqu XMM0, [EAX]; + movdqu XMM1, [EAX+16]; + add EAX, 32; + psubw XMM2, XMM0; + psubw XMM3, XMM1; + movdqu [ESI -32], XMM2; + movdqu [ESI+16-32], XMM3; + cmp ESI, EDI; + jb startaddsse2u; + + mov aptr, ESI; + mov bptr, EAX; + } + } + else + { + asm // aligned case + { + mov ESI, aptr; + mov EDI, n; + mov EAX, bptr; + + align 4; + startaddsse2a: + movd XMM2, l; + pshufd XMM2, XMM2, 0; + movd XMM3, l; + pshufd XMM3, XMM3, 0; + add ESI, 32; + movdqa XMM0, [EAX]; + movdqa XMM1, [EAX+16]; + add EAX, 32; + psubw XMM2, XMM0; + psubw XMM3, XMM1; + movdqa [ESI -32], XMM2; + movdqa 
[ESI+16-32], XMM3; + cmp ESI, EDI; + jb startaddsse2a; + + mov aptr, ESI; + mov bptr, EAX; + } + } + } + else + // MMX version is 4562% faster + if (mmx() && a.length >= 8) + { + auto n = aptr + (a.length & ~7); + + uint l = cast(ushort) value; + + asm + { + mov ESI, aptr; + mov EDI, n; + mov EAX, bptr; + movd MM4, l; + pshufw MM4, MM4, 0; + + align 4; + startmmx: + add ESI, 16; + movq MM2, [EAX]; + movq MM3, [EAX+8]; + movq MM0, MM4; + movq MM1, MM4; + add EAX, 16; + psubw MM0, MM2; + psubw MM1, MM3; + movq [ESI -16], MM0; + movq [ESI+8-16], MM1; + cmp ESI, EDI; + jb startmmx; + + emms; + mov aptr, ESI; + mov bptr, EAX; + } + } + } + + while (aptr < aend) + *aptr++ = cast(T)(value - *bptr++); + + return a; +} + +unittest +{ + printf("_arrayExpSliceMinSliceAssign_s unittest\n"); + + for (cpuid = 0; cpuid < CPUID_MAX; cpuid++) + { + version (log) printf(" cpuid %d\n", cpuid); + + for (int j = 0; j < 2; j++) + { + const int dim = 67; + T[] a = new T[dim + j]; // aligned on 16 byte boundary + a = a[j .. dim + j]; // misalign for second iteration + T[] b = new T[dim + j]; + b = b[j .. dim + j]; + T[] c = new T[dim + j]; + c = c[j .. 
dim + j]; + + for (int i = 0; i < dim; i++) + { a[i] = cast(T)i; + b[i] = cast(T)(i + 7); + c[i] = cast(T)(i * 2); + } + + c[] = 6 - a[]; + + for (int i = 0; i < dim; i++) + { + if (c[i] != cast(T)(6 - a[i])) + { + printf("[%d]: %d != 6 - %d\n", i, c[i], a[i]); + assert(0); + } + } + } + } +} + + +/* ======================================================================== */ + +/*********************** + * Computes: + * a[] = b[] - c[] + */ + +T[] _arraySliceSliceMinSliceAssign_u(T[] a, T[] c, T[] b) +{ + return _arraySliceSliceMinSliceAssign_s(a, c, b); +} + +T[] _arraySliceSliceMinSliceAssign_t(T[] a, T[] c, T[] b) +{ + return _arraySliceSliceMinSliceAssign_s(a, c, b); +} + +T[] _arraySliceSliceMinSliceAssign_s(T[] a, T[] c, T[] b) +in +{ + assert(a.length == b.length && b.length == c.length); + assert(disjoint(a, b)); + assert(disjoint(a, c)); + assert(disjoint(b, c)); +} +body +{ + auto aptr = a.ptr; + auto aend = aptr + a.length; + auto bptr = b.ptr; + auto cptr = c.ptr; + + version (D_InlineAsm_X86) + { + // SSE2 aligned version is 4129% faster + if (sse2() && a.length >= 16) + { + auto n = aptr + (a.length & ~15); + + if (((cast(uint) aptr | cast(uint) bptr | cast(uint) cptr) & 15) != 0) + { + asm // unaligned case + { + mov ESI, aptr; + mov EDI, n; + mov EAX, bptr; + mov ECX, cptr; + + align 4; + startsse2u: + add ESI, 32; + movdqu XMM0, [EAX]; + movdqu XMM1, [EAX+16]; + add EAX, 32; + movdqu XMM2, [ECX]; + movdqu XMM3, [ECX+16]; + add ECX, 32; + psubw XMM0, XMM2; + psubw XMM1, XMM3; + movdqu [ESI -32], XMM0; + movdqu [ESI+16-32], XMM1; + cmp ESI, EDI; + jb startsse2u; + + mov aptr, ESI; + mov bptr, EAX; + mov cptr, ECX; + } + } + else + { + asm // aligned case + { + mov ESI, aptr; + mov EDI, n; + mov EAX, bptr; + mov ECX, cptr; + + align 4; + startsse2a: + add ESI, 32; + movdqa XMM0, [EAX]; + movdqa XMM1, [EAX+16]; + add EAX, 32; + movdqa XMM2, [ECX]; + movdqa XMM3, [ECX+16]; + add ECX, 32; + psubw XMM0, XMM2; + psubw XMM1, XMM3; + movdqa [ESI -32], XMM0; 
+ movdqa [ESI+16-32], XMM1; + cmp ESI, EDI; + jb startsse2a; + + mov aptr, ESI; + mov bptr, EAX; + mov cptr, ECX; + } + } + } + else + // MMX version is 2018% faster + if (mmx() && a.length >= 8) + { + auto n = aptr + (a.length & ~7); + + asm + { + mov ESI, aptr; + mov EDI, n; + mov EAX, bptr; + mov ECX, cptr; + + align 4; + startmmx: + add ESI, 16; + movq MM0, [EAX]; + movq MM1, [EAX+8]; + add EAX, 16; + movq MM2, [ECX]; + movq MM3, [ECX+8]; + add ECX, 16; + psubw MM0, MM2; + psubw MM1, MM3; + movq [ESI -16], MM0; + movq [ESI+8-16], MM1; + cmp ESI, EDI; + jb startmmx; + + emms; + mov aptr, ESI; + mov bptr, EAX; + mov cptr, ECX; + } + } + } + + while (aptr < aend) + *aptr++ = cast(T)(*bptr++ - *cptr++); + + return a; +} + +unittest +{ + printf("_arraySliceSliceMinSliceAssign_s unittest\n"); + + for (cpuid = 0; cpuid < CPUID_MAX; cpuid++) + { + version (log) printf(" cpuid %d\n", cpuid); + + for (int j = 0; j < 2; j++) + { + const int dim = 67; + T[] a = new T[dim + j]; // aligned on 16 byte boundary + a = a[j .. dim + j]; // misalign for second iteration + T[] b = new T[dim + j]; + b = b[j .. dim + j]; + T[] c = new T[dim + j]; + c = c[j .. 
dim + j]; + + for (int i = 0; i < dim; i++) + { a[i] = cast(T)i; + b[i] = cast(T)(i + 7); + c[i] = cast(T)(i * 2); + } + + c[] = a[] - b[]; + + for (int i = 0; i < dim; i++) + { + if (c[i] != cast(T)(a[i] - b[i])) + { + printf("[%d]: %d != %d - %d\n", i, c[i], a[i], b[i]); + assert(0); + } + } + } + } +} + + +/* ======================================================================== */ + +/*********************** + * Computes: + * a[] -= value + */ + +T[] _arrayExpSliceMinass_u(T[] a, T value) +{ + return _arrayExpSliceMinass_s(a, value); +} + +T[] _arrayExpSliceMinass_t(T[] a, T value) +{ + return _arrayExpSliceMinass_s(a, value); +} + +T[] _arrayExpSliceMinass_s(T[] a, T value) +{ + //printf("_arrayExpSliceMinass_s(a.length = %d, value = %Lg)\n", a.length, cast(real)value); + auto aptr = a.ptr; + auto aend = aptr + a.length; + + version (D_InlineAsm_X86) + { + // SSE2 aligned version is 835% faster + if (sse2() && a.length >= 16) + { + auto n = aptr + (a.length & ~15); + + uint l = cast(ushort) value; + l |= (l << 16); + + if (((cast(uint) aptr) & 15) != 0) + { + asm // unaligned case + { + mov ESI, aptr; + mov EDI, n; + movd XMM2, l; + pshufd XMM2, XMM2, 0; + + align 4; + startaddsse2u: + movdqu XMM0, [ESI]; + movdqu XMM1, [ESI+16]; + add ESI, 32; + psubw XMM0, XMM2; + psubw XMM1, XMM2; + movdqu [ESI -32], XMM0; + movdqu [ESI+16-32], XMM1; + cmp ESI, EDI; + jb startaddsse2u; + + mov aptr, ESI; + } + } + else + { + asm // aligned case + { + mov ESI, aptr; + mov EDI, n; + movd XMM2, l; + pshufd XMM2, XMM2, 0; + + align 4; + startaddsse2a: + movdqa XMM0, [ESI]; + movdqa XMM1, [ESI+16]; + add ESI, 32; + psubw XMM0, XMM2; + psubw XMM1, XMM2; + movdqa [ESI -32], XMM0; + movdqa [ESI+16-32], XMM1; + cmp ESI, EDI; + jb startaddsse2a; + + mov aptr, ESI; + } + } + } + else + // MMX version is 835% faster + if (mmx() && a.length >= 8) + { + auto n = aptr + (a.length & ~7); + + uint l = cast(ushort) value; + + asm + { + mov ESI, aptr; + mov EDI, n; + movd MM2, l; + pshufw 
MM2, MM2, 0; + + align 4; + startmmx: + movq MM0, [ESI]; + movq MM1, [ESI+8]; + add ESI, 16; + psubw MM0, MM2; + psubw MM1, MM2; + movq [ESI -16], MM0; + movq [ESI+8-16], MM1; + cmp ESI, EDI; + jb startmmx; + + emms; + mov aptr, ESI; + } + } + } + + while (aptr < aend) + *aptr++ -= value; + + return a; +} + +unittest +{ + printf("_arrayExpSliceMinass_s unittest\n"); + + for (cpuid = 0; cpuid < CPUID_MAX; cpuid++) + { + version (log) printf(" cpuid %d\n", cpuid); + + for (int j = 0; j < 2; j++) + { + const int dim = 67; + T[] a = new T[dim + j]; // aligned on 16 byte boundary + a = a[j .. dim + j]; // misalign for second iteration + T[] b = new T[dim + j]; + b = b[j .. dim + j]; + T[] c = new T[dim + j]; + c = c[j .. dim + j]; + + for (int i = 0; i < dim; i++) + { a[i] = cast(T)i; + b[i] = cast(T)(i + 7); + c[i] = cast(T)(i * 2); + } + + a[] = c[]; + a[] -= 6; + + for (int i = 0; i < dim; i++) + { + if (a[i] != cast(T)(c[i] - 6)) + { + printf("[%d]: %d != %d - 6\n", i, a[i], c[i]); + assert(0); + } + } + } + } +} + + +/* ======================================================================== */ + +/*********************** + * Computes: + * a[] -= b[] + */ + +T[] _arraySliceSliceMinass_u(T[] a, T[] b) +{ + return _arraySliceSliceMinass_s(a, b); +} + +T[] _arraySliceSliceMinass_t(T[] a, T[] b) +{ + return _arraySliceSliceMinass_s(a, b); +} + +T[] _arraySliceSliceMinass_s(T[] a, T[] b) +in +{ + assert (a.length == b.length); + assert (disjoint(a, b)); +} +body +{ + //printf("_arraySliceSliceMinass_s()\n"); + auto aptr = a.ptr; + auto aend = aptr + a.length; + auto bptr = b.ptr; + + version (D_InlineAsm_X86) + { + // SSE2 aligned version is 2121% faster + if (sse2() && a.length >= 16) + { + auto n = aptr + (a.length & ~15); + + if (((cast(uint) aptr | cast(uint) bptr) & 15) != 0) + { + asm // unaligned case + { + mov ESI, aptr; + mov EDI, n; + mov ECX, bptr; + + align 4; + startsse2u: + movdqu XMM0, [ESI]; + movdqu XMM1, [ESI+16]; + add ESI, 32; + movdqu XMM2, [ECX]; + 
movdqu XMM3, [ECX+16]; + add ECX, 32; + psubw XMM0, XMM2; + psubw XMM1, XMM3; + movdqu [ESI -32], XMM0; + movdqu [ESI+16-32], XMM1; + cmp ESI, EDI; + jb startsse2u; + + mov aptr, ESI; + mov bptr, ECX; + } + } + else + { + asm // aligned case + { + mov ESI, aptr; + mov EDI, n; + mov ECX, bptr; + + align 4; + startsse2a: + movdqa XMM0, [ESI]; + movdqa XMM1, [ESI+16]; + add ESI, 32; + movdqa XMM2, [ECX]; + movdqa XMM3, [ECX+16]; + add ECX, 32; + psubw XMM0, XMM2; + psubw XMM1, XMM3; + movdqa [ESI -32], XMM0; + movdqa [ESI+16-32], XMM1; + cmp ESI, EDI; + jb startsse2a; + + mov aptr, ESI; + mov bptr, ECX; + } + } + } + else + // MMX version is 1116% faster + if (mmx() && a.length >= 8) + { + auto n = aptr + (a.length & ~7); + + asm + { + mov ESI, aptr; + mov EDI, n; + mov ECX, bptr; + + align 4; + start: + movq MM0, [ESI]; + movq MM1, [ESI+8]; + add ESI, 16; + movq MM2, [ECX]; + movq MM3, [ECX+8]; + add ECX, 16; + psubw MM0, MM2; + psubw MM1, MM3; + movq [ESI -16], MM0; + movq [ESI+8-16], MM1; + cmp ESI, EDI; + jb start; + + emms; + mov aptr, ESI; + mov bptr, ECX; + } + } + } + + while (aptr < aend) + *aptr++ -= *bptr++; + + return a; +} + +unittest +{ + printf("_arraySliceSliceMinass_s unittest\n"); + + for (cpuid = 0; cpuid < CPUID_MAX; cpuid++) + { + version (log) printf(" cpuid %d\n", cpuid); + + for (int j = 0; j < 2; j++) + { + const int dim = 67; + T[] a = new T[dim + j]; // aligned on 16 byte boundary + a = a[j .. dim + j]; // misalign for second iteration + T[] b = new T[dim + j]; + b = b[j .. dim + j]; + T[] c = new T[dim + j]; + c = c[j .. 
dim + j]; + + for (int i = 0; i < dim; i++) + { a[i] = cast(T)i; + b[i] = cast(T)(i + 7); + c[i] = cast(T)(i * 2); + } + + b[] = c[]; + c[] -= a[]; + + for (int i = 0; i < dim; i++) + { + if (c[i] != cast(T)(b[i] - a[i])) + { + printf("[%d]: %d != %d - %d\n", i, c[i], b[i], a[i]); + assert(0); + } + } + } + } +} + + +/* ======================================================================== */ + +/*********************** + * Computes: + * a[] = b[] * value + */ + +T[] _arraySliceExpMulSliceAssign_u(T[] a, T value, T[] b) +{ + return _arraySliceExpMulSliceAssign_s(a, value, b); +} + +T[] _arraySliceExpMulSliceAssign_t(T[] a, T value, T[] b) +{ + return _arraySliceExpMulSliceAssign_s(a, value, b); +} + +T[] _arraySliceExpMulSliceAssign_s(T[] a, T value, T[] b) +in +{ + assert(a.length == b.length); + assert(disjoint(a, b)); +} +body +{ + //printf("_arraySliceExpMulSliceAssign_s()\n"); + auto aptr = a.ptr; + auto aend = aptr + a.length; + auto bptr = b.ptr; + + version (D_InlineAsm_X86) + { + // SSE2 aligned version is 3733% faster + if (sse2() && a.length >= 16) + { + auto n = aptr + (a.length & ~15); + + uint l = cast(ushort) value; + l |= l << 16; + + if (((cast(uint) aptr | cast(uint) bptr) & 15) != 0) + { + asm + { + mov ESI, aptr; + mov EDI, n; + mov EAX, bptr; + movd XMM2, l; + pshufd XMM2, XMM2, 0; + + align 4; + startsse2u: + add ESI, 32; + movdqu XMM0, [EAX]; + movdqu XMM1, [EAX+16]; + add EAX, 32; + pmullw XMM0, XMM2; + pmullw XMM1, XMM2; + movdqu [ESI -32], XMM0; + movdqu [ESI+16-32], XMM1; + cmp ESI, EDI; + jb startsse2u; + + mov aptr, ESI; + mov bptr, EAX; + } + } + else + { + asm + { + mov ESI, aptr; + mov EDI, n; + mov EAX, bptr; + movd XMM2, l; + pshufd XMM2, XMM2, 0; + + align 4; + startsse2a: + add ESI, 32; + movdqa XMM0, [EAX]; + movdqa XMM1, [EAX+16]; + add EAX, 32; + pmullw XMM0, XMM2; + pmullw XMM1, XMM2; + movdqa [ESI -32], XMM0; + movdqa [ESI+16-32], XMM1; + cmp ESI, EDI; + jb startsse2a; + + mov aptr, ESI; + mov bptr, EAX; + } + } + } + else 
+ // MMX version is 3733% faster + if (mmx() && a.length >= 8) + { + auto n = aptr + (a.length & ~7); + + uint l = cast(ushort) value; + + asm + { + mov ESI, aptr; + mov EDI, n; + mov EAX, bptr; + movd MM2, l; + pshufw MM2, MM2, 0; + + align 4; + startmmx: + add ESI, 16; + movq MM0, [EAX]; + movq MM1, [EAX+8]; + add EAX, 16; + pmullw MM0, MM2; + pmullw MM1, MM2; + movq [ESI -16], MM0; + movq [ESI+8-16], MM1; + cmp ESI, EDI; + jb startmmx; + + emms; + mov aptr, ESI; + mov bptr, EAX; + } + } + } + + while (aptr < aend) + *aptr++ = cast(T)(*bptr++ * value); + + return a; +} + +unittest +{ + printf("_arraySliceExpMulSliceAssign_s unittest\n"); + + for (cpuid = 0; cpuid < CPUID_MAX; cpuid++) + { + version (log) printf(" cpuid %d\n", cpuid); + + for (int j = 0; j < 2; j++) + { + const int dim = 67; + T[] a = new T[dim + j]; // aligned on 16 byte boundary + a = a[j .. dim + j]; // misalign for second iteration + T[] b = new T[dim + j]; + b = b[j .. dim + j]; + T[] c = new T[dim + j]; + c = c[j .. 
dim + j]; + + for (int i = 0; i < dim; i++) + { a[i] = cast(T)i; + b[i] = cast(T)(i + 7); + c[i] = cast(T)(i * 2); + } + + c[] = a[] * 6; + + for (int i = 0; i < dim; i++) + { + if (c[i] != cast(T)(a[i] * 6)) + { + printf("[%d]: %d != %d * 6\n", i, c[i], a[i]); + assert(0); + } + } + } + } +} + + +/* ======================================================================== */ + +/*********************** + * Computes: + * a[] = b[] * c[] + */ + +T[] _arraySliceSliceMulSliceAssign_u(T[] a, T[] c, T[] b) +{ + return _arraySliceSliceMulSliceAssign_s(a, c, b); +} + +T[] _arraySliceSliceMulSliceAssign_t(T[] a, T[] c, T[] b) +{ + return _arraySliceSliceMulSliceAssign_s(a, c, b); +} + +T[] _arraySliceSliceMulSliceAssign_s(T[] a, T[] c, T[] b) +in +{ + assert(a.length == b.length && b.length == c.length); + assert(disjoint(a, b)); + assert(disjoint(a, c)); + assert(disjoint(b, c)); +} +body +{ + //printf("_arraySliceSliceMulSliceAssign_s()\n"); + auto aptr = a.ptr; + auto aend = aptr + a.length; + auto bptr = b.ptr; + auto cptr = c.ptr; + + version (D_InlineAsm_X86) + { + // SSE2 aligned version is 2515% faster + if (sse2() && a.length >= 16) + { + auto n = aptr + (a.length & ~15); + + if (((cast(uint) aptr | cast(uint) bptr | cast(uint) cptr) & 15) != 0) + { + asm + { + mov ESI, aptr; + mov EDI, n; + mov EAX, bptr; + mov ECX, cptr; + + align 4; + startsse2u: + add ESI, 32; + movdqu XMM0, [EAX]; + movdqu XMM2, [ECX]; + movdqu XMM1, [EAX+16]; + movdqu XMM3, [ECX+16]; + add EAX, 32; + add ECX, 32; + pmullw XMM0, XMM2; + pmullw XMM1, XMM3; + movdqu [ESI -32], XMM0; + movdqu [ESI+16-32], XMM1; + cmp ESI, EDI; + jb startsse2u; + + mov aptr, ESI; + mov bptr, EAX; + mov cptr, ECX; + } + } + else + { + asm + { + mov ESI, aptr; + mov EDI, n; + mov EAX, bptr; + mov ECX, cptr; + + align 4; + startsse2a: + add ESI, 32; + movdqa XMM0, [EAX]; + movdqa XMM2, [ECX]; + movdqa XMM1, [EAX+16]; + movdqa XMM3, [ECX+16]; + add EAX, 32; + add ECX, 32; + pmullw XMM0, XMM2; + pmullw XMM1, XMM3; + 
movdqa [ESI -32], XMM0; + movdqa [ESI+16-32], XMM1; + cmp ESI, EDI; + jb startsse2a; + + mov aptr, ESI; + mov bptr, EAX; + mov cptr, ECX; + } + } + } + else + // MMX version is 2515% faster + if (mmx() && a.length >= 8) + { + auto n = aptr + (a.length & ~7); + + asm + { + mov ESI, aptr; + mov EDI, n; + mov EAX, bptr; + mov ECX, cptr; + + align 4; + startmmx: + add ESI, 16; + movq MM0, [EAX]; + movq MM2, [ECX]; + movq MM1, [EAX+8]; + movq MM3, [ECX+8]; + add EAX, 16; + add ECX, 16; + pmullw MM0, MM2; + pmullw MM1, MM3; + movq [ESI -16], MM0; + movq [ESI+8-16], MM1; + cmp ESI, EDI; + jb startmmx; + + emms; + mov aptr, ESI; + mov bptr, EAX; + mov cptr, ECX; + } + } + } + + while (aptr < aend) + *aptr++ = cast(T)(*bptr++ * *cptr++); + + return a; +} + +unittest +{ + printf("_arraySliceSliceMulSliceAssign_s unittest\n"); + + for (cpuid = 0; cpuid < CPUID_MAX; cpuid++) + { + version (log) printf(" cpuid %d\n", cpuid); + + for (int j = 0; j < 2; j++) + { + const int dim = 67; + T[] a = new T[dim + j]; // aligned on 16 byte boundary + a = a[j .. dim + j]; // misalign for second iteration + T[] b = new T[dim + j]; + b = b[j .. dim + j]; + T[] c = new T[dim + j]; + c = c[j .. 
dim + j]; + + for (int i = 0; i < dim; i++) + { a[i] = cast(T)i; + b[i] = cast(T)(i + 7); + c[i] = cast(T)(i * 2); + } + + c[] = a[] * b[]; + + for (int i = 0; i < dim; i++) + { + if (c[i] != cast(T)(a[i] * b[i])) + { + printf("[%d]: %d != %d * %d\n", i, c[i], a[i], b[i]); + assert(0); + } + } + } + } +} + + +/* ======================================================================== */ + +/*********************** + * Computes: + * a[] *= value + */ + +T[] _arrayExpSliceMulass_u(T[] a, T value) +{ + return _arrayExpSliceMulass_s(a, value); +} + +T[] _arrayExpSliceMulass_t(T[] a, T value) +{ + return _arrayExpSliceMulass_s(a, value); +} + +T[] _arrayExpSliceMulass_s(T[] a, T value) +{ + //printf("_arrayExpSliceMulass_s(a.length = %d, value = %Lg)\n", a.length, cast(real)value); + auto aptr = a.ptr; + auto aend = aptr + a.length; + + version (D_InlineAsm_X86) + { + // SSE2 aligned version is 2044% faster + if (sse2() && a.length >= 16) + { + auto n = aptr + (a.length & ~15); + + uint l = cast(ushort) value; + l |= l << 16; + + if (((cast(uint) aptr) & 15) != 0) + { + asm + { + mov ESI, aptr; + mov EDI, n; + movd XMM2, l; + pshufd XMM2, XMM2, 0; + + align 4; + startsse2u: + movdqu XMM0, [ESI]; + movdqu XMM1, [ESI+16]; + add ESI, 32; + pmullw XMM0, XMM2; + pmullw XMM1, XMM2; + movdqu [ESI -32], XMM0; + movdqu [ESI+16-32], XMM1; + cmp ESI, EDI; + jb startsse2u; + + mov aptr, ESI; + } + } + else + { + asm + { + mov ESI, aptr; + mov EDI, n; + movd XMM2, l; + pshufd XMM2, XMM2, 0; + + align 4; + startsse2a: + movdqa XMM0, [ESI]; + movdqa XMM1, [ESI+16]; + add ESI, 32; + pmullw XMM0, XMM2; + pmullw XMM1, XMM2; + movdqa [ESI -32], XMM0; + movdqa [ESI+16-32], XMM1; + cmp ESI, EDI; + jb startsse2a; + + mov aptr, ESI; + } + } + } + else + // MMX version is 2056% faster + if (mmx() && a.length >= 8) + { + auto n = aptr + (a.length & ~7); + + uint l = cast(ushort) value; + + asm + { + mov ESI, aptr; + mov EDI, n; + movd MM2, l; + pshufw MM2, MM2, 0; + + align 4; + startmmx: + 
movq MM0, [ESI]; + movq MM1, [ESI+8]; + add ESI, 16; + pmullw MM0, MM2; + pmullw MM1, MM2; + movq [ESI -16], MM0; + movq [ESI+8-16], MM1; + cmp ESI, EDI; + jb startmmx; + + emms; + mov aptr, ESI; + } + } + } + + while (aptr < aend) + *aptr++ *= value; + + return a; +} + +unittest +{ + printf("_arrayExpSliceMulass_s unittest\n"); + + for (cpuid = 0; cpuid < CPUID_MAX; cpuid++) + { + version (log) printf(" cpuid %d\n", cpuid); + + for (int j = 0; j < 2; j++) + { + const int dim = 67; + T[] a = new T[dim + j]; // aligned on 16 byte boundary + a = a[j .. dim + j]; // misalign for second iteration + T[] b = new T[dim + j]; + b = b[j .. dim + j]; + T[] c = new T[dim + j]; + c = c[j .. dim + j]; + + for (int i = 0; i < dim; i++) + { a[i] = cast(T)i; + b[i] = cast(T)(i + 7); + c[i] = cast(T)(i * 2); + } + + b[] = a[]; + a[] *= 6; + + for (int i = 0; i < dim; i++) + { + if (a[i] != cast(T)(b[i] * 6)) + { + printf("[%d]: %d != %d * 6\n", i, a[i], b[i]); + assert(0); + } + } + } + } +} + + +/* ======================================================================== */ + +/*********************** + * Computes: + * a[] *= b[] + */ + +T[] _arraySliceSliceMulass_u(T[] a, T[] b) +{ + return _arraySliceSliceMulass_s(a, b); +} + +T[] _arraySliceSliceMulass_t(T[] a, T[] b) +{ + return _arraySliceSliceMulass_s(a, b); +} + +T[] _arraySliceSliceMulass_s(T[] a, T[] b) +in +{ + assert (a.length == b.length); + assert (disjoint(a, b)); +} +body +{ + //printf("_arraySliceSliceMulass_s()\n"); + auto aptr = a.ptr; + auto aend = aptr + a.length; + auto bptr = b.ptr; + + version (D_InlineAsm_X86) + { + // SSE2 aligned version is 2519% faster + if (sse2() && a.length >= 16) + { + auto n = aptr + (a.length & ~15); + + if (((cast(uint) aptr | cast(uint) bptr) & 15) != 0) + { + asm + { + mov ESI, aptr; + mov EDI, n; + mov ECX, bptr; + + align 4; + startsse2u: + movdqu XMM0, [ESI]; + movdqu XMM2, [ECX]; + movdqu XMM1, [ESI+16]; + movdqu XMM3, [ECX+16]; + add ESI, 32; + add ECX, 32; + pmullw XMM0, 
XMM2; + pmullw XMM1, XMM3; + movdqu [ESI -32], XMM0; + movdqu [ESI+16-32], XMM1; + cmp ESI, EDI; + jb startsse2u; + + mov aptr, ESI; + mov bptr, ECX; + } + } + else + { + asm + { + mov ESI, aptr; + mov EDI, n; + mov ECX, bptr; + + align 4; + startsse2a: + movdqa XMM0, [ESI]; + movdqa XMM2, [ECX]; + movdqa XMM1, [ESI+16]; + movdqa XMM3, [ECX+16]; + add ESI, 32; + add ECX, 32; + pmullw XMM0, XMM2; + pmullw XMM1, XMM3; + movdqa [ESI -32], XMM0; + movdqa [ESI+16-32], XMM1; + cmp ESI, EDI; + jb startsse2a; + + mov aptr, ESI; + mov bptr, ECX; + } + } + } + else + // MMX version is 1712% faster + if (mmx() && a.length >= 8) + { + auto n = aptr + (a.length & ~7); + + asm + { + mov ESI, aptr; + mov EDI, n; + mov ECX, bptr; + + align 4; + startmmx: + movq MM0, [ESI]; + movq MM2, [ECX]; + movq MM1, [ESI+8]; + movq MM3, [ECX+8]; + add ESI, 16; + add ECX, 16; + pmullw MM0, MM2; + pmullw MM1, MM3; + movq [ESI -16], MM0; + movq [ESI+8-16], MM1; + cmp ESI, EDI; + jb startmmx; + + emms; + mov aptr, ESI; + mov bptr, ECX; + } + } + } + + while (aptr < aend) + *aptr++ *= *bptr++; + + return a; +} + +unittest +{ + printf("_arraySliceSliceMulass_s unittest\n"); + + for (cpuid = 0; cpuid < CPUID_MAX; cpuid++) + { + version (log) printf(" cpuid %d\n", cpuid); + + for (int j = 0; j < 2; j++) + { + const int dim = 67; + T[] a = new T[dim + j]; // aligned on 16 byte boundary + a = a[j .. dim + j]; // misalign for second iteration + T[] b = new T[dim + j]; + b = b[j .. dim + j]; + T[] c = new T[dim + j]; + c = c[j .. 
dim + j]; + + for (int i = 0; i < dim; i++) + { a[i] = cast(T)i; + b[i] = cast(T)(i + 7); + c[i] = cast(T)(i * 2); + } + + b[] = a[]; + a[] *= c[]; + + for (int i = 0; i < dim; i++) + { + if (a[i] != cast(T)(b[i] * c[i])) + { + printf("[%d]: %d != %d * %d\n", i, a[i], b[i], c[i]); + assert(0); + } + } + } + } +} diff --git a/druntime/src/compiler/dmd/cast_.d b/druntime/src/compiler/dmd/cast_.d new file mode 100644 index 00000000..88f69322 --- /dev/null +++ b/druntime/src/compiler/dmd/cast_.d @@ -0,0 +1,169 @@ +/** + * Implementation of array assignment support routines. + * + * Copyright: Copyright Digital Mars 2004 - 2009. + * License: Boost License 1.0. + * Authors: Walter Bright, Sean Kelly + * + * Copyright Digital Mars 2000 - 2009. + * Distributed under the Boost Software License, Version 1.0. + * (See accompanying file LICENSE_1_0.txt or copy at + * http://www.boost.org/LICENSE_1_0.txt) + */ +module rt.cover; + +private +{ + version( Windows ) + import core.sys.windows.windows; + else version( Posix ) + { + import core.sys.posix.fcntl; + import core.sys.posix.unistd; + } + import core.bitop; + import core.stdc.stdio; + import rt.util.utf; + + struct BitArray + { + size_t len; + uint* ptr; + + bool opIndex( size_t i ) + in + { + assert( i < len ); + } + body + { + return cast(bool) bt( ptr, i ); + } + } + + struct Cover + { + string filename; + BitArray valid; + uint[] data; + } + + __gshared + { + Cover[] gdata; + string srcpath; + string dstpath; + bool merge; + } +} + + +/** + * Set path to where source files are located. + * + * Params: + * pathname = The new path name. + */ +extern (C) void dmd_coverSourcePath( string pathname ) +{ + srcpath = pathname; +} + + +/** + * Set path to where listing files are to be written. + * + * Params: + * pathname = The new path name. + */ +extern (C) void dmd_coverDestPath( string pathname ) +{ + dstpath = pathname; +} + + +/** + * Set merge mode. 
+ *
+ * Params:
+ *  flag = true means new data is summed with existing data in the listing
+ *         file; false means a new listing file is always created.
+ */
+extern (C) void dmd_coverSetMerge( bool flag )
+{
+    merge = flag;
+}
+
+
+/**
+ * The coverage callback.  Appends one Cover record to the module-level
+ * gdata list, which the static destructor below turns into listing files.
+ *
+ * Params:
+ *  filename = The name of the coverage file.
+ *  valid    = One bit per source line.  When a line's counter is zero the
+ *             listing shows "0000000" if its bit is set and blanks otherwise.
+ *  data     = Per-line execution counters, indexed the same way as valid.
+ */
+extern (C) void _d_cover_register( string filename, BitArray valid, uint[] data )
+{
+    Cover c;
+
+    c.filename = filename;
+    c.valid = valid;
+    c.data = data;
+    gdata ~= c;
+}
+
+
+/**
+ * Writes one ".lst" listing per registered Cover record.  Each source line
+ * is echoed behind a 7-character count column and a '|'.  In merge mode the
+ * counts found in an existing listing are added to the new ones first.
+ */
+static ~this()
+{
+    const NUMLINES = 16384 - 1;
+    const NUMCHARS = 16384 * 16 - 1;
+
+    char[] srcbuf = new char[NUMCHARS];
+    char[][] srclines = new char[][NUMLINES];
+    char[] lstbuf = new char[NUMCHARS];
+    char[][] lstlines = new char[][NUMLINES];
+
+    foreach( Cover c; gdata )
+    {
+        if( !readFile( appendFN( srcpath, c.filename ), srcbuf ) )
+            continue;
+        splitLines( srcbuf, srclines );
+
+        // A missing or unreadable listing only means there is nothing to
+        // merge for this module; the listing is still written below and the
+        // remaining modules are still processed.
+        if( merge && readFile( addExt( baseName( c.filename ), "lst" ), lstbuf ) )
+        {
+            splitLines( lstbuf, lstlines );
+
+            for( size_t i = 0; i < lstlines.length; ++i )
+            {
+                if( i >= c.data.length )
+                    break;
+
+                int count = 0;
+
+                // Parse only the count column: blanks are skipped and the
+                // first other non-digit (normally '|') ends the scan, so
+                // digits in the echoed source text are never added in.
+                foreach( char c2; lstlines[i] )
+                {
+                    if( c2 == ' ' )
+                        continue;
+                    if( c2 < '0' || c2 > '9' )
+                        break;
+                    count = count * 10 + c2 - '0';
+                }
+                c.data[i] += count;
+            }
+        }
+
+        FILE* flst = fopen( (addExt( baseName( c.filename ), "lst\0" )).ptr, "wb" );
+
+        if( !flst )
+            continue; //throw new Exception( "Error opening file for write: " ~ lstfn );
+
+        uint nno;
+        uint nyes;
+
+        for( int i = 0; i < c.data.length; i++ )
+        {
+            if( i < srclines.length )
+            {
+                uint n = c.data[i];
+                char[] line = srclines[i];
+
+                line = expandTabs( line );
+
+                if( n == 0 )
+                {
+                    if( c.valid[i] )
+                    {
+                        nno++;
+                        fprintf( flst, "0000000|%.*s\n", line );
+                    }
+                    else
+                    {
+                        // seven blanks: same width as "0000000" and "%7u"
+                        fprintf( flst, "       |%.*s\n", line );
+                    }
+                }
+                else
+                {
+
nyes++;
+                    fprintf( flst, "%7u|%.*s\n", n, line );
+                }
+            }
+        }
+        if( nyes + nno ) // no divide by 0 bugs
+        {
+            fprintf( flst, "%.*s is %d%% covered\n", c.filename, ( nyes * 100 ) / ( nyes + nno ) );
+        }
+        fclose( flst );
+    }
+}
+
+
+/**
+ * Returns path with name appended.  The platform separator ('\\' on Windows,
+ * '/' otherwise) is inserted when path is non-empty and does not already end
+ * with it.
+ */
+string appendFN( string path, string name )
+{
+    version( Windows )
+        const char sep = '\\';
+    else
+        const char sep = '/';
+
+    auto dest = path;
+
+    // Test the length, not the pointer: an empty but non-null string must
+    // not reach dest[$ - 1].
+    if( dest.length && dest[$ - 1] != sep )
+        dest ~= sep;
+    dest ~= name;
+    return dest;
+}
+
+
+/**
+ * Returns the part of name that follows the last path separator ('/' on
+ * Posix; ':' or '\\' on Windows).  If ext is given and the result ends with
+ * it, ext is removed from the end.
+ */
+string baseName( string name, string ext = null )
+{
+    auto i = name.length;
+    for( ; i > 0; --i )
+    {
+        version( Windows )
+        {
+            if( name[i - 1] == ':' || name[i - 1] == '\\' )
+                break;
+        }
+        else version( Posix )
+        {
+            if( name[i - 1] == '/' )
+                break;
+        }
+    }
+    return chomp( name[i .. $], ext ? ext : "" );
+}
+
+
+/**
+ * Returns the text after the last '.' of the final path component, or null
+ * when a path separator (or the start of name) is reached before any '.'.
+ */
+string getExt( string name )
+{
+    auto i = name.length;
+
+    while( i > 0 )
+    {
+        if( name[i - 1] == '.' )
+            return name[i .. $];
+        --i;
+        version( Windows )
+        {
+            if( name[i] == ':' || name[i] == '\\' )
+                break;
+        }
+        else version( Posix )
+        {
+            if( name[i] == '/' )
+                break;
+        }
+    }
+    return null;
+}
+
+
+/**
+ * Returns name with its extension replaced by ext.  When name has no
+ * extension, "." and ext are appended; a trailing '.' is reused instead of
+ * being doubled.
+ */
+string addExt( string name, string ext )
+{
+    auto existing = getExt( name );
+
+    if( existing.length == 0 )
+    {
+        if( name.length && name[$ - 1] == '.' )
+            name ~= ext;
+        else
+            name = name ~ "." ~ ext;
+    }
+    else
+    {
+        name = name[0 .. $ - existing.length] ~ ext;
+    }
+    return name;
+}
+
+
+/**
+ * With delim null, strips one trailing "\r", "\n" or "\r\n" from str.
+ * Otherwise strips delim from the end of str when str ends with it.
+ */
+string chomp( string str, string delim = null )
+{
+    if( delim is null )
+    {
+        auto len = str.length;
+
+        if( len )
+        {
+            auto c = str[len - 1];
+
+            if( c == '\r' )
+                --len;
+            else if( c == '\n' )
+            {
+                --len;
+                // Only look for a preceding '\r' if a character remains;
+                // str == "\n" must not index before the start.
+                if( len && str[len - 1] == '\r' )
+                    --len;
+            }
+        }
+        return str[0 .. len];
+    }
+    else if( str.length >= delim.length )
+    {
+        if( str[$ - delim.length .. $] == delim )
+            return str[0 ..
$ - delim.length]; + } + return str; +} + + +bool readFile( string name, inout char[] buf ) +{ + version( Windows ) + { + auto wnamez = toUTF16z( name ); + HANDLE file = CreateFileW( wnamez, + GENERIC_READ, + FILE_SHARE_READ, + null, + OPEN_EXISTING, + FILE_ATTRIBUTE_NORMAL | FILE_FLAG_SEQUENTIAL_SCAN, + cast(HANDLE) null ); + + delete wnamez; + if( file == INVALID_HANDLE_VALUE ) + return false; + scope( exit ) CloseHandle( file ); + + DWORD num = 0; + DWORD pos = 0; + + buf.length = 4096; + while( true ) + { + if( !ReadFile( file, &buf[pos], cast(DWORD)( buf.length - pos ), &num, null ) ) + return false; + if( !num ) + break; + pos += num; + buf.length = pos * 2; + } + buf.length = pos; + return true; + } + else version( Posix ) + { + char[] namez = new char[name.length + 1]; + namez[0 .. name.length] = name; + namez[$ - 1] = 0; + int file = open( namez.ptr, O_RDONLY ); + + delete namez; + if( file == -1 ) + return false; + scope( exit ) close( file ); + + int num = 0; + uint pos = 0; + + buf.length = 4096; + while( true ) + { + num = read( file, &buf[pos], cast(uint)( buf.length - pos ) ); + if( num == -1 ) + return false; + if( !num ) + break; + pos += num; + buf.length = pos * 2; + } + buf.length = pos; + return true; + } +} + + +void splitLines( char[] buf, inout char[][] lines ) +{ + size_t beg = 0, + pos = 0; + + lines.length = 0; + for( ; pos < buf.length; ++pos ) + { + char c = buf[pos]; + + switch( buf[pos] ) + { + case '\r': + case '\n': + lines ~= buf[beg .. pos]; + beg = pos + 1; + if( buf[pos] == '\r' && pos < buf.length - 1 && buf[pos + 1] == '\n' ) + ++pos, ++beg; + default: + continue; + } + } + if( beg != pos ) + { + lines ~= buf[beg .. 
pos]; + } +} + + +char[] expandTabs( char[] str, int tabsize = 8 ) +{ + const dchar LS = '\u2028'; // UTF line separator + const dchar PS = '\u2029'; // UTF paragraph separator + + bool changes = false; + char[] result = str; + int column; + int nspaces; + + foreach( size_t i, dchar c; str ) + { + switch( c ) + { + case '\t': + nspaces = tabsize - (column % tabsize); + if( !changes ) + { + changes = true; + result = null; + result.length = str.length + nspaces - 1; + result.length = i + nspaces; + result[0 .. i] = str[0 .. i]; + result[i .. i + nspaces] = ' '; + } + else + { int j = result.length; + result.length = j + nspaces; + result[j .. j + nspaces] = ' '; + } + column += nspaces; + break; + + case '\r': + case '\n': + case PS: + case LS: + column = 0; + goto L1; + + default: + column++; + L1: + if (changes) + { + if (c <= 0x7F) + result ~= c; + else + encode(result, c); + } + break; + } + } + return result; +} diff --git a/druntime/src/compiler/dmd/critical.c b/druntime/src/compiler/dmd/critical.c new file mode 100644 index 00000000..5e906ea3 --- /dev/null +++ b/druntime/src/compiler/dmd/critical.c @@ -0,0 +1,173 @@ +/** + * Implementation of support routines for synchronized blocks. + * + * Copyright: Copyright Digital Mars 2000 - 2009. 
+ * License: next) + { + pthread_mutex_lock(&critical_section.cs); + if (!dcs->next) // if, in the meantime, another thread didn't set it + { + dcs->next = dcs_list; + dcs_list = dcs; + pthread_mutex_init(&dcs->cs, &_criticals_attr); + } + pthread_mutex_unlock(&critical_section.cs); + } + pthread_mutex_lock(&dcs->cs); +} + +void _d_criticalexit(D_CRITICAL_SECTION *dcs) +{ + //printf("_d_criticalexit(dcs = x%x)\n", dcs); + pthread_mutex_unlock(&dcs->cs); +} + +void _STI_critical_init() +{ + if (!dcs_list) + { //printf("_STI_critical_init()\n"); + pthread_mutexattr_init(&_criticals_attr); + pthread_mutexattr_settype(&_criticals_attr, PTHREAD_MUTEX_RECURSIVE); + + // The global critical section doesn't need to be recursive + pthread_mutex_init(&critical_section.cs, 0); + dcs_list = &critical_section; + } +} + +void _STD_critical_term() +{ + if (dcs_list) + { //printf("_STI_critical_term()\n"); + while (dcs_list) + { + //printf("\tlooping... %x\n", dcs_list); + pthread_mutex_destroy(&dcs_list->cs); + dcs_list = dcs_list->next; + } + } +} + +#endif + diff --git a/druntime/src/compiler/dmd/deh.c b/druntime/src/compiler/dmd/deh.c new file mode 100644 index 00000000..df466210 --- /dev/null +++ b/druntime/src/compiler/dmd/deh.c @@ -0,0 +1,724 @@ +/** + * Implementation of exception handling support routines for Windows. + * + * Copyright: Copyright Digital Mars 1999 - 2009. 
+ * License: ExceptionFlags & EXCEPTION_UNWIND) + { + // Call all the finally blocks in this frame + _d_local_unwind(handler_table, frame, -1); + } + else + { + // Jump to catch block if matching one is found + + int ndx,prev_ndx,i; + struct DHandlerInfo *phi; + struct DCatchInfo *pci; + struct DCatchBlock *pcb; + unsigned ncatches; // number of catches in the current handler + Object *pti; + ClassInfo *ci; + + ci = NULL; // only compute it if we need it + + // walk through handler table, checking each handler + // with an index smaller than the current table_index + for (ndx = frame->table_index; ndx != -1; ndx = prev_ndx) + { + phi = &handler_table->handler_info[ndx]; + prev_ndx = phi->prev_index; + if (phi->cioffset) + { + // this is a catch handler (no finally) + pci = (struct DCatchInfo *)((char *)handler_table + phi->cioffset); + ncatches = pci->ncatches; + for (i = 0; i < ncatches; i++) + { + pcb = &pci->catch_block[i]; + + if (!ci) + { + // This code must match the translation code + if (exception_record->ExceptionCode == STATUS_DIGITAL_MARS_D_EXCEPTION) + { + //printf("ei[0] = %p\n", exception_record->ExceptionInformation[0]); + ci = **(ClassInfo ***)(exception_record->ExceptionInformation[0]); + } + else + ci = &_Class_9Throwable; + } + + if (_d_isbaseof(ci, pcb->type)) + { + // Matched the catch type, so we've found the handler. + int regebp; + + pti = _d_translate_se_to_d_exception(exception_record); + + // Initialize catch variable + regebp = (int)&frame->ebp; // EBP for this frame + *(void **)(regebp + (pcb->bpoffset)) = pti; + + // Have system call all finally blocks in intervening frames + _global_unwind(frame, exception_record); + + // Call all the finally blocks skipped in this frame + _d_local_unwind(handler_table, frame, ndx); + + frame->table_index = prev_ndx; // we are out of this handler + + // Jump to catch block. Does not return. 
+ { + unsigned catch_esp; + fp_t catch_addr; + + catch_addr = (fp_t)(pcb->code); + catch_esp = regebp - handler_table->espoffset - sizeof(fp_t); + _asm + { + mov EAX,catch_esp + mov ECX,catch_addr + mov [EAX],ECX + mov EBP,regebp + mov ESP,EAX // reset stack + ret // jump to catch block + } + } + } + } + } + } + } + return ExceptionContinueSearch; +} + +/*********************************** + * Exception filter for use in __try..__except block + * surrounding call to Dmain() + */ + +int _d_exception_filter(struct _EXCEPTION_POINTERS *eptrs, + int retval, + Object **exception_object) +{ + *exception_object = _d_translate_se_to_d_exception(eptrs->ExceptionRecord); + return retval; +} + +/*********************************** + * Throw a D object. + */ + +void __stdcall _d_throw(Object *h) +{ + //printf("_d_throw(h = %p, &h = %p)\n", h, &h); + //printf("\tvptr = %p\n", *(void **)h); + RaiseException(STATUS_DIGITAL_MARS_D_EXCEPTION, + EXCEPTION_NONCONTINUABLE, + 1, (DWORD *)&h); +} + +/*********************************** + * Create an exception object + */ + +Object *_d_create_exception_object(ClassInfo *ci, char *msg) +{ + Throwable *exc; + + exc = (Throwable *)_d_newclass(ci); + // BUG: what if _d_newclass() throws an out of memory exception? + + if (msg) + { + exc->msglen = strlen(msg); + exc->msg = msg; + } + return (Object *)exc; +} + +/*********************************** + * Converts a Windows Structured Exception code to a D Exception Object. 
+ */ + +Object *_d_translate_se_to_d_exception(EXCEPTION_RECORD *exception_record) +{ + Object *pti; + + switch (exception_record->ExceptionCode) { + case STATUS_DIGITAL_MARS_D_EXCEPTION: + // Generated D exception + pti = (Object *)(exception_record->ExceptionInformation[0]); + break; + + case STATUS_INTEGER_DIVIDE_BY_ZERO: + pti = _d_create_exception_object(&_Class_5Error, "Integer Divide by Zero"); + break; + + case STATUS_FLOAT_DIVIDE_BY_ZERO: + pti = _d_create_exception_object(&_Class_5Error, "Float Divide by Zero"); + break; + + case STATUS_ACCESS_VIOLATION: + pti = _d_create_exception_object(&_Class_5Error, "Access Violation"); + break; + + case STATUS_STACK_OVERFLOW: + pti = _d_create_exception_object(&_Class_5Error, "Stack Overflow"); + break; + + case STATUS_DATATYPE_MISALIGNMENT: + pti = _d_create_exception_object(&_Class_5Error, "Datatype Misalignment"); + break; + + case STATUS_ARRAY_BOUNDS_EXCEEDED: + pti = _d_create_exception_object(&_Class_5Error, "Array Bounds Exceeded"); + break; + + case STATUS_FLOAT_INVALID_OPERATION: + pti = _d_create_exception_object(&_Class_5Error, "Invalid Floating Point Operation"); + break; + + case STATUS_FLOAT_DENORMAL_OPERAND: + pti = _d_create_exception_object(&_Class_5Error, "Floating Point Denormal Operand"); + break; + + case STATUS_FLOAT_INEXACT_RESULT: + pti = _d_create_exception_object(&_Class_5Error, "Floating Point Inexact Result"); + break; + + case STATUS_FLOAT_OVERFLOW: + pti = _d_create_exception_object(&_Class_5Error, "Floating Point Overflow"); + break; + + case STATUS_FLOAT_UNDERFLOW: + pti = _d_create_exception_object(&_Class_5Error, "Floating Point Underflow"); + break; + + case STATUS_FLOAT_STACK_CHECK: + pti = _d_create_exception_object(&_Class_5Error, "Floating Point Stack Check"); + break; + + case STATUS_PRIVILEGED_INSTRUCTION: + pti = _d_create_exception_object(&_Class_5Error, "Privileged Instruction"); + break; + + case STATUS_ILLEGAL_INSTRUCTION: + pti = 
_d_create_exception_object(&_Class_5Error, "Illegal Instruction"); + break; + + case STATUS_BREAKPOINT: + pti = _d_create_exception_object(&_Class_5Error, "Breakpoint"); + break; + + case STATUS_IN_PAGE_ERROR: + pti = _d_create_exception_object(&_Class_5Error, "Win32 In Page Exception"); + break; +/* + case STATUS_INTEGER_OVERFLOW: // not supported on any x86 processor + case STATUS_INVALID_DISPOSITION: + case STATUS_NONCONTINUABLE_EXCEPTION: + case STATUS_SINGLE_STEP: + case DBG_CONTROL_C: // only when a debugger is attached + // In DMC, but not in Microsoft docs + case STATUS_GUARD_PAGE_VIOLATION: + case STATUS_INVALID_HANDLE: +*/ + // convert all other exception codes into a Win32Exception + default: + pti = _d_create_exception_object(&_Class_5Error, "Win32 Exception"); + break; + } + + return pti; +} + +/************************************** + * Call finally blocks in the current stack frame until stop_index. + * This is roughly equivalent to _local_unwind() for C in \src\win32\ehsup.c + */ + +void __cdecl _d_local_unwind(struct DHandlerTable *handler_table, + struct DEstablisherFrame *frame, int stop_index) +{ + struct DHandlerInfo *phi; + struct DCatchInfo *pci; + int i; + + // Set up a special exception handler to catch double-fault exceptions. + __asm + { + push dword ptr -1 + push dword ptr 0 + push offset _local_except_handler // defined in src\win32\ehsup.c + push dword ptr fs:_except_list + mov FS:_except_list,ESP + } + + for (i = frame->table_index; i != -1 && i != stop_index; i = phi->prev_index) + { + phi = &handler_table->handler_info[i]; + if (phi->finally_code) + { + // Note that it is unnecessary to adjust the ESP, as the finally block + // accesses all items on the stack as relative to EBP. 
+ + DWORD *catch_ebp = &frame->ebp; + void *blockaddr = phi->finally_code; + + _asm + { + push EBX + mov EBX,blockaddr + push EBP + mov EBP,catch_ebp + call EBX + pop EBP + pop EBX + } + } + } + + _asm + { + pop FS:_except_list + add ESP,12 + } +} + +/*********************************** + * external version of the unwinder + */ + +__declspec(naked) void __cdecl _d_local_unwind2() +{ + __asm + { + jmp _d_local_unwind + } +} + +/*********************************** + * The frame handler, this is called for each frame that has been registered + * in the OS except_list. + * Input: + * EAX the handler table for the frame + */ + +EXCEPTION_DISPOSITION _d_monitor_handler( + EXCEPTION_RECORD *exception_record, + struct DEstablisherFrame *frame, + CONTEXT context, + void *dispatcher_context) +{ + if (exception_record->ExceptionFlags & EXCEPTION_UNWIND) + { + _d_monitorexit((Object *)frame->table_index); + } + else + { + } + return ExceptionContinueSearch; +} + +/*********************************** + */ + +void _d_monitor_prolog(void *x, void *y, Object *h) +{ + __asm + { + push EAX + } + //printf("_d_monitor_prolog(x=%p, y=%p, h=%p)\n", x, y, h); + _d_monitorenter(h); + __asm + { + pop EAX + } +} + +/*********************************** + */ + +void _d_monitor_epilog(void *x, void *y, Object *h) +{ + //printf("_d_monitor_epilog(x=%p, y=%p, h=%p)\n", x, y, h); + __asm + { + push EAX + push EDX + } + _d_monitorexit(h); + __asm + { + pop EDX + pop EAX + } +} + +#endif + +/* ======================== linux =============================== */ + +#if linux + +#include "mars.h" + +extern ClassInfo D6object9Throwable7__ClassZ; +#define _Class_9Throwable D6object9Throwable7__ClassZ; + +extern ClassInfo D6object5Error7__ClassZ; +#define _Class_5Error D6object5Error7__ClassZ + +typedef int (*fp_t)(); // function pointer in ambient memory model + +struct DHandlerInfo +{ + unsigned offset; // offset from function address to start of guarded section + int prev_index; // previous table index 
+ unsigned cioffset; // offset to DCatchInfo data from start of table (!=0 if try-catch) + void *finally_code; // pointer to finally code to execute + // (!=0 if try-finally) +}; + +// Address of DHandlerTable, searched for by eh_finddata() + +struct DHandlerTable +{ + void *fptr; // pointer to start of function + unsigned espoffset; // offset of ESP from EBP + unsigned retoffset; // offset from start of function to return code + unsigned nhandlers; // dimension of handler_info[] + struct DHandlerInfo handler_info[1]; +}; + +struct DCatchBlock +{ + ClassInfo *type; // catch type + unsigned bpoffset; // EBP offset of catch var + void *code; // catch handler code +}; + +// Create one of these for each try-catch +struct DCatchInfo +{ + unsigned ncatches; // number of catch blocks + struct DCatchBlock catch_block[1]; // data for each catch block +}; + +// One of these is generated for each function with try-catch or try-finally + +struct FuncTable +{ + void *fptr; // pointer to start of function + struct DHandlerTable *handlertable; // eh data for this function + unsigned size; // size of function in bytes +}; + +extern struct FuncTable *table_start; +extern struct FuncTable *table_end; + +void terminate() +{ +// _asm +// { +// hlt +// } +} + +/******************************************* + * Given address that is inside a function, + * figure out which function it is in. + * Return DHandlerTable if there is one, NULL if not. + */ + +struct DHandlerTable *__eh_finddata(void *address) +{ + struct FuncTable *ft; + + for (ft = (struct FuncTable *)table_start; + ft < (struct FuncTable *)table_end; + ft++) + { + if (ft->fptr <= address && + address < (void *)((char *)ft->fptr + ft->size)) + { + return ft->handlertable; + } + } + return NULL; +} + + +/****************************** + * Given EBP, find return address to caller, and caller's EBP. 
+ * Input: + * regbp Value of EBP for current function + * *pretaddr Return address + * Output: + * *pretaddr return address to caller + * Returns: + * caller's EBP + */ + +unsigned __eh_find_caller(unsigned regbp, unsigned *pretaddr) +{ + unsigned bp = *(unsigned *)regbp; + + if (bp) // if not end of call chain + { + // Perform sanity checks on new EBP. + // If it is screwed up, terminate() hopefully before we do more damage. + if (bp <= regbp) + // stack should grow to smaller values + terminate(); + + *pretaddr = *(unsigned *)(regbp + sizeof(int)); + } + return bp; +} + +/*********************************** + * Throw a D object. + */ + +void __stdcall _d_throw(Object *h) +{ + unsigned regebp; + + //printf("_d_throw(h = %p, &h = %p)\n", h, &h); + //printf("\tvptr = %p\n", *(void **)h); + + regebp = _EBP; + + while (1) // for each function on the stack + { + struct DHandlerTable *handler_table; + struct FuncTable *pfunc; + struct DHandlerInfo *phi; + unsigned retaddr; + unsigned funcoffset; + unsigned spoff; + unsigned retoffset; + int index; + int dim; + int ndx; + int prev_ndx; + + regebp = __eh_find_caller(regebp,&retaddr); + if (!regebp) + // if end of call chain + break; + + handler_table = __eh_finddata((void *)retaddr); // find static data associated with function + if (!handler_table) // if no static data + { + continue; + } + funcoffset = (unsigned)handler_table->fptr; + spoff = handler_table->espoffset; + retoffset = handler_table->retoffset; + +#ifdef DEBUG + printf("retaddr = x%x\n",(unsigned)retaddr); + printf("regebp=x%04x, funcoffset=x%04x, spoff=x%x, retoffset=x%x\n", + regebp,funcoffset,spoff,retoffset); +#endif + + // Find start index for retaddr in static data + dim = handler_table->nhandlers; + index = -1; + for (int i = 0; i < dim; i++) + { + phi = &handler_table->handler_info[i]; + + if ((unsigned)retaddr >= funcoffset + phi->offset) + index = i; + } + + // walk through handler table, checking each handler + // with an index smaller than the 
current table_index + for (ndx = index; ndx != -1; ndx = prev_ndx) + { + phi = &handler_table->handler_info[ndx]; + prev_ndx = phi->prev_index; + if (phi->cioffset) + { + // this is a catch handler (no finally) + struct DCatchInfo *pci; + int ncatches; + int i; + + pci = (struct DCatchInfo *)((char *)handler_table + phi->cioffset); + ncatches = pci->ncatches; + for (i = 0; i < ncatches; i++) + { + struct DCatchBlock *pcb; + ClassInfo *ci = **(ClassInfo ***)h; + + pcb = &pci->catch_block[i]; + + if (_d_isbaseof(ci, pcb->type)) + { // Matched the catch type, so we've found the handler. + + // Initialize catch variable + *(void **)(regebp + (pcb->bpoffset)) = h; + + // Jump to catch block. Does not return. + { + unsigned catch_esp; + fp_t catch_addr; + + catch_addr = (fp_t)(pcb->code); + catch_esp = regebp - handler_table->espoffset - sizeof(fp_t); + _asm + { + mov EAX,catch_esp + mov ECX,catch_addr + mov [EAX],ECX + mov EBP,regebp + mov ESP,EAX // reset stack + ret // jump to catch block + } + } + } + } + } + else if (phi->finally_code) + { // Call finally block + // Note that it is unnecessary to adjust the ESP, as the finally block + // accesses all items on the stack as relative to EBP. + + void *blockaddr = phi->finally_code; + + _asm + { + push EBX + mov EBX,blockaddr + push EBP + mov EBP,regebp + call EBX + pop EBP + pop EBX + } + } + } + } +} + + +#endif diff --git a/druntime/src/compiler/dmd/deh2.d b/druntime/src/compiler/dmd/deh2.d new file mode 100644 index 00000000..696538ea --- /dev/null +++ b/druntime/src/compiler/dmd/deh2.d @@ -0,0 +1,316 @@ +/** + * Implementation of exception handling support routines for Posix. + * + * Copyright: Copyright Digital Mars 2000 - 2009. 
+ * License: funcoffset + phi.offset && + cast(uint)retaddr <= funcoffset + phi.endoffset) + index = i; + } + debug printf("index = %d\n", index); + + // walk through handler table, checking each handler + // with an index smaller than the current table_index + for (ndx = index; ndx != -1; ndx = prev_ndx) + { + phi = &handler_table.handler_info[ndx]; + prev_ndx = phi.prev_index; + if (phi.cioffset) + { + // this is a catch handler (no finally) + DCatchInfo *pci; + int ncatches; + int i; + + pci = cast(DCatchInfo *)(cast(char *)handler_table + phi.cioffset); + ncatches = pci.ncatches; + for (i = 0; i < ncatches; i++) + { + DCatchBlock *pcb; + ClassInfo ci = **cast(ClassInfo **)h; + + pcb = &pci.catch_block[i]; + + if (_d_isbaseof(ci, pcb.type)) + { // Matched the catch type, so we've found the handler. + + // Initialize catch variable + *cast(void **)(regebp + (pcb.bpoffset)) = h; + + // Jump to catch block. Does not return. + { + uint catch_esp; + fp_t catch_addr; + + catch_addr = cast(fp_t)(pcb.code); + catch_esp = regebp - handler_table.espoffset - fp_t.sizeof; + asm + { + mov EAX,catch_esp ; + mov ECX,catch_addr ; + mov [EAX],ECX ; + mov EBP,regebp ; + mov ESP,EAX ; // reset stack + ret ; // jump to catch block + } + } + } + } + } + else if (phi.finally_code) + { // Call finally block + // Note that it is unnecessary to adjust the ESP, as the finally block + // accesses all items on the stack as relative to EBP. 
+ + void *blockaddr = phi.finally_code; + + version (OSX) + { + asm + { + sub ESP,4 ; // align stack to 16 + push EBX ; + mov EBX,blockaddr ; + push EBP ; + mov EBP,regebp ; + call EBX ; + pop EBP ; + pop EBX ; + add ESP,4 ; + } + } + else + { + asm + { + push EBX ; + mov EBX,blockaddr ; + push EBP ; + mov EBP,regebp ; + call EBX ; + pop EBP ; + pop EBX ; + } + } + } + } + } +} diff --git a/druntime/src/compiler/dmd/dmain2.BAK b/druntime/src/compiler/dmd/dmain2.BAK new file mode 100644 index 00000000..92017448 --- /dev/null +++ b/druntime/src/compiler/dmd/dmain2.BAK @@ -0,0 +1,374 @@ +/* + * Placed into the Public Domain. + * written by Walter Bright + * www.digitalmars.com + */ + +/* + * Modified by Sean Kelly for use with the D Runtime Project + */ + +module rt.dmain2; + +private +{ + import memory; + import util.console; + import core.stdc.stddef; + import core.stdc.stdlib; + import core.stdc.string; +} + +version (Windows) +{ + extern (Windows) alias int function() FARPROC; + extern (Windows) FARPROC GetProcAddress(void*, in char*); + extern (Windows) void* LoadLibraryA(in char*); + extern (Windows) int FreeLibrary(void*); + extern (Windows) void* LocalFree(void*); + extern (Windows) wchar_t* GetCommandLineW(); + extern (Windows) wchar_t** CommandLineToArgvW(wchar_t*, int*); + extern (Windows) export int WideCharToMultiByte(uint, uint, wchar_t*, int, char*, int, char*, int); + pragma(lib, "shell32.lib"); // needed for CommandLineToArgvW +} + +extern (C) void _STI_monitor_staticctor(); +extern (C) void _STD_monitor_staticdtor(); +extern (C) void _STI_critical_init(); +extern (C) void _STD_critical_term(); +extern (C) void gc_init(); +extern (C) void gc_term(); +extern (C) void _minit(); +extern (C) void _moduleCtor(); +extern (C) void _moduleDtor(); +extern (C) void thread_joinAll(); + +/*********************************** + * These are a temporary means of providing a GC hook for DLL use. They may be + * replaced with some other similar functionality later. 
+ */ +extern (C) +{ + void* gc_getProxy(); + void gc_setProxy(void* p); + void gc_clrProxy(); + + alias void* function() gcGetFn; + alias void function(void*) gcSetFn; + alias void function() gcClrFn; +} + +extern (C) void* rt_loadLibrary(in char[] name) +{ + version (Windows) + { + char[260] temp = void; + temp[0 .. name.length] = name[]; + temp[name.length] = cast(char) 0; + void* ptr = LoadLibraryA(temp.ptr); + if (ptr is null) + return ptr; + gcSetFn gcSet = cast(gcSetFn) GetProcAddress(ptr, "gc_setProxy"); + if (gcSet !is null) + gcSet(gc_getProxy()); + return ptr; + + } + else version (linux) + { + throw new Exception("rt_loadLibrary not yet implemented on linux."); + } +} + +extern (C) bool rt_unloadLibrary(void* ptr) +{ + version (Windows) + { + gcClrFn gcClr = cast(gcClrFn) GetProcAddress(ptr, "gc_clrProxy"); + if (gcClr !is null) + gcClr(); + return FreeLibrary(ptr) != 0; + } + else version (linux) + { + throw new Exception("rt_unloadLibrary not yet implemented on linux."); + } +} + +/*********************************** + * These functions must be defined for any D program linked + * against this library. + */ +extern (C) void onAssertError(string file, size_t line); +extern (C) void onAssertErrorMsg(string file, size_t line, string msg); +extern (C) void onRangeError(string file, size_t line); +extern (C) void onHiddenFuncError(Object o); +extern (C) void onSwitchError(string file, size_t line); +extern (C) bool runModuleUnitTests(); + +// this function is called from the utf module +//extern (C) void onUnicodeError(string msg, size_t idx); + +/*********************************** + * These are internal callbacks for various language errors. 
+ */ +extern (C) void _d_assert(string file, uint line) +{ + onAssertError(file, line); +} + +extern (C) static void _d_assert_msg(string msg, string file, uint line) +{ + onAssertErrorMsg(file, line, msg); +} + +extern (C) void _d_array_bounds(string file, uint line) +{ + onRangeError(file, line); +} + +extern (C) void _d_switch_error(string file, uint line) +{ + onSwitchError(file, line); +} + +extern (C) void _d_hidden_func() +{ + Object o; + asm + { + mov o, EAX; + } + onHiddenFuncError(o); +} + +bool _d_isHalting = false; + +extern (C) bool rt_isHalting() +{ + return _d_isHalting; +} + +extern (C) bool rt_trapExceptions = true; + +void _d_criticalInit() +{ + version (linux) + { + _STI_monitor_staticctor(); + _STI_critical_init(); + } +} + +alias void delegate(Throwable) ExceptionHandler; + +extern (C) bool rt_init(ExceptionHandler dg = null) +{ + _d_criticalInit(); + + try + { + gc_init(); + initStaticDataGC(); + version (Windows) + _minit(); + _moduleCtor(); + return true; + } + catch (Throwable e) + { + if (dg) + dg(e); + } + catch + { + + } + _d_criticalTerm(); + return false; +} + +void _d_criticalTerm() +{ + version (linux) + { + _STD_critical_term(); + _STD_monitor_staticdtor(); + } +} + +extern (C) bool rt_term(ExceptionHandler dg = null) +{ + try + { + thread_joinAll(); + _d_isHalting = true; + _moduleDtor(); + gc_term(); + return true; + } + catch (Throwable e) + { + if (dg) + dg(e); + } + catch + { + + } + finally + { + _d_criticalTerm(); + } + return false; +} + +/*********************************** + * The D main() function supplied by the user's program + */ +int main(char[][] args); + +/*********************************** + * Substitutes for the C main() function. + * It's purpose is to wrap the call to the D main() + * function and catch any unhandled exceptions. 
+ */ + +extern (C) int main(int argc, char **argv) +{ + char[][] args; + int result; + + version (linux) + { + _STI_monitor_staticctor(); + _STI_critical_init(); + } + + version (Windows) + { + wchar_t* wcbuf = GetCommandLineW(); + size_t wclen = wcslen(wcbuf); + int wargc = 0; + wchar_t** wargs = CommandLineToArgvW(wcbuf, &wargc); + assert(wargc == argc); + + char* cargp = null; + size_t cargl = WideCharToMultiByte(65001, 0, wcbuf, wclen, null, 0, null, 0); + + cargp = cast(char*) alloca(cargl); + args = ((cast(char[]*) alloca(wargc * (char[]).sizeof)))[0 .. wargc]; + + for (size_t i = 0, p = 0; i < wargc; i++) + { + int wlen = wcslen(wargs[i]); + int clen = WideCharToMultiByte(65001, 0, &wargs[i][0], wlen, null, 0, null, 0); + args[i] = cargp[p .. p+clen]; + p += clen; assert(p <= cargl); + WideCharToMultiByte(65001, 0, &wargs[i][0], wlen, &args[i][0], clen, null, 0); + } + LocalFree(wargs); + wargs = null; + wargc = 0; + } + else version (linux) + { + char[]* am = cast(char[]*) malloc(argc * (char[]).sizeof); + scope(exit) free(am); + + for (size_t i = 0; i < argc; i++) + { + auto len = strlen(argv[i]); + am[i] = argv[i][0 .. len]; + } + args = am[0 .. 
argc]; + } + + bool trapExceptions = rt_trapExceptions; + + void tryExec(void delegate() dg) + { + + if (trapExceptions) + { + try + { + dg(); + } + catch (Throwable e) + { + while (e) + { + if (e.file) + { + // fprintf(stderr, "%.*s(%u): %.*s\n", e.file, e.line, e.msg); + console (e.classinfo.name)("@")(e.file)("(")(e.line)("): ")(e.msg)("\n"); + } + else + { + // fprintf(stderr, "%.*s\n", e.toString()); + console (e.toString)("\n"); + } + if (e.info) + { + console ("----------------\n"); + foreach (t; e.info) + console (t)("\n"); + } + if (e.next) + console ("\n"); + e = e.next; + } + result = EXIT_FAILURE; + } + catch (Object o) + { + // fprintf(stderr, "%.*s\n", o.toString()); + console (o.toString)("\n"); + result = EXIT_FAILURE; + } + } + else + { + dg(); + } + } + + // NOTE: The lifetime of a process is much like the lifetime of an object: + // it is initialized, then used, then destroyed. If initialization + // fails, the successive two steps are never reached. However, if + // initialization succeeds, then cleanup will occur even if the use + // step fails in some way. Here, the use phase consists of running + // the user's main function. If main terminates with an exception, + // the exception is handled and then cleanup begins. An exception + // thrown during cleanup, however, will abort the cleanup process. 
+ + void runMain() + { + result = main(args); + } + + void runAll() + { + gc_init(); + initStaticDataGC(); + version (Windows) + _minit(); + _moduleCtor(); + if (runModuleUnitTests()) + tryExec(&runMain); + thread_joinAll(); + _d_isHalting = true; + _moduleDtor(); + gc_term(); + } + + tryExec(&runAll); + + version (linux) + { + _STD_critical_term(); + _STD_monitor_staticdtor(); + } + return result; +} diff --git a/druntime/src/compiler/dmd/dmain2.d b/druntime/src/compiler/dmd/dmain2.d new file mode 100644 index 00000000..01724839 --- /dev/null +++ b/druntime/src/compiler/dmd/dmain2.d @@ -0,0 +1,394 @@ +/** + * Contains main program entry point and support routines. + * + * Copyright: Copyright Digital Mars 2000 - 2009. + * License: p.length) + { + size_t size = p.length * sizeelem; + auto info = gc_query(p.data); + + if (info.size <= newsize || info.base != p.data) + { + if (info.size >= PAGESIZE && info.base == p.data) + { // Try to extend in-place + auto u = gc_extend(p.data, (newsize + 1) - info.size, (newsize + 1) - info.size); + if (u) + { + goto L1; + } + } + newdata = cast(byte *)gc_malloc(newsize + 1, info.attr); + newdata[0 .. size] = p.data[0 .. size]; + } + L1: + newdata[size .. newsize] = 0; + } + } + else + { + newdata = cast(byte *)gc_calloc(newsize + 1, !(ti.next.flags() & 1) ? BlkAttr.NO_SCAN : 0); + } + } + else + { + newdata = p.data; + } + + p.data = newdata; + p.length = newlength; + return newdata[0 .. newlength]; + +Loverflow: + onOutOfMemoryError(); +} + + +/** + * Resize arrays for non-zero initializers. + * p pointer to array lvalue to be updated + * newlength new .length property of array + * sizeelem size of each element of array + * initsize size of initializer + * ... 
initializer + */ +extern (C) byte[] _d_arraysetlengthiT(TypeInfo ti, size_t newlength, Array *p) +in +{ + assert(!p.length || p.data); +} +body +{ + byte* newdata; + size_t sizeelem = ti.next.tsize(); + void[] initializer = ti.next.init(); + size_t initsize = initializer.length; + + assert(sizeelem); + assert(initsize); + assert(initsize <= sizeelem); + assert((sizeelem / initsize) * initsize == sizeelem); + + debug(PRINTF) + { + printf("_d_arraysetlengthiT(p = %p, sizeelem = %d, newlength = %d, initsize = %d)\n", p, sizeelem, newlength, initsize); + if (p) + printf("\tp.data = %p, p.length = %d\n", p.data, p.length); + } + + if (newlength) + { + version (D_InlineAsm_X86) + { + size_t newsize = void; + + asm + { + mov EAX,newlength ; + mul EAX,sizeelem ; + mov newsize,EAX ; + jc Loverflow ; + } + } + else + { + size_t newsize = sizeelem * newlength; + + if (newsize / newlength != sizeelem) + goto Loverflow; + } + debug(PRINTF) printf("newsize = %x, newlength = %x\n", newsize, newlength); + + size_t size = p.length * sizeelem; + + if (p.data) + { + newdata = p.data; + if (newlength > p.length) + { + auto info = gc_query(p.data); + + if (info.size <= newsize || info.base != p.data) + { + if (info.size >= PAGESIZE && info.base == p.data) + { // Try to extend in-place + auto u = gc_extend(p.data, (newsize + 1) - info.size, (newsize + 1) - info.size); + if (u) + { + goto L1; + } + } + newdata = cast(byte *)gc_malloc(newsize + 1, info.attr); + newdata[0 .. size] = p.data[0 .. size]; + L1: ; + } + } + } + else + { + newdata = cast(byte *)gc_malloc(newsize + 1, !(ti.next.flags() & 1) ? BlkAttr.NO_SCAN : 0); + } + + auto q = initializer.ptr; // pointer to initializer + + if (newsize > size) + { + if (initsize == 1) + { + debug(PRINTF) printf("newdata = %p, size = %d, newsize = %d, *q = %d\n", newdata, size, newsize, *cast(byte*)q); + newdata[size .. 
newsize] = *(cast(byte*)q); + } + else + { + for (size_t u = size; u < newsize; u += initsize) + { + memcpy(newdata + u, q, initsize); + } + } + } + } + else + { + newdata = p.data; + } + + p.data = newdata; + p.length = newlength; + return newdata[0 .. newlength]; + +Loverflow: + onOutOfMemoryError(); +} + + +/** + * Append y[] to array x[]. + * size is size of each array element. + */ +extern (C) long _d_arrayappendT(TypeInfo ti, Array *px, byte[] y) +{ + auto sizeelem = ti.next.tsize(); // array element size + auto info = gc_query(px.data); + auto length = px.length; + auto newlength = length + y.length; + auto newsize = newlength * sizeelem; + + if (info.size < newsize || info.base != px.data) + { byte* newdata; + + if (info.size >= PAGESIZE && info.base == px.data) + { // Try to extend in-place + auto u = gc_extend(px.data, (newsize + 1) - info.size, (newsize + 1) - info.size); + if (u) + { + goto L1; + } + } + newdata = cast(byte *)gc_malloc(newCapacity(newlength, sizeelem) + 1, info.attr); + memcpy(newdata, px.data, length * sizeelem); + px.data = newdata; + } + L1: + px.length = newlength; + memcpy(px.data + length * sizeelem, y.ptr, y.length * sizeelem); + return *cast(long*)px; +} + + +/** + * + */ +size_t newCapacity(size_t newlength, size_t size) +{ + version(none) + { + size_t newcap = newlength * size; + } + else + { + /* + * Better version by Dave Fladebo: + * This uses an inverse logorithmic algorithm to pre-allocate a bit more + * space for larger arrays. + * - Arrays smaller than PAGESIZE bytes are left as-is, so for the most + * common cases, memory allocation is 1 to 1. The small overhead added + * doesn't affect small array perf. (it's virtually the same as + * current). + * - Larger arrays have some space pre-allocated. + * - As the arrays grow, the relative pre-allocated space shrinks. 
+ * - The logorithmic algorithm allocates relatively more space for + * mid-size arrays, making it very fast for medium arrays (for + * mid-to-large arrays, this turns out to be quite a bit faster than the + * equivalent realloc() code in C, on Linux at least. Small arrays are + * just as fast as GCC). + * - Perhaps most importantly, overall memory usage and stress on the GC + * is decreased significantly for demanding environments. + */ + size_t newcap = newlength * size; + size_t newext = 0; + + if (newcap > PAGESIZE) + { + //double mult2 = 1.0 + (size / log10(pow(newcap * 2.0,2.0))); + + // redo above line using only integer math + + static int log2plus1(size_t c) + { int i; + + if (c == 0) + i = -1; + else + for (i = 1; c >>= 1; i++) + { + } + return i; + } + + /* The following setting for mult sets how much bigger + * the new size will be over what is actually needed. + * 100 means the same size, more means proportionally more. + * More means faster but more memory consumption. + */ + //long mult = 100 + (1000L * size) / (6 * log2plus1(newcap)); + long mult = 100 + (1000L * size) / log2plus1(newcap); + + // testing shows 1.02 for large arrays is about the point of diminishing return + if (mult < 102) + mult = 102; + newext = cast(size_t)((newcap * mult) / 100); + newext -= newext % size; + debug(PRINTF) printf("mult: %2.2f, alloc: %2.2f\n",mult/100.0,newext / cast(double)size); + } + newcap = newext > newcap ? newext : newcap; + debug(PRINTF) printf("newcap = %d, newlength = %d, size = %d\n", newcap, newlength, size); + } + return newcap; +} + + +/** + * + */ +extern (C) byte[] _d_arrayappendcT(TypeInfo ti, inout byte[] x, ...) 
+{ + auto sizeelem = ti.next.tsize(); // array element size + auto info = gc_query(x.ptr); + auto length = x.length; + auto newlength = length + 1; + auto newsize = newlength * sizeelem; + + assert(info.size == 0 || length * sizeelem <= info.size); + + debug(PRINTF) printf("_d_arrayappendcT(sizeelem = %d, ptr = %p, length = %d, cap = %d)\n", sizeelem, x.ptr, x.length, info.size); + + if (info.size <= newsize || info.base != x.ptr) + { byte* newdata; + + if (info.size >= PAGESIZE && info.base == x.ptr) + { // Try to extend in-place + auto u = gc_extend(x.ptr, (newsize + 1) - info.size, (newsize + 1) - info.size); + if (u) + { + goto L1; + } + } + debug(PRINTF) printf("_d_arrayappendcT(length = %d, newlength = %d, cap = %d)\n", length, newlength, info.size); + auto newcap = newCapacity(newlength, sizeelem); + assert(newcap >= newlength * sizeelem); + newdata = cast(byte *)gc_malloc(newcap + 1, info.attr); + memcpy(newdata, x.ptr, length * sizeelem); + (cast(void**)(&x))[1] = newdata; + } + L1: + byte *argp = cast(byte *)(&ti + 2); + + *cast(size_t *)&x = newlength; + x.ptr[length * sizeelem .. newsize] = argp[0 .. 
sizeelem]; + assert((cast(size_t)x.ptr & 15) == 0); + assert(gc_sizeOf(x.ptr) > x.length * sizeelem); + return x; +} + + +/** + * + */ +extern (C) byte[] _d_arraycatT(TypeInfo ti, byte[] x, byte[] y) +out (result) +{ + auto sizeelem = ti.next.tsize(); // array element size + debug(PRINTF) printf("_d_arraycatT(%d,%p ~ %d,%p sizeelem = %d => %d,%p)\n", x.length, x.ptr, y.length, y.ptr, sizeelem, result.length, result.ptr); + assert(result.length == x.length + y.length); + for (size_t i = 0; i < x.length * sizeelem; i++) + assert((cast(byte*)result)[i] == (cast(byte*)x)[i]); + for (size_t i = 0; i < y.length * sizeelem; i++) + assert((cast(byte*)result)[x.length * sizeelem + i] == (cast(byte*)y)[i]); + + size_t cap = gc_sizeOf(result.ptr); + assert(!cap || cap > result.length * sizeelem); +} +body +{ + version (none) + { + /* Cannot use this optimization because: + * char[] a, b; + * char c = 'a'; + * b = a ~ c; + * c = 'b'; + * will change the contents of b. + */ + if (!y.length) + return x; + if (!x.length) + return y; + } + + debug(PRINTF) printf("_d_arraycatT(%d,%p ~ %d,%p)\n", x.length, x.ptr, y.length, y.ptr); + auto sizeelem = ti.next.tsize(); // array element size + debug(PRINTF) printf("_d_arraycatT(%d,%p ~ %d,%p sizeelem = %d)\n", x.length, x.ptr, y.length, y.ptr, sizeelem); + size_t xlen = x.length * sizeelem; + size_t ylen = y.length * sizeelem; + size_t len = xlen + ylen; + + if (!len) + return null; + + byte* p = cast(byte*)gc_malloc(len + 1, !(ti.next.flags() & 1) ? BlkAttr.NO_SCAN : 0); + memcpy(p, x.ptr, xlen); + memcpy(p + xlen, y.ptr, ylen); + p[len] = 0; + return p[0 .. x.length + y.length]; +} + + +/** + * + */ +extern (C) byte[] _d_arraycatnT(TypeInfo ti, uint n, ...) 
+{ void* a; + size_t length; + byte[]* p; + uint i; + byte[] b; + auto size = ti.next.tsize(); // array element size + + p = cast(byte[]*)(&n + 1); + + for (i = 0; i < n; i++) + { + b = *p++; + length += b.length; + } + if (!length) + return null; + + a = gc_malloc(length * size, !(ti.next.flags() & 1) ? BlkAttr.NO_SCAN : 0); + p = cast(byte[]*)(&n + 1); + + uint j = 0; + for (i = 0; i < n; i++) + { + b = *p++; + if (b.length) + { + memcpy(a + j, b.ptr, b.length * size); + j += b.length * size; + } + } + + byte[] result; + *cast(int *)&result = length; // jam length + (cast(void **)&result)[1] = a; // jam ptr + return result; +} + + +/** + * + */ +extern (C) void* _d_arrayliteralT(TypeInfo ti, size_t length, ...) +{ + auto sizeelem = ti.next.tsize(); // array element size + void* result; + + debug(PRINTF) printf("_d_arrayliteralT(sizeelem = %d, length = %d)\n", sizeelem, length); + if (length == 0 || sizeelem == 0) + result = null; + else + { + result = gc_malloc(length * sizeelem, !(ti.next.flags() & 1) ? BlkAttr.NO_SCAN : 0); + + va_list q; + va_start!(size_t)(q, length); + + size_t stacksize = (sizeelem + int.sizeof - 1) & ~(int.sizeof - 1); + + if (stacksize == sizeelem) + { + memcpy(result, q, length * sizeelem); + } + else + { + for (size_t i = 0; i < length; i++) + { + memcpy(result + i * sizeelem, q, sizeelem); + q += stacksize; + } + } + + va_end(q); + } + return result; +} + + +/** + * Support for array.dup property. + */ +struct Array2 +{ + size_t length; + void* ptr; +} + + +/** + * + */ +extern (C) long _adDupT(TypeInfo ti, Array2 a) +out (result) +{ + auto sizeelem = ti.next.tsize(); // array element size + assert(memcmp((*cast(Array2*)&result).ptr, a.ptr, a.length * sizeelem) == 0); +} +body +{ + Array2 r; + + if (a.length) + { + auto sizeelem = ti.next.tsize(); // array element size + auto size = a.length * sizeelem; + r.ptr = gc_malloc(size, !(ti.next.flags() & 1) ? 
BlkAttr.NO_SCAN : 0); + r.length = a.length; + memcpy(r.ptr, a.ptr, size); + } + return *cast(long*)(&r); +} + + +unittest +{ + int[] a; + int[] b; + int i; + + a = new int[3]; + a[0] = 1; a[1] = 2; a[2] = 3; + b = a.dup; + assert(b.length == 3); + for (i = 0; i < 3; i++) + assert(b[i] == i + 1); +} diff --git a/druntime/src/compiler/dmd/llmath.d b/druntime/src/compiler/dmd/llmath.d new file mode 100644 index 00000000..a776a84c --- /dev/null +++ b/druntime/src/compiler/dmd/llmath.d @@ -0,0 +1,531 @@ +/** + * Support for 64-bit longs. + * + * Copyright: Copyright Digital Mars 1993 - 2009. + * License: Boost License 1.0. + * Authors: Walter Bright, Sean Kelly + * + * Copyright Digital Mars 2000 - 2009. + * Distributed under the Boost Software License, Version 1.0. + * (See accompanying file LICENSE_1_0.txt or copy at + * http://www.boost.org/LICENSE_1_0.txt) + */ +#include