From ca3cd82805abe24cd7659e94d62b37e9ef34a24a Mon Sep 17 00:00:00 2001 From: David Nadlinger Date: Fri, 31 May 2013 21:23:39 +0200 Subject: [PATCH] Removed obsolete html.{h, c}. Gone from upstream in 2.062 as well. --- dmd2/html.c | 771 ---------------------------------------------------- dmd2/html.h | 51 ---- 2 files changed, 822 deletions(-) delete mode 100644 dmd2/html.c delete mode 100644 dmd2/html.h diff --git a/dmd2/html.c b/dmd2/html.c deleted file mode 100644 index 81d15458..00000000 --- a/dmd2/html.c +++ /dev/null @@ -1,771 +0,0 @@ - -// Copyright (c) 1999-2009 by Digital Mars -// All Rights Reserved -// written by Walter Bright -// http://www.digitalmars.com -// License for redistribution is by either the Artistic License -// in artistic.txt, or the GNU General Public License in gpl.txt. -// See the included readme.txt for details. - - -/* HTML parser - */ - -#include -#include -#include -#include -#include -#include - -#include "html.h" - -#if MARS -#include -#include "root.h" -//#include "../mars/mars.h" -#else -#include "outbuf.h" -#include "msgs2.h" - -extern void html_err(const char *, unsigned, unsigned, ...); - -static char __file__[] = __FILE__; /* for tassert.h */ -#include "tassert.h" -#endif - -#if __GNUC__ -int memicmp(const char *s1, const char *s2, int n); -#if 0 -{ - int result = 0; - - for (int i = 0; i < n; i++) - { char c1 = s1[i]; - char c2 = s2[i]; - - result = c1 - c2; - if (result) - { - if ('A' <= c1 && c1 <= 'Z') - c1 += 'a' - 'A'; - if ('A' <= c2 && c2 <= 'Z') - c2 += 'a' - 'A'; - result = c1 - c2; - if (result) - break; - } - } - return result; -} -#endif -#endif - -extern int HtmlNamedEntity(unsigned char *p, int length); - -static int isLineSeparator(const unsigned char* p); - -/********************************** - * Determine if beginning of tag identifier - * or a continuation of a tag identifier. - */ - -inline int istagstart(int c) -{ - return (isalpha(c) || c == '_'); -} - -inline int istag(int c) -{ - return (isalnum(c) || c == '_'); -} - -/********************************************** - */ - -Html::Html(const char *sourcename, unsigned char *base, unsigned length) -{ - //printf("Html::Html()\n"); - this->sourcename = sourcename; - this->base = base; - p = base; - end = base + length; - linnum = 1; - dbuf = NULL; - inCode = 0; -} - -/********************************************** - * Print error & quit. - */ - -void Html::error(const char *format, ...) -{ - fprintf(stderr, "%s(%d) : HTML Error: ", sourcename, linnum); - - va_list ap; - va_start(ap, format); - vfprintf(stderr, format, ap); - va_end(ap); - - fprintf(stderr, "\n"); - fflush(stderr); - -//#if MARS -// global.errors++; -//#else - exit(EXIT_FAILURE); -//#endif -} - -/********************************************** - * Extract all the code from an HTML file, - * concatenate it all together, and store in buf. - */ - -#if MARS -void Html::extractCode(OutBuffer *buf) -#else -void Html::extractCode(Outbuffer *buf) -#endif -{ - //printf("Html::extractCode()\n"); - dbuf = buf; // save for other routines - buf->reserve(end - p); - inCode = 0; - while (1) - { - //printf("p = %p, *p = x%x\n", p, *p); - switch (*p) - { -#if 0 // strings are not recognized outside of tags - case '"': - case '\'': - skipString(); - continue; -#endif - case '<': - if (p[1] == '!' && isCommentStart()) - { // Comments start with - * Netscape: comments nest - * w3c: whitespace can appear between -- and > of comment close - */ - -void Html::scanComment() -{ - // Most of the complexity is dealing with the case that - // an arbitrary amount of whitespace can appear between - // the -- and the > of a comment close. - int scangt = 0; - - //printf("scanComment()\n"); - if (*p == '\n') - { linnum++; - // Always extract new lines, so that D lexer counts the - // lines right. - dbuf->writeByte(*p); - } - while (1) - { - //scangt = 1; // IE 5.0 compatibility - p++; - switch (*p) - { - case '-': - if (p[1] == '-') - { - if (p[2] == '>') // optimize for most common case - { - p += 3; - break; - } - p++; - scangt = 1; - } - else - scangt = 0; - continue; - - case '>': - if (scangt) - { // found --> - p++; - break; - } - continue; - - case ' ': - case '\t': - case '\f': - case '\v': - // skip white space - continue; - - case '\r': - if (p[1] == '\n') - goto Ldefault; - case '\n': - linnum++; // remember to count lines - // Always extract new lines, so that D lexer counts the - // lines right. - dbuf->writeByte(*p); - continue; - - case 0: - case 0x1a: - error("end of file before closing --> of comment"); - break; - - default: - Ldefault: - scangt = 0; // it's not --> - continue; - } - break; - } - //printf("*p = '%c'\n", *p); -} - -/******************************************** - * Determine if we are at the start of a comment. - * Input: - * p is on the opening '<' - * Returns: - * 0 if not start of a comment - * 1 if start of a comment, p is adjusted to point past -- - */ - -int Html::isCommentStart() -#ifdef __DMC__ - __out(result) - { - if (result == 0) - ; - else if (result == 1) - { - assert(p[-2] == '-' && p[-1] == '-'); - } - else - assert(0); - } - __body -#endif /* __DMC__ */ - { unsigned char *s; - - if (p[0] == '<' && p[1] == '!') - { - for (s = p + 2; 1; s++) - { - switch (*s) - { - case ' ': - case '\t': - case '\r': - case '\f': - case '\v': - // skip white space, even though spec says no - // white space is allowed - continue; - - case '-': - if (s[1] == '-') - { - p = s + 2; - return 1; - } - goto No; - - default: - goto No; - } - } - } - No: - return 0; - } - -int Html::isCDATAStart() -{ - const char * CDATA_START_MARKER = "0) - { - /* Always extract new lines, so that D lexer counts the lines - * right. - */ - linnum++; - dbuf->writeByte('\n'); - p += lineSepLength; - continue; - } - else if (p[0] == ']' && p[1] == ']' && p[2] == '>') - { - /* end of CDATA section */ - p += 3; - return; - } - else if (inCode) - { - /* this CDATA section contains D code */ - dbuf->writeByte(*p); - } - - p++; - } -} - - -/******************************************** - * Convert an HTML character entity into a character. - * Forms are: - * &name; named entity - * &#ddd; decimal - * &#xhhhh; hex - * Input: - * p is on the & - */ - -int Html::charEntity() -{ int c = 0; - int v; - int hex; - unsigned char *pstart = p; - - //printf("Html::charEntity('%c')\n", *p); - if (p[1] == '#') - { - p++; - if (p[1] == 'x' || p[1] == 'X') - { p++; - hex = 1; - } - else - hex = 0; - if (p[1] == ';') - goto Linvalid; - while (1) - { - p++; - switch (*p) - { - case 0: - case 0x1a: - error("end of file before end of character entity"); - goto Lignore; - - case '\n': - case '\r': - case '<': // tag start - // Termination is assumed - break; - - case ';': - // Termination is explicit - p++; - break; - - case '0': case '1': case '2': case '3': case '4': - case '5': case '6': case '7': case '8': case '9': - v = *p - '0'; - goto Lvalue; - - case 'a': case 'b': case 'c': - case 'd': case 'e': case 'f': - if (!hex) - goto Linvalid; - v = (*p - 'a') + 10; - goto Lvalue; - - case 'A': case 'B': case 'C': - case 'D': case 'E': case 'F': - if (!hex) - goto Linvalid; - v = (*p - 'A') + 10; - goto Lvalue; - - Lvalue: - if (hex) - c = (c << 4) + v; - else - c = (c * 10) + v; - if (c > 0x10FFFF) - { - error("character entity out of range"); - goto Lignore; - } - continue; - - default: - Linvalid: - error("invalid numeric character reference"); - goto Lignore; - } - break; - } - } - else - { - // It's a named entity; gather all characters until ; - unsigned char *idstart = p + 1; - - while (1) - { - p++; - switch (*p) - { - case 0: - case 0x1a: - error("end of file before end of character entity"); - break; - - case '\n': - case '\r': - case '<': // tag start - // Termination is assumed - c = HtmlNamedEntity(idstart, p - idstart); - if (c == -1) - goto Lignore; - break; - - case ';': - // Termination is explicit - c = HtmlNamedEntity(idstart, p - idstart); - if (c == -1) - goto Lignore; - p++; - break; - - default: - continue; - } - break; - } - } - - // Kludge to convert non-breaking space to ascii space - if (c == 160) - c = ' '; - - return c; - -Lignore: - //printf("Lignore\n"); - p = pstart + 1; - return '&'; -} - -/** - * identify DOS, Linux, Mac, Next and Unicode line endings - * 0 if this is no line separator - * >0 the length of the separator - * Note: input has to be UTF-8 - */ -static int isLineSeparator(const unsigned char* p) -{ - // Linux - if( p[0]=='\n') - return 1; - - // Mac & Dos - if( p[0]=='\r') - return (p[1]=='\n') ? 2 : 1; - - // Unicode (line || paragraph sep.) - if( p[0]==0xE2 && p[1]==0x80 && (p[2]==0xA8 || p[2]==0xA9)) - return 3; - - // Next - if( p[0]==0xC2 && p[1]==0x85) - return 2; - - return 0; -} - - diff --git a/dmd2/html.h b/dmd2/html.h deleted file mode 100644 index 90e43260..00000000 --- a/dmd2/html.h +++ /dev/null @@ -1,51 +0,0 @@ - -// Copyright (c) 1999-2009 by Digital Mars -// All Rights Reserved -// written by Walter Bright -// http://www.digitalmars.com -// License for redistribution is by either the Artistic License -// in artistic.txt, or the GNU General Public License in gpl.txt. -// See the included readme.txt for details. - -#define MARS 1 - -#if MARS -struct OutBuffer; -#else -struct Outbuffer; -#endif - -struct Html -{ - const char *sourcename; - - unsigned char *base; // pointer to start of buffer - unsigned char *end; // past end of buffer - unsigned char *p; // current character - unsigned linnum; // current line number -#if MARS - OutBuffer *dbuf; // code source buffer -#else - Outbuffer *dbuf; // code source buffer -#endif - int inCode; // !=0 if in code - - - Html(const char *sourcename, unsigned char *base, unsigned length); - - void error(const char *format, ...); -#if MARS - void extractCode(OutBuffer *buf); -#else - void extractCode(Outbuffer *buf); -#endif - void skipTag(); - void skipString(); - unsigned char *skipWhite(unsigned char *q); - void scanComment(); - int isCommentStart(); - void scanCDATA(); - int isCDATAStart(); - int charEntity(); - static int namedEntity(unsigned char *p, int length); -};