annotate dmd/html.c @ 650:aa6a0b7968f7

Added test case for bug #100 Removed dubious check for not emitting static private global in other modules without access. This should be handled properly somewhere else, it's causing unresolved global errors for stuff that should work (in MiniD)
author Tomas Lindquist Olsen <tomas.l.olsen@gmail.com>
date Sun, 05 Oct 2008 17:28:15 +0200
parents c53b6e3fe49a
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
1
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
1
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
2 // Copyright (c) 1999-2006 by Digital Mars
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
3 // All Rights Reserved
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
4 // written by Walter Bright
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
5 // http://www.digitalmars.com
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
6 // License for redistribution is by either the Artistic License
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
7 // in artistic.txt, or the GNU General Public License in gnu.txt.
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
8 // See the included readme.txt for details.
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
9
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
10
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
11 /* HTML parser
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
12 */
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
13
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
14 #include <stdio.h>
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
15 #include <string.h>
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
16 #include <ctype.h>
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
17 #include <stdarg.h>
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
18 #include <errno.h>
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
19 #include <wchar.h>
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
20
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
21 #include "mars.h"
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
22 #include "html.h"
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
23
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
24 #include <assert.h>
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
25 #include "root.h"
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
26
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
27 extern int HtmlNamedEntity(unsigned char *p, int length);
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
28
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
29 static int isLineSeparator(const unsigned char* p);
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
30
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
31 /**********************************
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
32 * Determine if beginning of tag identifier
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
33 * or a continuation of a tag identifier.
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
34 */
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
35
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
36 inline int istagstart(int c)
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
37 {
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
38 return (isalpha(c) || c == '_');
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
39 }
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
40
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
41 inline int istag(int c)
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
42 {
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
43 return (isalnum(c) || c == '_');
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
44 }
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
45
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
46 /**********************************************
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
47 */
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
48
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
49 Html::Html(const char *sourcename, unsigned char *base, unsigned length)
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
50 {
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
51 //printf("Html::Html()\n");
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
52 this->sourcename = sourcename;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
53 this->base = base;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
54 p = base;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
55 end = base + length;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
56 linnum = 1;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
57 dbuf = NULL;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
58 inCode = 0;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
59 }
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
60
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
61 /**********************************************
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
62 * Print error & quit.
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
63 */
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
64
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
65 void Html::error(const char *format, ...)
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
66 {
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
67 if (!global.gag)
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
68 {
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
69 printf("%s(%d) : HTML Error: ", sourcename, linnum);
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
70
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
71 va_list ap;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
72 va_start(ap, format);
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
73 vprintf(format, ap);
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
74 va_end(ap);
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
75
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
76 printf("\n");
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
77 fflush(stdout);
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
78 }
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
79
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
80 global.errors++;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
81 }
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
82
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
83 /**********************************************
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
84 * Extract all the code from an HTML file,
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
85 * concatenate it all together, and store in buf.
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
86 */
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
87
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
88 void Html::extractCode(OutBuffer *buf)
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
89 {
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
90 //printf("Html::extractCode()\n");
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
91 dbuf = buf; // save for other routines
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
92 buf->reserve(end - p);
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
93 inCode = 0;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
94 while (1)
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
95 {
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
96 //printf("p = %p, *p = x%x\n", p, *p);
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
97 switch (*p)
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
98 {
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
99 #if 0 // strings are not recognized outside of tags
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
100 case '"':
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
101 case '\'':
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
102 skipString();
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
103 continue;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
104 #endif
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
105 case '<':
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
106 if (p[1] == '!' && isCommentStart())
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
107 { // Comments start with <!--
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
108 scanComment();
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
109 }
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
110 else if(p[1] == '!' && isCDATAStart())
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
111 {
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
112 scanCDATA();
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
113 }
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
114 else if (p[1] == '/' && istagstart(*skipWhite(p + 2)))
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
115 skipTag();
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
116 else if (istagstart(*skipWhite(p + 1)))
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
117 skipTag();
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
118 else
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
119 goto Ldefault;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
120 continue;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
121
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
122 case 0:
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
123 case 0x1a:
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
124 break; // end of file
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
125
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
126 case '&':
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
127 if (inCode)
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
128 { // Translate character entity into ascii for D parser
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
129 int c;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
130
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
131 c = charEntity();
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
132 buf->writeUTF8(c);
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
133 }
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
134 else
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
135 p++;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
136 continue;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
137
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
138 case '\r':
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
139 if (p[1] == '\n')
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
140 goto Ldefault;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
141 case '\n':
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
142 linnum++;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
143 // Always extract new lines, so that D lexer counts the
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
144 // lines right.
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
145 buf->writeByte(*p);
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
146 p++;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
147 continue;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
148
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
149 default:
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
150 Ldefault:
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
151 if (inCode)
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
152 buf->writeByte(*p);
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
153 p++;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
154 continue;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
155 }
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
156 break;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
157 }
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
158 buf->writeByte(0); // ending sentinel
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
159 //printf("D code is: '%s'\n", (char *)buf->data);
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
160 }
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
161
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
162 /***********************************************
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
163 * Scan to end of <> tag.
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
164 * Look for <code> and </code> tags to start/stop D processing.
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
165 * Input:
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
166 * p is on opening '<' of tag; it's already verified that
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
167 * it's a tag by lookahead
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
168 * Output:
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
169 * p is past closing '>' of tag
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
170 */
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
171
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
172 void Html::skipTag()
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
173 {
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
174 enum TagState // what parsing state we're in
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
175 {
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
176 TStagstart, // start of tag name
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
177 TStag, // in a tag name
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
178 TSrest, // following tag name
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
179 };
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
180 enum TagState state = TStagstart;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
181 int inot;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
182 unsigned char *tagstart = NULL;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
183 int taglen = 0;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
184
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
185 p++;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
186 inot = 0;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
187 if (*p == '/')
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
188 { inot = 1;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
189 p++;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
190 }
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
191 while (1)
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
192 {
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
193 switch (*p)
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
194 {
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
195 case '>': // found end of tag
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
196 p++;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
197 break;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
198
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
199 case '"':
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
200 case '\'':
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
201 state = TSrest;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
202 skipString();
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
203 continue;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
204
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
205 case '<':
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
206 if (p[1] == '!' && isCommentStart())
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
207 { // Comments start with <!--
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
208 scanComment();
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
209 }
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
210 else if (p[1] == '/' && istagstart(*skipWhite(p + 2)))
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
211 { error("nested tag");
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
212 skipTag();
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
213 }
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
214 else if (istagstart(*skipWhite(p + 1)))
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
215 { error("nested tag");
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
216 skipTag();
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
217 }
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
218 // Treat comments as if they were whitespace
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
219 state = TSrest;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
220 continue;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
221
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
222 case 0:
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
223 case 0x1a:
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
224 error("end of file before end of tag");
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
225 break; // end of file
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
226
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
227 case '\r':
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
228 if (p[1] == '\n')
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
229 goto Ldefault;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
230 case '\n':
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
231 linnum++;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
232 // Always extract new lines, so that code lexer counts the
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
233 // lines right.
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
234 dbuf->writeByte(*p);
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
235 state = TSrest; // end of tag
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
236 p++;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
237 continue;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
238
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
239 case ' ':
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
240 case '\t':
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
241 case '\f':
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
242 case '\v':
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
243 if (state == TStagstart)
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
244 { p++;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
245 continue;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
246 }
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
247 default:
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
248 Ldefault:
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
249 switch (state)
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
250 {
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
251 case TStagstart: // start of tag name
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
252 assert(istagstart(*p));
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
253 state = TStag;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
254 tagstart = p;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
255 taglen = 0;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
256 break;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
257
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
258 case TStag:
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
259 if (istag(*p))
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
260 { // Continuing tag name
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
261 taglen++;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
262 }
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
263 else
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
264 { // End of tag name
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
265 state = TSrest;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
266 }
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
267 break;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
268
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
269 case TSrest:
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
270 break;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
271 }
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
272 p++;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
273 continue;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
274 }
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
275 break;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
276 }
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
277
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
278 // See if we parsed a <code> or </code> tag
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
279 if (taglen && memicmp((char *) tagstart, (char *) "CODE", taglen) == 0
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
280 && *(p - 2) != '/') // ignore "<code />" (XHTML)
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
281 {
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
282 if (inot)
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
283 { inCode--;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
284 if (inCode < 0)
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
285 inCode = 0; // ignore extra </code>'s
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
286 }
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
287 else
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
288 inCode++;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
289 }
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
290 }
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
291
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
292 /***********************************************
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
293 * Scan to end of attribute string.
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
294 */
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
295
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
296 void Html::skipString()
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
297 {
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
298 int tc = *p;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
299
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
300 while (1)
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
301 {
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
302 p++;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
303 switch (*p)
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
304 {
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
305 case '"':
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
306 case '\'':
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
307 if (*p == tc)
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
308 { p++;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
309 break;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
310 }
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
311 continue;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
312
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
313 case '\r':
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
314 if (p[1] == '\n')
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
315 goto Ldefault;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
316 case '\n':
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
317 linnum++;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
318 // Always extract new lines, so that D lexer counts the
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
319 // lines right.
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
320 dbuf->writeByte(*p);
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
321 continue;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
322
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
323 case 0:
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
324 case 0x1a:
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
325 Leof:
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
326 error("end of file before closing %c of string", tc);
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
327 break;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
328
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
329 default:
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
330 Ldefault:
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
331 continue;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
332 }
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
333 break;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
334 }
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
335 }
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
336
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
337 /*********************************
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
338 * If p points to any white space, skip it
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
339 * and return pointer just past it.
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
340 */
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
341
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
342 unsigned char *Html::skipWhite(unsigned char *q)
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
343 {
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
344 for (; 1; q++)
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
345 {
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
346 switch (*q)
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
347 {
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
348 case ' ':
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
349 case '\t':
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
350 case '\f':
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
351 case '\v':
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
352 case '\r':
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
353 case '\n':
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
354 continue;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
355
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
356 default:
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
357 break;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
358 }
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
359 break;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
360 }
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
361 return q;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
362 }
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
363
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
364 /***************************************************
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
365 * Scan to end of comment.
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
366 * Comments are defined any of a number of ways.
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
367 * IE 5.0: <!-- followed by >
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
368 * "HTML The Definitive Guide": <!-- text with at least one space in it -->
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
369 * Netscape: <!-- --> comments nest
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
370 * w3c: whitespace can appear between -- and > of comment close
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
371 */
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
372
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
373 void Html::scanComment()
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
374 {
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
375 // Most of the complexity is dealing with the case that
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
376 // an arbitrary amount of whitespace can appear between
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
377 // the -- and the > of a comment close.
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
378 int scangt = 0;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
379
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
380 //printf("scanComment()\n");
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
381 if (*p == '\n')
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
382 { linnum++;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
383 // Always extract new lines, so that D lexer counts the
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
384 // lines right.
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
385 dbuf->writeByte(*p);
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
386 }
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
387 while (1)
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
388 {
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
389 //scangt = 1; // IE 5.0 compatibility
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
390 p++;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
391 switch (*p)
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
392 {
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
393 case '-':
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
394 if (p[1] == '-')
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
395 {
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
396 if (p[2] == '>') // optimize for most common case
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
397 {
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
398 p += 3;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
399 break;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
400 }
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
401 p++;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
402 scangt = 1;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
403 }
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
404 else
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
405 scangt = 0;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
406 continue;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
407
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
408 case '>':
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
409 if (scangt)
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
410 { // found -->
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
411 p++;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
412 break;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
413 }
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
414 continue;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
415
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
416 case ' ':
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
417 case '\t':
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
418 case '\f':
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
419 case '\v':
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
420 // skip white space
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
421 continue;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
422
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
423 case '\r':
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
424 if (p[1] == '\n')
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
425 goto Ldefault;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
426 case '\n':
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
427 linnum++; // remember to count lines
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
428 // Always extract new lines, so that D lexer counts the
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
429 // lines right.
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
430 dbuf->writeByte(*p);
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
431 continue;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
432
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
433 case 0:
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
434 case 0x1a:
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
435 error("end of file before closing --> of comment");
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
436 break;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
437
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
438 default:
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
439 Ldefault:
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
440 scangt = 0; // it's not -->
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
441 continue;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
442 }
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
443 break;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
444 }
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
445 //printf("*p = '%c'\n", *p);
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
446 }
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
447
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
448 /********************************************
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
449 * Determine if we are at the start of a comment.
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
450 * Input:
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
451 * p is on the opening '<'
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
452 * Returns:
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
453 * 0 if not start of a comment
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
454 * 1 if start of a comment, p is adjusted to point past --
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
455 */
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
456
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
457 int Html::isCommentStart()
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
458 #ifdef __DMC__
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
459 __out(result)
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
460 {
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
461 if (result == 0)
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
462 ;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
463 else if (result == 1)
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
464 {
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
465 assert(p[-2] == '-' && p[-1] == '-');
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
466 }
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
467 else
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
468 assert(0);
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
469 }
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
470 __body
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
471 #endif /* __DMC__ */
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
472 { unsigned char *s;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
473
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
474 if (p[0] == '<' && p[1] == '!')
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
475 {
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
476 for (s = p + 2; 1; s++)
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
477 {
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
478 switch (*s)
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
479 {
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
480 case ' ':
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
481 case '\t':
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
482 case '\r':
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
483 case '\f':
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
484 case '\v':
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
485 // skip white space, even though spec says no
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
486 // white space is allowed
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
487 continue;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
488
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
489 case '-':
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
490 if (s[1] == '-')
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
491 {
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
492 p = s + 2;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
493 return 1;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
494 }
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
495 goto No;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
496
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
497 default:
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
498 goto No;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
499 }
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
500 }
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
501 }
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
502 No:
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
503 return 0;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
504 }
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
505
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
506 int Html::isCDATAStart()
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
507 {
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
508 const char * CDATA_START_MARKER = "<![CDATA[";
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
509 size_t len = strlen(CDATA_START_MARKER);
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
510
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
511 if (strncmp((char*)p, CDATA_START_MARKER, len) == 0)
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
512 {
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
513 p += len;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
514 return 1;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
515 }
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
516 else
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
517 {
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
518 return 0;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
519 }
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
520 }
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
521
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
522 void Html::scanCDATA()
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
523 {
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
524 while(*p && *p != 0x1A)
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
525 {
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
526 int lineSepLength = isLineSeparator(p);
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
527 if (lineSepLength>0)
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
528 {
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
529 /* Always extract new lines, so that D lexer counts the lines
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
530 * right.
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
531 */
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
532 linnum++;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
533 dbuf->writeUTF8('\n');
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
534 p += lineSepLength;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
535 continue;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
536 }
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
537 else if (p[0] == ']' && p[1] == ']' && p[2] == '>')
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
538 {
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
539 /* end of CDATA section */
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
540 p += 3;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
541 return;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
542 }
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
543 else if (inCode)
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
544 {
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
545 /* this CDATA section contains D code */
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
546 dbuf->writeByte(*p);
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
547 }
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
548
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
549 p++;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
550 }
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
551 }
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
552
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
553 /********************************************
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
554 * Convert an HTML character entity into a character.
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
555 * Forms are:
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
556 * &name; named entity
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
557 * &#ddd; decimal
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
558 * &#xhhhh; hex
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
559 * Input:
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
560 * p is on the &
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
561 */
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
562
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
563 int Html::charEntity()
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
564 { int c = 0;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
565 int v;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
566 int hex;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
567 unsigned char *pstart = p;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
568
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
569 //printf("Html::charEntity('%c')\n", *p);
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
570 if (p[1] == '#')
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
571 {
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
572 p++;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
573 if (p[1] == 'x' || p[1] == 'X')
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
574 { p++;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
575 hex = 1;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
576 }
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
577 else
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
578 hex = 0;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
579 if (p[1] == ';')
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
580 goto Linvalid;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
581 while (1)
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
582 {
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
583 p++;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
584 switch (*p)
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
585 {
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
586 case 0:
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
587 case 0x1a:
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
588 error("end of file before end of character entity");
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
589 goto Lignore;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
590
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
591 case '\n':
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
592 case '\r':
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
593 case '<': // tag start
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
594 // Termination is assumed
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
595 break;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
596
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
597 case ';':
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
598 // Termination is explicit
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
599 p++;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
600 break;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
601
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
602 case '0': case '1': case '2': case '3': case '4':
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
603 case '5': case '6': case '7': case '8': case '9':
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
604 v = *p - '0';
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
605 goto Lvalue;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
606
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
607 case 'a': case 'b': case 'c':
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
608 case 'd': case 'e': case 'f':
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
609 if (!hex)
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
610 goto Linvalid;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
611 v = (*p - 'a') + 10;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
612 goto Lvalue;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
613
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
614 case 'A': case 'B': case 'C':
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
615 case 'D': case 'E': case 'F':
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
616 if (!hex)
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
617 goto Linvalid;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
618 v = (*p - 'A') + 10;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
619 goto Lvalue;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
620
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
621 Lvalue:
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
622 if (hex)
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
623 c = (c << 4) + v;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
624 else
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
625 c = (c * 10) + v;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
626 if (c > 0x10FFFF)
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
627 {
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
628 error("character entity out of range");
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
629 goto Lignore;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
630 }
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
631 continue;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
632
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
633 default:
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
634 Linvalid:
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
635 error("invalid numeric character reference");
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
636 goto Lignore;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
637 }
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
638 break;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
639 }
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
640 }
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
641 else
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
642 {
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
643 // It's a named entity; gather all characters until ;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
644 unsigned char *idstart = p + 1;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
645
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
646 while (1)
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
647 {
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
648 p++;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
649 switch (*p)
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
650 {
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
651 case 0:
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
652 case 0x1a:
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
653 error("end of file before end of character entity");
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
654 break;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
655
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
656 case '\n':
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
657 case '\r':
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
658 case '<': // tag start
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
659 // Termination is assumed
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
660 c = HtmlNamedEntity(idstart, p - idstart);
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
661 if (c == -1)
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
662 goto Lignore;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
663 break;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
664
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
665 case ';':
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
666 // Termination is explicit
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
667 c = HtmlNamedEntity(idstart, p - idstart);
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
668 if (c == -1)
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
669 goto Lignore;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
670 p++;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
671 break;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
672
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
673 default:
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
674 continue;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
675 }
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
676 break;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
677 }
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
678 }
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
679
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
680 // Kludge to convert non-breaking space to ascii space
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
681 if (c == 160)
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
682 c = ' ';
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
683
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
684 return c;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
685
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
686 Lignore:
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
687 //printf("Lignore\n");
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
688 p = pstart + 1;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
689 return '&';
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
690 }
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
691
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
692 /**
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
693 * identify DOS, Linux, Mac, Next and Unicode line endings
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
694 * 0 if this is no line separator
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
695 * >0 the length of the separator
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
696 * Note: input has to be UTF-8
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
697 */
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
698 static int isLineSeparator(const unsigned char* p)
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
699 {
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
700 // Linux
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
701 if( p[0]=='\n')
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
702 return 1;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
703
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
704 // Mac & Dos
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
705 if( p[0]=='\r')
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
706 return (p[1]=='\n') ? 2 : 1;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
707
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
708 // Unicode (line || paragraph sep.)
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
709 if( p[0]==0xE2 && p[1]==0x80 && (p[2]==0xA8 || p[2]==0xA9))
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
710 return 3;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
711
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
712 // Next
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
713 if( p[0]==0xC2 && p[1]==0x85)
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
714 return 2;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
715
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
716 return 0;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
717 }
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
718