annotate dmd/html.c @ 1638:0de4525a9ed6

Apply workaround for #395 by klickverbot.
author Christian Kamm <kamm incasoftware de>
date Mon, 08 Mar 2010 20:06:08 +0100
parents c53b6e3fe49a
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
1
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
1
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
2 // Copyright (c) 1999-2006 by Digital Mars
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
3 // All Rights Reserved
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
4 // written by Walter Bright
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
5 // http://www.digitalmars.com
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
6 // License for redistribution is by either the Artistic License
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
7 // in artistic.txt, or the GNU General Public License in gnu.txt.
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
8 // See the included readme.txt for details.
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
9
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
10
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
11 /* HTML parser
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
12 */
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
13
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
14 #include <stdio.h>
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
15 #include <string.h>
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
16 #include <ctype.h>
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
17 #include <stdarg.h>
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
18 #include <errno.h>
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
19 #include <wchar.h>
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
20
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
21 #include "mars.h"
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
22 #include "html.h"
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
23
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
24 #include <assert.h>
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
25 #include "root.h"
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
26
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
27 extern int HtmlNamedEntity(unsigned char *p, int length);
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
28
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
29 static int isLineSeparator(const unsigned char* p);
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
30
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
31 /**********************************
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
32 * Determine if beginning of tag identifier
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
33 * or a continuation of a tag identifier.
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
34 */
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
35
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
36 inline int istagstart(int c)
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
37 {
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
38 return (isalpha(c) || c == '_');
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
39 }
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
40
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
41 inline int istag(int c)
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
42 {
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
43 return (isalnum(c) || c == '_');
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
44 }
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
45
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
46 /**********************************************
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
47 */
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
48
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
49 Html::Html(const char *sourcename, unsigned char *base, unsigned length)
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
50 {
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
51 //printf("Html::Html()\n");
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
52 this->sourcename = sourcename;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
53 this->base = base;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
54 p = base;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
55 end = base + length;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
56 linnum = 1;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
57 dbuf = NULL;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
58 inCode = 0;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
59 }
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
60
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
61 /**********************************************
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
62 * Print error & quit.
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
63 */
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
64
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
65 void Html::error(const char *format, ...)
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
66 {
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
67 if (!global.gag)
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
68 {
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
69 printf("%s(%d) : HTML Error: ", sourcename, linnum);
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
70
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
71 va_list ap;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
72 va_start(ap, format);
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
73 vprintf(format, ap);
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
74 va_end(ap);
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
75
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
76 printf("\n");
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
77 fflush(stdout);
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
78 }
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
79
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
80 global.errors++;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
81 }
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
82
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
83 /**********************************************
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
84 * Extract all the code from an HTML file,
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
85 * concatenate it all together, and store in buf.
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
86 */
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
87
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
88 void Html::extractCode(OutBuffer *buf)
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
89 {
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
90 //printf("Html::extractCode()\n");
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
91 dbuf = buf; // save for other routines
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
92 buf->reserve(end - p);
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
93 inCode = 0;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
94 while (1)
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
95 {
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
96 //printf("p = %p, *p = x%x\n", p, *p);
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
97 switch (*p)
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
98 {
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
99 #if 0 // strings are not recognized outside of tags
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
100 case '"':
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
101 case '\'':
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
102 skipString();
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
103 continue;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
104 #endif
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
105 case '<':
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
106 if (p[1] == '!' && isCommentStart())
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
107 { // Comments start with <!--
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
108 scanComment();
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
109 }
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
110 else if(p[1] == '!' && isCDATAStart())
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
111 {
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
112 scanCDATA();
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
113 }
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
114 else if (p[1] == '/' && istagstart(*skipWhite(p + 2)))
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
115 skipTag();
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
116 else if (istagstart(*skipWhite(p + 1)))
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
117 skipTag();
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
118 else
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
119 goto Ldefault;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
120 continue;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
121
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
122 case 0:
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
123 case 0x1a:
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
124 break; // end of file
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
125
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
126 case '&':
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
127 if (inCode)
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
128 { // Translate character entity into ascii for D parser
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
129 int c;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
130
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
131 c = charEntity();
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
132 buf->writeUTF8(c);
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
133 }
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
134 else
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
135 p++;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
136 continue;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
137
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
138 case '\r':
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
139 if (p[1] == '\n')
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
140 goto Ldefault;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
141 case '\n':
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
142 linnum++;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
143 // Always extract new lines, so that D lexer counts the
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
144 // lines right.
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
145 buf->writeByte(*p);
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
146 p++;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
147 continue;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
148
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
149 default:
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
150 Ldefault:
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
151 if (inCode)
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
152 buf->writeByte(*p);
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
153 p++;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
154 continue;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
155 }
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
156 break;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
157 }
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
158 buf->writeByte(0); // ending sentinel
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
159 //printf("D code is: '%s'\n", (char *)buf->data);
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
160 }
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
161
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
162 /***********************************************
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
163 * Scan to end of <> tag.
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
164 * Look for <code> and </code> tags to start/stop D processing.
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
165 * Input:
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
166 * p is on opening '<' of tag; it's already verified that
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
167 * it's a tag by lookahead
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
168 * Output:
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
169 * p is past closing '>' of tag
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
170 */
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
171
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
172 void Html::skipTag()
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
173 {
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
174 enum TagState // what parsing state we're in
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
175 {
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
176 TStagstart, // start of tag name
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
177 TStag, // in a tag name
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
178 TSrest, // following tag name
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
179 };
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
180 enum TagState state = TStagstart;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
181 int inot;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
182 unsigned char *tagstart = NULL;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
183 int taglen = 0;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
184
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
185 p++;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
186 inot = 0;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
187 if (*p == '/')
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
188 { inot = 1;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
189 p++;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
190 }
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
191 while (1)
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
192 {
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
193 switch (*p)
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
194 {
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
195 case '>': // found end of tag
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
196 p++;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
197 break;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
198
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
199 case '"':
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
200 case '\'':
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
201 state = TSrest;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
202 skipString();
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
203 continue;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
204
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
205 case '<':
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
206 if (p[1] == '!' && isCommentStart())
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
207 { // Comments start with <!--
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
208 scanComment();
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
209 }
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
210 else if (p[1] == '/' && istagstart(*skipWhite(p + 2)))
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
211 { error("nested tag");
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
212 skipTag();
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
213 }
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
214 else if (istagstart(*skipWhite(p + 1)))
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
215 { error("nested tag");
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
216 skipTag();
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
217 }
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
218 // Treat comments as if they were whitespace
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
219 state = TSrest;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
220 continue;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
221
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
222 case 0:
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
223 case 0x1a:
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
224 error("end of file before end of tag");
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
225 break; // end of file
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
226
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
227 case '\r':
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
228 if (p[1] == '\n')
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
229 goto Ldefault;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
230 case '\n':
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
231 linnum++;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
232 // Always extract new lines, so that code lexer counts the
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
233 // lines right.
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
234 dbuf->writeByte(*p);
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
235 state = TSrest; // end of tag
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
236 p++;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
237 continue;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
238
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
239 case ' ':
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
240 case '\t':
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
241 case '\f':
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
242 case '\v':
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
243 if (state == TStagstart)
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
244 { p++;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
245 continue;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
246 }
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
247 default:
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
248 Ldefault:
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
249 switch (state)
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
250 {
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
251 case TStagstart: // start of tag name
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
252 assert(istagstart(*p));
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
253 state = TStag;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
254 tagstart = p;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
255 taglen = 0;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
256 break;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
257
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
258 case TStag:
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
259 if (istag(*p))
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
260 { // Continuing tag name
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
261 taglen++;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
262 }
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
263 else
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
264 { // End of tag name
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
265 state = TSrest;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
266 }
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
267 break;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
268
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
269 case TSrest:
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
270 break;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
271 }
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
272 p++;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
273 continue;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
274 }
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
275 break;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
276 }
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
277
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
278 // See if we parsed a <code> or </code> tag
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
279 if (taglen && memicmp((char *) tagstart, (char *) "CODE", taglen) == 0
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
280 && *(p - 2) != '/') // ignore "<code />" (XHTML)
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
281 {
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
282 if (inot)
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
283 { inCode--;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
284 if (inCode < 0)
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
285 inCode = 0; // ignore extra </code>'s
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
286 }
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
287 else
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
288 inCode++;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
289 }
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
290 }
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
291
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
292 /***********************************************
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
293 * Scan to end of attribute string.
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
294 */
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
295
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
296 void Html::skipString()
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
297 {
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
298 int tc = *p;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
299
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
300 while (1)
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
301 {
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
302 p++;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
303 switch (*p)
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
304 {
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
305 case '"':
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
306 case '\'':
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
307 if (*p == tc)
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
308 { p++;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
309 break;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
310 }
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
311 continue;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
312
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
313 case '\r':
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
314 if (p[1] == '\n')
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
315 goto Ldefault;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
316 case '\n':
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
317 linnum++;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
318 // Always extract new lines, so that D lexer counts the
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
319 // lines right.
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
320 dbuf->writeByte(*p);
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
321 continue;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
322
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
323 case 0:
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
324 case 0x1a:
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
325 Leof:
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
326 error("end of file before closing %c of string", tc);
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
327 break;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
328
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
329 default:
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
330 Ldefault:
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
331 continue;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
332 }
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
333 break;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
334 }
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
335 }
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
336
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
337 /*********************************
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
338 * If p points to any white space, skip it
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
339 * and return pointer just past it.
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
340 */
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
341
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
342 unsigned char *Html::skipWhite(unsigned char *q)
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
343 {
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
344 for (; 1; q++)
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
345 {
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
346 switch (*q)
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
347 {
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
348 case ' ':
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
349 case '\t':
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
350 case '\f':
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
351 case '\v':
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
352 case '\r':
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
353 case '\n':
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
354 continue;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
355
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
356 default:
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
357 break;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
358 }
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
359 break;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
360 }
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
361 return q;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
362 }
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
363
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
364 /***************************************************
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
365 * Scan to end of comment.
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
366 * Comments are defined any of a number of ways.
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
367 * IE 5.0: <!-- followed by >
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
368 * "HTML The Definitive Guide": <!-- text with at least one space in it -->
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
369 * Netscape: <!-- --> comments nest
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
370 * w3c: whitespace can appear between -- and > of comment close
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
371 */
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
372
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
373 void Html::scanComment()
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
374 {
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
375 // Most of the complexity is dealing with the case that
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
376 // an arbitrary amount of whitespace can appear between
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
377 // the -- and the > of a comment close.
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
378 int scangt = 0;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
379
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
380 //printf("scanComment()\n");
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
381 if (*p == '\n')
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
382 { linnum++;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
383 // Always extract new lines, so that D lexer counts the
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
384 // lines right.
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
385 dbuf->writeByte(*p);
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
386 }
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
387 while (1)
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
388 {
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
389 //scangt = 1; // IE 5.0 compatibility
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
390 p++;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
391 switch (*p)
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
392 {
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
393 case '-':
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
394 if (p[1] == '-')
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
395 {
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
396 if (p[2] == '>') // optimize for most common case
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
397 {
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
398 p += 3;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
399 break;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
400 }
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
401 p++;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
402 scangt = 1;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
403 }
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
404 else
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
405 scangt = 0;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
406 continue;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
407
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
408 case '>':
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
409 if (scangt)
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
410 { // found -->
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
411 p++;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
412 break;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
413 }
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
414 continue;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
415
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
416 case ' ':
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
417 case '\t':
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
418 case '\f':
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
419 case '\v':
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
420 // skip white space
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
421 continue;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
422
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
423 case '\r':
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
424 if (p[1] == '\n')
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
425 goto Ldefault;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
426 case '\n':
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
427 linnum++; // remember to count lines
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
428 // Always extract new lines, so that D lexer counts the
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
429 // lines right.
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
430 dbuf->writeByte(*p);
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
431 continue;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
432
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
433 case 0:
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
434 case 0x1a:
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
435 error("end of file before closing --> of comment");
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
436 break;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
437
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
438 default:
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
439 Ldefault:
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
440 scangt = 0; // it's not -->
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
441 continue;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
442 }
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
443 break;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
444 }
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
445 //printf("*p = '%c'\n", *p);
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
446 }
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
447
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
448 /********************************************
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
449 * Determine if we are at the start of a comment.
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
450 * Input:
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
451 * p is on the opening '<'
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
452 * Returns:
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
453 * 0 if not start of a comment
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
454 * 1 if start of a comment, p is adjusted to point past --
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
455 */
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
456
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
457 int Html::isCommentStart()
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
458 #ifdef __DMC__
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
459 __out(result)
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
460 {
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
461 if (result == 0)
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
462 ;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
463 else if (result == 1)
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
464 {
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
465 assert(p[-2] == '-' && p[-1] == '-');
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
466 }
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
467 else
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
468 assert(0);
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
469 }
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
470 __body
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
471 #endif /* __DMC__ */
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
472 { unsigned char *s;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
473
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
474 if (p[0] == '<' && p[1] == '!')
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
475 {
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
476 for (s = p + 2; 1; s++)
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
477 {
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
478 switch (*s)
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
479 {
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
480 case ' ':
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
481 case '\t':
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
482 case '\r':
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
483 case '\f':
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
484 case '\v':
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
485 // skip white space, even though spec says no
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
486 // white space is allowed
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
487 continue;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
488
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
489 case '-':
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
490 if (s[1] == '-')
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
491 {
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
492 p = s + 2;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
493 return 1;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
494 }
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
495 goto No;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
496
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
497 default:
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
498 goto No;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
499 }
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
500 }
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
501 }
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
502 No:
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
503 return 0;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
504 }
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
505
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
506 int Html::isCDATAStart()
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
507 {
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
508 const char * CDATA_START_MARKER = "<![CDATA[";
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
509 size_t len = strlen(CDATA_START_MARKER);
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
510
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
511 if (strncmp((char*)p, CDATA_START_MARKER, len) == 0)
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
512 {
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
513 p += len;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
514 return 1;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
515 }
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
516 else
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
517 {
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
518 return 0;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
519 }
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
520 }
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
521
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
522 void Html::scanCDATA()
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
523 {
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
524 while(*p && *p != 0x1A)
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
525 {
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
526 int lineSepLength = isLineSeparator(p);
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
527 if (lineSepLength>0)
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
528 {
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
529 /* Always extract new lines, so that D lexer counts the lines
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
530 * right.
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
531 */
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
532 linnum++;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
533 dbuf->writeUTF8('\n');
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
534 p += lineSepLength;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
535 continue;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
536 }
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
537 else if (p[0] == ']' && p[1] == ']' && p[2] == '>')
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
538 {
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
539 /* end of CDATA section */
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
540 p += 3;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
541 return;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
542 }
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
543 else if (inCode)
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
544 {
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
545 /* this CDATA section contains D code */
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
546 dbuf->writeByte(*p);
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
547 }
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
548
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
549 p++;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
550 }
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
551 }
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
552
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
553 /********************************************
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
554 * Convert an HTML character entity into a character.
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
555 * Forms are:
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
556 * &name; named entity
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
557 * &#ddd; decimal
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
558 * &#xhhhh; hex
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
559 * Input:
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
560 * p is on the &
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
561 */
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
562
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
563 int Html::charEntity()
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
564 { int c = 0;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
565 int v;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
566 int hex;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
567 unsigned char *pstart = p;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
568
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
569 //printf("Html::charEntity('%c')\n", *p);
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
570 if (p[1] == '#')
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
571 {
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
572 p++;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
573 if (p[1] == 'x' || p[1] == 'X')
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
574 { p++;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
575 hex = 1;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
576 }
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
577 else
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
578 hex = 0;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
579 if (p[1] == ';')
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
580 goto Linvalid;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
581 while (1)
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
582 {
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
583 p++;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
584 switch (*p)
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
585 {
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
586 case 0:
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
587 case 0x1a:
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
588 error("end of file before end of character entity");
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
589 goto Lignore;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
590
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
591 case '\n':
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
592 case '\r':
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
593 case '<': // tag start
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
594 // Termination is assumed
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
595 break;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
596
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
597 case ';':
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
598 // Termination is explicit
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
599 p++;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
600 break;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
601
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
602 case '0': case '1': case '2': case '3': case '4':
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
603 case '5': case '6': case '7': case '8': case '9':
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
604 v = *p - '0';
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
605 goto Lvalue;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
606
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
607 case 'a': case 'b': case 'c':
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
608 case 'd': case 'e': case 'f':
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
609 if (!hex)
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
610 goto Linvalid;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
611 v = (*p - 'a') + 10;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
612 goto Lvalue;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
613
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
614 case 'A': case 'B': case 'C':
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
615 case 'D': case 'E': case 'F':
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
616 if (!hex)
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
617 goto Linvalid;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
618 v = (*p - 'A') + 10;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
619 goto Lvalue;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
620
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
621 Lvalue:
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
622 if (hex)
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
623 c = (c << 4) + v;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
624 else
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
625 c = (c * 10) + v;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
626 if (c > 0x10FFFF)
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
627 {
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
628 error("character entity out of range");
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
629 goto Lignore;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
630 }
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
631 continue;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
632
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
633 default:
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
634 Linvalid:
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
635 error("invalid numeric character reference");
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
636 goto Lignore;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
637 }
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
638 break;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
639 }
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
640 }
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
641 else
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
642 {
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
643 // It's a named entity; gather all characters until ;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
644 unsigned char *idstart = p + 1;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
645
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
646 while (1)
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
647 {
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
648 p++;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
649 switch (*p)
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
650 {
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
651 case 0:
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
652 case 0x1a:
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
653 error("end of file before end of character entity");
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
654 break;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
655
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
656 case '\n':
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
657 case '\r':
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
658 case '<': // tag start
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
659 // Termination is assumed
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
660 c = HtmlNamedEntity(idstart, p - idstart);
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
661 if (c == -1)
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
662 goto Lignore;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
663 break;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
664
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
665 case ';':
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
666 // Termination is explicit
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
667 c = HtmlNamedEntity(idstart, p - idstart);
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
668 if (c == -1)
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
669 goto Lignore;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
670 p++;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
671 break;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
672
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
673 default:
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
674 continue;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
675 }
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
676 break;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
677 }
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
678 }
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
679
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
680 // Kludge to convert non-breaking space to ascii space
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
681 if (c == 160)
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
682 c = ' ';
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
683
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
684 return c;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
685
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
686 Lignore:
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
687 //printf("Lignore\n");
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
688 p = pstart + 1;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
689 return '&';
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
690 }
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
691
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
692 /**
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
693 * identify DOS, Linux, Mac, Next and Unicode line endings
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
694 * 0 if this is no line separator
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
695 * >0 the length of the separator
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
696 * Note: input has to be UTF-8
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
697 */
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
698 static int isLineSeparator(const unsigned char* p)
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
699 {
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
700 // Linux
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
701 if( p[0]=='\n')
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
702 return 1;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
703
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
704 // Mac & Dos
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
705 if( p[0]=='\r')
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
706 return (p[1]=='\n') ? 2 : 1;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
707
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
708 // Unicode (line || paragraph sep.)
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
709 if( p[0]==0xE2 && p[1]==0x80 && (p[2]==0xA8 || p[2]==0xA9))
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
710 return 3;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
711
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
712 // Next
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
713 if( p[0]==0xC2 && p[1]==0x85)
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
714 return 2;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
715
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
716 return 0;
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
717 }
c53b6e3fe49a [svn r5] Initial commit. Most things are very rough.
lindquist
parents:
diff changeset
718