comparison dwtx/jface/internal/text/html/HTML2TextReader.d @ 162:1a5b8f8129df

...
author Frank Benoit <benoit@tionex.de>
date Mon, 08 Sep 2008 00:51:37 +0200
parents 25f1f92fa3df
children c6d7b1ea700b
comparison
equal deleted inserted replaced
161:f8d52b926852 162:1a5b8f8129df
22 import dwtx.jface.internal.text.html.HTMLMessages; // packageimport 22 import dwtx.jface.internal.text.html.HTMLMessages; // packageimport
23 23
24 import dwt.dwthelper.utils; 24 import dwt.dwthelper.utils;
25 import dwtx.dwtxhelper.PushbackReader; 25 import dwtx.dwtxhelper.PushbackReader;
26 import dwtx.dwtxhelper.Collection; 26 import dwtx.dwtxhelper.Collection;
27 static import tango.text.convert.Utf;
27 28
28 import dwt.DWT; 29 import dwt.DWT;
29 import dwt.custom.StyleRange; 30 import dwt.custom.StyleRange;
30 import dwtx.jface.text.TextPresentation; 31 import dwtx.jface.text.TextPresentation;
31 32
188 if ("dd".equals(html)) //$NON-NLS-1$ 189 if ("dd".equals(html)) //$NON-NLS-1$
189 return "\t"; //$NON-NLS-1$ 190 return "\t"; //$NON-NLS-1$
190 191
191 if ("li".equals(html)) //$NON-NLS-1$ 192 if ("li".equals(html)) //$NON-NLS-1$
192 // FIXME: this hard-coded prefix does not work for RTL languages, see https://bugs.eclipse.org/bugs/show_bug.cgi?id=91682 193 // FIXME: this hard-coded prefix does not work for RTL languages, see https://bugs.eclipse.org/bugs/show_bug.cgi?id=91682
193 return LINE_DELIM + HTMLMessages.getString("HTML2TextReader.listItemPrefix"); //$NON-NLS-1$ 194 return LINE_DELIM ~ HTMLMessages.getString("HTML2TextReader.listItemPrefix"); //$NON-NLS-1$
194 195
195 if ("/b".equals(html)) { //$NON-NLS-1$ 196 if ("/b".equals(html)) { //$NON-NLS-1$
196 stopBold(); 197 stopBold();
197 return EMPTY_STRING; 198 return EMPTY_STRING;
198 } 199 }
240 241
241 StringBuffer buf= new StringBuffer(); 242 StringBuffer buf= new StringBuffer();
242 int ch; 243 int ch;
243 do { 244 do {
244 245
245 ch= nextChar(); 246 ch= nextDChar();
246 247
247 while (ch !is -1 && ch !is '>') { 248 while (ch !is -1 && ch !is '>') {
248 buf.append(Character.toLowerCase(cast(wchar) ch)); 249 buf.append(dcharToString(Character.toLowerCase(cast(dchar) ch)));
249 ch= nextChar(); 250 ch= nextDChar();
250 if (ch is '"'){ 251 if (ch is '"'){
251 buf.append(Character.toLowerCase(cast(wchar) ch)); 252 buf.append(dcharToString(Character.toLowerCase(cast(dchar) ch)));
252 ch= nextChar(); 253 ch= nextDChar();
253 while (ch !is -1 && ch !is '"'){ 254 while (ch !is -1 && ch !is '"'){
254 buf.append(Character.toLowerCase(cast(wchar) ch)); 255 buf.append(dcharToString(Character.toLowerCase(cast(dchar) ch)));
255 ch= nextChar(); 256 ch= nextDChar();
256 } 257 }
257 } 258 }
258 if (ch is '<' && !isInComment(buf)) { 259 if (ch is '<' && !isInComment(buf)) {
259 unread(ch); 260 unreadDChar(ch);
260 return '<' + buf.toString(); 261 return '<' ~ buf.toString();
261 } 262 }
262 } 263 }
263 264
264 if (ch is -1) 265 if (ch is -1)
265 return null; 266 return null;
266 267
267 if (!isInComment(buf) || isCommentEnd(buf)) { 268 if (!isInComment(buf) || isCommentEnd(buf)) {
268 break; 269 break;
269 } 270 }
270 // unfinished comment 271 // unfinished comment
271 buf.append(cast(wchar) ch); 272 buf.append(dcharToString(cast(dchar) ch));
272 } while (true); 273 } while (true);
273 274
274 return html2Text(buf.toString()); 275 return html2Text(buf.toString());
275 } 276 }
276 277
277 private static bool isInComment(StringBuffer buf) { 278 private static bool isInComment(StringBuffer buf) {
278 return buf.length() >= 3 && "!--".equals(buf.substring(0, 3)); //$NON-NLS-1$ 279 return buf.length() >= 3 && "!--".equals(buf.slice().substring(0, 3)); //$NON-NLS-1$
279 } 280 }
280 281
281 private static bool isCommentEnd(StringBuffer buf) { 282 private static bool isCommentEnd(StringBuffer buf) {
282 int tagLen= buf.length(); 283 int tagLen= buf.length();
283 return tagLen >= 5 && "--".equals(buf.substring(tagLen - 2)); //$NON-NLS-1$ 284 return tagLen >= 5 && "--".equals(buf.slice().substring(tagLen - 2)); //$NON-NLS-1$
284 } 285 }
285 286
286 private String processPreformattedText(int c) { 287 private String processPreformattedText(int c) {
287 if (c is '\r' || c is '\n') 288 if (c is '\r' || c is '\n')
288 fCounter++; 289 fCounter++;
289 return null; 290 return null;
290 } 291 }
291 292
292 293
293 private void unread(int ch) { 294 private void unreadDChar(dchar ch) {
294 (cast(PushbackReader) getReader()).unread(ch); 295 char[4] buf;
296 dchar[1] ibuf;
297 ibuf[0] = ch;
298 foreach( char c; tango.text.convert.Utf.toString( ibuf[], buf[] )){
299 (cast(PushbackReader) getReader()).unread(c);
300 }
295 } 301 }
296 302
297 protected String entity2Text(String symbol) { 303 protected String entity2Text(String symbol) {
298 if (symbol.length() > 1 && symbol.charAt(0) is '#') { 304 if (symbol.length() > 1 && symbol.charAt(0) is '#') {
299 int ch; 305 int ch;
301 if (symbol.charAt(1) is 'x') { 307 if (symbol.charAt(1) is 'x') {
302 ch= Integer.parseInt(symbol.substring(2), 16); 308 ch= Integer.parseInt(symbol.substring(2), 16);
303 } else { 309 } else {
304 ch= Integer.parseInt(symbol.substring(1), 10); 310 ch= Integer.parseInt(symbol.substring(1), 10);
305 } 311 }
306 return EMPTY_STRING + cast(wchar)ch; 312 return dcharToString( cast(dchar)ch);
307 } catch (NumberFormatException e) { 313 } catch (NumberFormatException e) {
308 } 314 }
309 } else { 315 } else {
310 String str= cast(String) fgEntityLookup.get(symbol); 316 String str= stringcast( fgEntityLookup.get(symbol));
311 if (str !is null) { 317 if (str !is null) {
312 return str; 318 return str;
313 } 319 }
314 } 320 }
315 return "&" + symbol; // not found //$NON-NLS-1$ 321 return "&" ~ symbol; // not found //$NON-NLS-1$
316 } 322 }
317 323
318 /* 324 /*
319 * A '&' has been read. Process a entity 325 * A '&' has been read. Process a entity
320 */ 326 */
321 private String processEntity() { 327 private String processEntity() {
322 StringBuffer buf= new StringBuffer(); 328 StringBuffer buf= new StringBuffer();
323 int ch= nextChar(); 329 int ch= nextDChar();
324 while (Character.isLetterOrDigit(cast(wchar)ch) || ch is '#') { 330 while (Character.isLetterOrDigit(cast(dchar)ch) || ch is '#') {
325 buf.append(cast(wchar) ch); 331 buf.append(dcharToString(cast(dchar) ch));
326 ch= nextChar(); 332 ch= nextDChar();
327 } 333 }
328 334
329 if (ch is ';') 335 if (ch is ';')
330 return entity2Text(buf.toString()); 336 return entity2Text(buf.toString());
331 337
332 buf.insert(0, '&'); 338 buf.select(0, 0);
339 buf.prepend("&");
333 if (ch !is -1) 340 if (ch !is -1)
334 buf.append(cast(wchar) ch); 341 buf.append(dcharToString(cast(dchar) ch));
335 return buf.toString(); 342 return buf.toString();
336 } 343 }
337 } 344 }