Mercurial > projects > dwt-addons
annotate dwtx/jface/text/rules/PatternRule.d @ 200:eb3414669eb0 default tip
fix for dmd 1.041 and tango 0.99.8
author | Frank Benoit <benoit@tionex.de> |
---|---|
date | Sat, 28 Mar 2009 03:09:57 +0100 |
parents | eb98a5cbfd78 |
children |
rev | line source |
---|---|
129 | 1 /******************************************************************************* |
2 * Copyright (c) 2000, 2007 IBM Corporation and others. | |
3 * All rights reserved. This program and the accompanying materials | |
4 * are made available under the terms of the Eclipse Public License v1.0 | |
5 * which accompanies this distribution, and is available at | |
6 * http://www.eclipse.org/legal/epl-v10.html | |
7 * | |
8 * Contributors: | |
9 * IBM Corporation - initial API and implementation | |
10 * Christopher Lenz (cmlenz@gmx.de) - support for line continuation | |
11 * Port to the D programming language: | |
12 * Frank Benoit <benoit@tionex.de> | |
13 *******************************************************************************/ | |
14 | |
131 | 15 |
151 | 16 module dwtx.jface.text.rules.PatternRule; |
17 | |
131 | 18 import dwtx.jface.text.rules.FastPartitioner; // packageimport |
19 import dwtx.jface.text.rules.ITokenScanner; // packageimport | |
20 import dwtx.jface.text.rules.Token; // packageimport | |
21 import dwtx.jface.text.rules.RuleBasedScanner; // packageimport | |
22 import dwtx.jface.text.rules.EndOfLineRule; // packageimport | |
23 import dwtx.jface.text.rules.WordRule; // packageimport | |
24 import dwtx.jface.text.rules.WhitespaceRule; // packageimport | |
25 import dwtx.jface.text.rules.WordPatternRule; // packageimport | |
26 import dwtx.jface.text.rules.IPredicateRule; // packageimport | |
27 import dwtx.jface.text.rules.DefaultPartitioner; // packageimport | |
28 import dwtx.jface.text.rules.NumberRule; // packageimport | |
29 import dwtx.jface.text.rules.SingleLineRule; // packageimport | |
30 import dwtx.jface.text.rules.RuleBasedDamagerRepairer; // packageimport | |
31 import dwtx.jface.text.rules.ICharacterScanner; // packageimport | |
32 import dwtx.jface.text.rules.IRule; // packageimport | |
33 import dwtx.jface.text.rules.DefaultDamagerRepairer; // packageimport | |
34 import dwtx.jface.text.rules.IToken; // packageimport | |
35 import dwtx.jface.text.rules.IPartitionTokenScanner; // packageimport | |
36 import dwtx.jface.text.rules.MultiLineRule; // packageimport | |
37 import dwtx.jface.text.rules.RuleBasedPartitioner; // packageimport | |
38 import dwtx.jface.text.rules.RuleBasedPartitionScanner; // packageimport | |
39 import dwtx.jface.text.rules.BufferedRuleBasedScanner; // packageimport | |
40 import dwtx.jface.text.rules.IWhitespaceDetector; // packageimport | |
41 | |
129 | 42 import dwt.dwthelper.utils; |
43 | |
153
f70d9508c95c
Fix java Collection imports
Frank Benoit <benoit@tionex.de>
parents:
151
diff
changeset
|
44 import dwtx.dwtxhelper.Collection; |
f70d9508c95c
Fix java Collection imports
Frank Benoit <benoit@tionex.de>
parents:
151
diff
changeset
|
45 |
129 | 46 |
47 import dwtx.core.runtime.Assert; | |
48 | |
49 | |
50 | |
51 | |
52 | |
53 /** | |
54 * Standard implementation of <code>IPredicateRule</code>. | |
55 * It is capable of detecting a pattern which begins with a given start | |
56 * sequence and ends with a given end sequence. If the end sequence is | |
57 * not specified, it can be either end of line, end of file, or both. Additionally, | |
58 * the pattern can be constrained to begin in a certain column. The rule can also | |
59 * be used to check whether the text to scan covers half of the pattern, i.e. contains | |
60 * the end sequence required by the rule. | |
61 */ | |
62 public class PatternRule : IPredicateRule { | |
63 | |
/**
 * Comparator used to sort line delimiters so that longer ones come first.
 * Both arguments are converted with <code>stringcast</code> and compared
 * by length only; the contents are not inspected.
 *
 * @since 3.1
 */
private static class DecreasingCharArrayLengthComparator : Comparator {
    public int compare(Object o1, Object o2) {
        auto secondLength= stringcast(o2).length;
        auto firstLength= stringcast(o1).length;
        // Positive when o2 is longer, which places longer sequences first.
        return secondLength - firstLength;
    }
}
74 | |
75 /** Internal setting for the un-initialized column constraint */ | |
76 protected static final int UNDEFINED= -1; | |
77 | |
78 /** The token to be returned on success */ | |
79 protected IToken fToken; | |
80 /** The pattern's start sequence */ | |
81 protected char[] fStartSequence; | |
82 /** The pattern's end sequence */ | |
83 protected char[] fEndSequence; | |
84 /** The pattern's column constraint */ | |
162 | 85 protected int fColumn; |
129 | 86 /** The pattern's escape character */ |
87 protected char fEscapeCharacter; | |
88 /** | |
89 * Indicates whether the escape character continues a line | |
90 * @since 3.0 | |
91 */ | |
92 protected bool fEscapeContinuesLine; | |
93 /** Indicates whether end of line terminates the pattern */ | |
94 protected bool fBreaksOnEOL; | |
95 /** Indicates whether end of file terminates the pattern */ | |
96 protected bool fBreaksOnEOF; | |
97 | |
98 /** | |
99 * Line delimiter comparator which orders according to decreasing delimiter length. | |
100 * @since 3.1 | |
101 */ | |
162 | 102 private Comparator fLineDelimiterComparator; |
129 | 103 /** |
104 * Cached line delimiters. | |
105 * @since 3.1 | |
106 */ | |
107 private char[][] fLineDelimiters; | |
108 /** | |
109 * Cached sorted {@linkplain #fLineDelimiters}. | |
110 * @since 3.1 | |
111 */ | |
112 private char[][] fSortedLineDelimiters; | |
113 | |
114 /** | |
115 * Creates a rule for the given starting and ending sequence. | |
116 * When these sequences are detected the rule will return the specified token. | |
117 * Alternatively, the sequence can also be ended by the end of the line. | |
118 * Any character which follows the given escapeCharacter will be ignored. | |
119 * | |
120 * @param startSequence the pattern's start sequence | |
121 * @param endSequence the pattern's end sequence, <code>null</code> is a legal value | |
122 * @param token the token which will be returned on success | |
123 * @param escapeCharacter any character following this one will be ignored | |
124 * @param breaksOnEOL indicates whether the end of the line also terminates the pattern | |
125 */ | |
133
7d818bd32d63
Fix ctors to this with gvim regexp
Frank Benoit <benoit@tionex.de>
parents:
131
diff
changeset
|
public this(String startSequence, String endSequence, IToken token, char escapeCharacter, bool breaksOnEOL) {
    // Validate the arguments: a non-empty start sequence is mandatory, and the
    // pattern needs either an explicit end sequence or end-of-line termination.
    Assert.isTrue(startSequence !is null && startSequence.length() > 0);
    Assert.isTrue(endSequence !is null || breaksOnEOL);
    Assert.isNotNull(cast(Object)token);

    // No column constraint until setColumnConstraint is called.
    fColumn= UNDEFINED;
    fLineDelimiterComparator= new DecreasingCharArrayLengthComparator();

    fStartSequence= startSequence.toCharArray();
    // A missing end sequence is stored as an empty array.
    if (endSequence is null)
        fEndSequence= new char[0];
    else
        fEndSequence= endSequence.toCharArray();

    fToken= token;
    fEscapeCharacter= escapeCharacter;
    fBreaksOnEOL= breaksOnEOL;
}
140 | |
141 /** | |
142 * Creates a rule for the given starting and ending sequence. | |
143 * When these sequences are detected the rule will return the specified token. | |
144 * Alternatively, the sequence can also be ended by the end of the line or the end of the file. | |
145 * Any character which follows the given escapeCharacter will be ignored. | |
146 * | |
147 * @param startSequence the pattern's start sequence | |
148 * @param endSequence the pattern's end sequence, <code>null</code> is a legal value | |
149 * @param token the token which will be returned on success | |
150 * @param escapeCharacter any character following this one will be ignored | |
151 * @param breaksOnEOL indicates whether the end of the line also terminates the pattern | |
152 * @param breaksOnEOF indicates whether the end of the file also terminates the pattern | |
153 * @since 2.1 | |
154 */ | |
133
7d818bd32d63
Fix ctors to this with gvim regexp
Frank Benoit <benoit@tionex.de>
parents:
131
diff
changeset
|
public this(String startSequence, String endSequence, IToken token, char escapeCharacter, bool breaksOnEOL, bool breaksOnEOF) {
    // Common validation and setup is done by the five-argument constructor.
    this(startSequence, endSequence, token, escapeCharacter, breaksOnEOL);
    this.fBreaksOnEOF= breaksOnEOF;
}
159 | |
160 /** | |
161 * Creates a rule for the given starting and ending sequence. | |
162 * When these sequences are detected the rule will return the specified token. | |
163 * Alternatively, the sequence can also be ended by the end of the line or the end of the file. | |
164 * Any character which follows the given escapeCharacter will be ignored. An end of line | |
165 * immediately after the given <code>lineContinuationCharacter</code> will not cause the | |
166 * pattern to terminate even if <code>breaksOnEOL</code> is set to true. | |
167 * | |
168 * @param startSequence the pattern's start sequence | |
169 * @param endSequence the pattern's end sequence, <code>null</code> is a legal value | |
170 * @param token the token which will be returned on success | |
171 * @param escapeCharacter any character following this one will be ignored | |
172 * @param breaksOnEOL indicates whether the end of the line also terminates the pattern | |
173 * @param breaksOnEOF indicates whether the end of the file also terminates the pattern | |
174 * @param escapeContinuesLine indicates whether the specified escape character is used for line | |
175 * continuation, so that an end of line immediately after the escape character does not | |
176 * terminate the pattern, even if <code>breaksOnEOL</code> is set | |
177 * @since 3.0 | |
178 */ | |
133
7d818bd32d63
Fix ctors to this with gvim regexp
Frank Benoit <benoit@tionex.de>
parents:
131
diff
changeset
|
public this(String startSequence, String endSequence, IToken token, char escapeCharacter, bool breaksOnEOL, bool breaksOnEOF, bool escapeContinuesLine) {
    // Everything except the line-continuation flag is handled by the six-argument constructor.
    this(startSequence, endSequence, token, escapeCharacter, breaksOnEOL, breaksOnEOF);
    this.fEscapeContinuesLine= escapeContinuesLine;
}
183 | |
/**
 * Sets a column constraint for this rule. If set, the rule's token
 * will only be returned if the pattern is detected starting at the
 * specified column. If the column is smaller than 0, the column
 * constraint is considered removed.
 *
 * @param column the column in which the pattern starts
 */
public void setColumnConstraint(int column) {
    // Every negative value is normalized to the UNDEFINED marker.
    fColumn= (column < 0) ? UNDEFINED : column;
}
197 | |
198 | |
/**
 * Evaluates this rule without considering any column constraints.
 * Equivalent to calling the two-argument variant with <code>resume</code>
 * set to <code>false</code>.
 *
 * @param scanner the character scanner to be used
 * @return the token resulting from this evaluation
 */
protected IToken doEvaluate(ICharacterScanner scanner) {
    return this.doEvaluate(scanner, false);
}
208 | |
/**
 * Evaluates this rule without considering any column constraints. If the
 * <code>resume</code> flag is set, detection is resumed, i.e. the method
 * looks only for the end sequence required by this rule. Otherwise one
 * character is read and must begin the start sequence, followed by the
 * rest of the start sequence and the end of the pattern.
 *
 * @param scanner the character scanner to be used
 * @param resume <code>true</code> if detection should be resumed, <code>false</code> otherwise
 * @return the success token if the pattern was detected, <code>Token.UNDEFINED</code> otherwise
 * @since 2.0
 */
protected IToken doEvaluate(ICharacterScanner scanner, bool resume) {
    bool matched;

    if (resume) {
        matched= endSequenceDetected(scanner);
    } else {
        int first= scanner.read();
        // Short-circuiting keeps the original call order: start sequence first,
        // end sequence only if the start sequence matched.
        matched= (first is fStartSequence[0])
            && sequenceDetected(scanner, fStartSequence, false)
            && endSequenceDetected(scanner);
    }

    if (matched)
        return fToken;

    // No match: push back one character before reporting failure.
    scanner.unread();
    return Token.UNDEFINED;
}
240 | |
/*
 * Evaluates the rule from the scanner's current position without resuming.
 *
 * @see IRule#evaluate(ICharacterScanner)
 */
public IToken evaluate(ICharacterScanner scanner) {
    return this.evaluate(scanner, false);
}
247 | |
/**
 * Returns whether the end of the pattern was detected. Characters are read
 * from the scanner until one of the following happens:
 * <ul>
 * <li>the end sequence is found: returns <code>true</code>,</li>
 * <li>a line delimiter is found and the rule breaks on end of line:
 *     returns <code>true</code>,</li>
 * <li>EOF is read: returns <code>true</code> if the rule breaks on end of
 *     file; otherwise the scanner is rewound by the counted number of reads
 *     and <code>false</code> is returned.</li>
 * </ul>
 * The character following the escape character is skipped. If the escape
 * character continues lines, a complete line delimiter following it is
 * skipped as well.
 *
 * @param scanner the character scanner to be used
 * @return <code>true</code> if the end sequence has been detected
 */
protected bool endSequenceDetected(ICharacterScanner scanner) {

    // Refresh the cached, length-sorted line delimiters if the scanner's
    // delimiters changed since the last call.
    char[][] originalDelimiters= scanner.getLegalLineDelimiters();
    int count= originalDelimiters.length;

    // The length of the *cached* array must be compared here. Comparing
    // originalDelimiters.length with count (which was just set from it) is
    // always false, so a change in the number of delimiters went unnoticed
    // and the element-wise check below could index past the cached array.
    bool rebuild= fLineDelimiters is null || fLineDelimiters.length !is count;
    if (rebuild) {
        fSortedLineDelimiters= new char[][](count);
    } else {
        // Same length: walk backwards while the entries are identical.
        while (count > 0 && fLineDelimiters[count-1] is originalDelimiters[count-1])
            count--;
        rebuild= count !is 0;
    }
    if (rebuild) {
        // Refill whenever the sorted array was reallocated, so it never
        // stays empty while the cached delimiters look up to date.
        fLineDelimiters= originalDelimiters;
        if (fLineDelimiters.length > 0) {
            System.arraycopy(fLineDelimiters, 0, fSortedLineDelimiters, 0, fLineDelimiters.length);
            Arrays.sort(fSortedLineDelimiters, fLineDelimiterComparator);
        }
    }

    // Number of unread() calls issued on failure. It starts at 1 because the
    // read that returns EOF ends the loop without being counted in the body.
    int readCount= 1;
    int c;
    while ((c= scanner.read()) !is ICharacterScanner.EOF) {
        if (c is fEscapeCharacter) {
            // Skip escaped character(s)
            if (fEscapeContinuesLine) {
                c= scanner.read();
                // Longest delimiters are tried first so a multi-character
                // delimiter is consumed as a whole.
                for (int i= 0; i < fSortedLineDelimiters.length; i++) {
                    if (c is fSortedLineDelimiters[i][0] && sequenceDetected(scanner, fSortedLineDelimiters[i], true))
                        break;
                }
            } else
                scanner.read();

        } else if (fEndSequence.length > 0 && c is fEndSequence[0]) {
            // Check if the specified end sequence has been found.
            if (sequenceDetected(scanner, fEndSequence, true))
                return true;
        } else if (fBreaksOnEOL) {
            // Check for end of line since it can be used to terminate the pattern.
            for (int i= 0; i < fSortedLineDelimiters.length; i++) {
                if (c is fSortedLineDelimiters[i][0] && sequenceDetected(scanner, fSortedLineDelimiters[i], true))
                    return true;
            }
        }
        readCount++;
    }

    if (fBreaksOnEOF)
        return true;

    // Pattern not terminated: rewind the scanner by the counted reads.
    for (; readCount > 0; readCount--)
        scanner.unread();

    return false;
}
308 | |
/**
 * Returns whether the next characters to be read by the character scanner
 * are an exact match with the given sequence, starting at its second
 * character; the first character is expected to have been read and checked
 * by the caller. No escape characters are allowed within the sequence.
 * If <code>eofAllowed</code> is set, reading EOF before the sequence is
 * complete counts as a match.
 *
 * @param scanner the character scanner to be used
 * @param sequence the sequence to be detected
 * @param eofAllowed indicates whether EOF terminates the pattern
 * @return <code>true</code> if the given sequence has been detected
 */
protected bool sequenceDetected(ICharacterScanner scanner, char[] sequence, bool eofAllowed) {
    int pos= 1;
    while (pos < sequence.length) {
        int ch= scanner.read();

        if (eofAllowed && ch is ICharacterScanner.EOF)
            return true;

        if (ch !is sequence[pos]) {
            // Mismatch: undo the pos characters read by this method.
            // The first character of the sequence is left consumed.
            for (int undo= pos; undo > 0; undo--)
                scanner.unread();
            return false;
        }
        pos++;
    }

    return true;
}
337 | |
/*
 * Evaluates the rule, honoring the column constraint if one is set: the
 * next character must begin the start sequence and the scanner's column
 * must equal the constraint, otherwise Token.UNDEFINED is returned.
 *
 * @see IPredicateRule#evaluate(ICharacterScanner, bool)
 * @since 2.0
 */
public IToken evaluate(ICharacterScanner scanner, bool resume) {
    if (fColumn !is UNDEFINED) {
        // Peek at the next character without consuming it.
        int peeked= scanner.read();
        scanner.unread();

        // The column is only queried when the first character matches.
        if (peeked !is fStartSequence[0] || fColumn !is scanner.getColumn())
            return Token.UNDEFINED;
    }

    return doEvaluate(scanner, resume);
}
352 | |
/*
 * Returns the token this rule yields when the pattern is detected.
 *
 * @see IPredicateRule#getSuccessToken()
 * @since 2.0
 */
public IToken getSuccessToken() {
    IToken successToken= fToken;
    return successToken;
}
360 } |