Mercurial > projects > dwt-addons
annotate dwtx/jface/text/rules/PatternRule.d @ 151:eb21d3dfc767
fix module statement
author | Frank Benoit <benoit@tionex.de> |
---|---|
date | Sun, 24 Aug 2008 23:55:45 +0200 |
parents | 7d818bd32d63 |
children | f70d9508c95c |
rev | line source |
---|---|
129 | 1 /******************************************************************************* |
2 * Copyright (c) 2000, 2007 IBM Corporation and others. | |
3 * All rights reserved. This program and the accompanying materials | |
4 * are made available under the terms of the Eclipse Public License v1.0 | |
5 * which accompanies this distribution, and is available at | |
6 * http://www.eclipse.org/legal/epl-v10.html | |
7 * | |
8 * Contributors: | |
9 * IBM Corporation - initial API and implementation | |
10 * Christopher Lenz (cmlenz@gmx.de) - support for line continuation | |
11 * Port to the D programming language: | |
12 * Frank Benoit <benoit@tionex.de> | |
13 *******************************************************************************/ | |
14 | |
131 | 15 |
151 | 16 module dwtx.jface.text.rules.PatternRule; |
17 | |
131 | 18 import dwtx.jface.text.rules.FastPartitioner; // packageimport |
19 import dwtx.jface.text.rules.ITokenScanner; // packageimport | |
20 import dwtx.jface.text.rules.Token; // packageimport | |
21 import dwtx.jface.text.rules.RuleBasedScanner; // packageimport | |
22 import dwtx.jface.text.rules.EndOfLineRule; // packageimport | |
23 import dwtx.jface.text.rules.WordRule; // packageimport | |
24 import dwtx.jface.text.rules.WhitespaceRule; // packageimport | |
25 import dwtx.jface.text.rules.WordPatternRule; // packageimport | |
26 import dwtx.jface.text.rules.IPredicateRule; // packageimport | |
27 import dwtx.jface.text.rules.DefaultPartitioner; // packageimport | |
28 import dwtx.jface.text.rules.NumberRule; // packageimport | |
29 import dwtx.jface.text.rules.SingleLineRule; // packageimport | |
30 import dwtx.jface.text.rules.IWordDetector; // packageimport | |
31 import dwtx.jface.text.rules.RuleBasedDamagerRepairer; // packageimport | |
32 import dwtx.jface.text.rules.ICharacterScanner; // packageimport | |
33 import dwtx.jface.text.rules.IRule; // packageimport | |
34 import dwtx.jface.text.rules.DefaultDamagerRepairer; // packageimport | |
35 import dwtx.jface.text.rules.IToken; // packageimport | |
36 import dwtx.jface.text.rules.IPartitionTokenScanner; // packageimport | |
37 import dwtx.jface.text.rules.MultiLineRule; // packageimport | |
38 import dwtx.jface.text.rules.RuleBasedPartitioner; // packageimport | |
39 import dwtx.jface.text.rules.RuleBasedPartitionScanner; // packageimport | |
40 import dwtx.jface.text.rules.BufferedRuleBasedScanner; // packageimport | |
41 import dwtx.jface.text.rules.IWhitespaceDetector; // packageimport | |
42 | |
129 | 43 import dwt.dwthelper.utils; |
44 | |
45 import java.util.Arrays; | |
46 import java.util.Comparator; | |
47 | |
48 import dwtx.core.runtime.Assert; | |
49 | |
50 | |
51 | |
52 | |
53 | |
54 /** | |
55 * Standard implementation of <code>IPredicateRule</code>. | |
56 * It is capable of detecting a pattern which begins with a given start | |
57 * sequence and ends with a given end sequence. If the end sequence is | |
58 * not specified, it can be either end of line, end of file, or both. Additionally, | |
59 * the pattern can be constrained to begin in a certain column. The rule can also | |
60 * be used to check whether the text to scan covers half of the pattern, i.e. contains | |
61 * the end sequence required by the rule. | |
62 */ | |
63 public class PatternRule : IPredicateRule { | |
64 | |
65 /** | |
66 * Comparator that orders <code>char[]</code> in decreasing array lengths. | |
67 * | |
68 * @since 3.1 | |
69 */ | |
private static class DecreasingCharArrayLengthComparator : Comparator {
    /**
     * Orders two <code>char[]</code> values so that the longer one sorts first.
     *
     * @param o1 the first array
     * @param o2 the second array
     * @return a negative value if <code>o1</code> is longer, a positive value if
     *         <code>o2</code> is longer, and zero if both have the same length
     */
    public int compare(Object o1, Object o2) {
        // NOTE(review): the Java-style casts are kept from the port as they were - confirm they build as D.
        char[] first= (char[]) o1;
        char[] second= (char[]) o2;
        return second.length - first.length;
    }
}
75 | |
76 /** Internal setting for the un-initialized column constraint */ | |
77 protected static final int UNDEFINED= -1; | |
78 | |
79 /** The token to be returned on success */ | |
80 protected IToken fToken; | |
81 /** The pattern's start sequence */ | |
82 protected char[] fStartSequence; | |
83 /** The pattern's end sequence */ | |
84 protected char[] fEndSequence; | |
85 /** The pattern's column constraint */ | |
86 protected int fColumn= UNDEFINED; | |
87 /** The pattern's escape character */ | |
88 protected char fEscapeCharacter; | |
89 /** | |
90 * Indicates whether the escape character continues a line | |
91 * @since 3.0 | |
92 */ | |
93 protected bool fEscapeContinuesLine; | |
94 /** Indicates whether end of line terminates the pattern */ | |
95 protected bool fBreaksOnEOL; | |
96 /** Indicates whether end of file terminates the pattern */ | |
97 protected bool fBreaksOnEOF; | |
98 | |
99 /** | |
100 * Line delimiter comparator which orders according to decreasing delimiter length. | |
101 * @since 3.1 | |
102 */ | |
103 private Comparator fLineDelimiterComparator= new DecreasingCharArrayLengthComparator(); | |
104 /** | |
105 * Cached line delimiters. | |
106 * @since 3.1 | |
107 */ | |
108 private char[][] fLineDelimiters; | |
109 /** | |
110 * Cached sorted {@linkplain #fLineDelimiters}. | |
111 * @since 3.1 | |
112 */ | |
113 private char[][] fSortedLineDelimiters; | |
114 | |
115 /** | |
116 * Creates a rule for the given starting and ending sequence. | |
117 * When these sequences are detected the rule will return the specified token. | |
118 * Alternatively, the sequence can also be ended by the end of the line. | |
119 * Any character which follows the given escapeCharacter will be ignored. | |
120 * | |
121 * @param startSequence the pattern's start sequence | |
122 * @param endSequence the pattern's end sequence, <code>null</code> is a legal value | |
123 * @param token the token which will be returned on success | |
124 * @param escapeCharacter any character following this one will be ignored | |
125 * @param breaksOnEOL indicates whether the end of the line also terminates the pattern | |
126 */ | |
133
7d818bd32d63
Fix ctors to this with gvim regexp
Frank Benoit <benoit@tionex.de>
parents:
131
diff
changeset
|
public this(String startSequence, String endSequence, IToken token, char escapeCharacter, bool breaksOnEOL) {
    // A rule needs a non-empty start, some way to terminate, and a token to hand back.
    Assert.isTrue(startSequence !is null && startSequence.length() > 0);
    Assert.isTrue(endSequence !is null || breaksOnEOL);
    Assert.isNotNull(token);

    fToken= token;
    fEscapeCharacter= escapeCharacter;
    fBreaksOnEOL= breaksOnEOL;

    fStartSequence= startSequence.toCharArray();
    // A missing end sequence is stored as an empty array so later length checks need no null test.
    if (endSequence is null)
        fEndSequence= new char[0];
    else
        fEndSequence= endSequence.toCharArray();
}
138 | |
139 /** | |
140 * Creates a rule for the given starting and ending sequence. | |
141 * When these sequences are detected the rule will return the specified token. | |
142 * Alternatively, the sequence can also be ended by the end of the line or the end of the file. | |
143 * Any character which follows the given escapeCharacter will be ignored. | |
144 * | |
145 * @param startSequence the pattern's start sequence | |
146 * @param endSequence the pattern's end sequence, <code>null</code> is a legal value | |
147 * @param token the token which will be returned on success | |
148 * @param escapeCharacter any character following this one will be ignored | |
149 * @param breaksOnEOL indicates whether the end of the line also terminates the pattern | |
150 * @param breaksOnEOF indicates whether the end of the file also terminates the pattern | |
151 * @since 2.1 | |
152 */ | |
133
7d818bd32d63
Fix ctors to this with gvim regexp
Frank Benoit <benoit@tionex.de>
parents:
131
diff
changeset
|
public this(String startSequence, String endSequence, IToken token, char escapeCharacter, bool breaksOnEOL, bool breaksOnEOF) {
    // Validation and common setup live in the five-argument constructor.
    this(startSequence, endSequence, token, escapeCharacter, breaksOnEOL);
    // Remember whether reaching the end of the file also ends the pattern.
    this.fBreaksOnEOF= breaksOnEOF;
}
157 | |
158 /** | |
159 * Creates a rule for the given starting and ending sequence. | |
160 * When these sequences are detected the rule will return the specified token. | |
161 * Alternatively, the sequence can also be ended by the end of the line or the end of the file. | |
162 * Any character which follows the given escapeCharacter will be ignored. An end of line | |
163 * immediately after the given <code>lineContinuationCharacter</code> will not cause the | |
164 * pattern to terminate even if <code>breaksOnEOL</code> is set to true. | |
165 * | |
166 * @param startSequence the pattern's start sequence | |
167 * @param endSequence the pattern's end sequence, <code>null</code> is a legal value | |
168 * @param token the token which will be returned on success | |
169 * @param escapeCharacter any character following this one will be ignored | |
170 * @param breaksOnEOL indicates whether the end of the line also terminates the pattern | |
171 * @param breaksOnEOF indicates whether the end of the file also terminates the pattern | |
172 * @param escapeContinuesLine indicates whether the specified escape character is used for line | |
173 * continuation, so that an end of line immediately after the escape character does not | |
174 * terminate the pattern, even if <code>breaksOnEOL</code> is set | |
175 * @since 3.0 | |
176 */ | |
133
7d818bd32d63
Fix ctors to this with gvim regexp
Frank Benoit <benoit@tionex.de>
parents:
131
diff
changeset
|
public this(String startSequence, String endSequence, IToken token, char escapeCharacter, bool breaksOnEOL, bool breaksOnEOF, bool escapeContinuesLine) {
    // Everything except the line-continuation flag is handled by the six-argument constructor.
    this(startSequence, endSequence, token, escapeCharacter, breaksOnEOL, breaksOnEOF);
    // Remember whether the escape character also acts as a line continuation.
    this.fEscapeContinuesLine= escapeContinuesLine;
}
181 | |
182 /** | |
183 * Sets a column constraint for this rule. If set, the rule's token | |
184 * will only be returned if the pattern is detected starting at the | |
185 * specified column. If the column is smaller than 0, the column | |
186 * constraint is considered removed. | |
187 * | |
188 * @param column the column in which the pattern starts | |
189 */ | |
public void setColumnConstraint(int column) {
    // Any negative column means "no constraint" and is normalized to UNDEFINED.
    fColumn= (column < 0 ? UNDEFINED : column);
}
195 | |
196 | |
197 /** | |
198 * Evaluates this rule without considering any column constraints. | |
199 * | |
200 * @param scanner the character scanner to be used | |
201 * @return the token resulting from this evaluation | |
202 */ | |
protected IToken doEvaluate(ICharacterScanner scanner) {
    // Without a resume request the full pattern (start and end) must be matched.
    bool resume= false;
    return doEvaluate(scanner, resume);
}
206 | |
207 /** | |
208 * Evaluates this rule without considering any column constraints. Resumes | |
209 * detection, i.e. looks only for the end sequence required by this rule if the | |
210 * <code>resume</code> flag is set. | |
211 * | |
212 * @param scanner the character scanner to be used | |
213 * @param resume <code>true</code> if detection should be resumed, <code>false</code> otherwise | |
214 * @return the token resulting from this evaluation | |
215 * @since 2.0 | |
216 */ | |
protected IToken doEvaluate(ICharacterScanner scanner, bool resume) {

    bool matched;
    if (resume) {
        // Resuming: the start sequence is taken as already consumed, so only the end is searched.
        matched= endSequenceDetected(scanner);
    } else {
        // Fresh evaluation: first character, rest of the start sequence, then the end sequence.
        // The short-circuit order keeps the scanner reads identical to nested ifs.
        int first= scanner.read();
        matched= first is fStartSequence[0]
                && sequenceDetected(scanner, fStartSequence, false)
                && endSequenceDetected(scanner);
    }

    if (matched)
        return fToken;

    // No match: step the scanner back by one character and report an undefined token.
    scanner.unread();
    return Token.UNDEFINED;
}
238 | |
239 /* | |
240 * @see IRule#evaluate(ICharacterScanner) | |
241 */ | |
public IToken evaluate(ICharacterScanner scanner) {
    // A plain evaluation never resumes inside an already started pattern.
    bool resume= false;
    return evaluate(scanner, resume);
}
245 | |
246 /** | |
247 * Returns whether the end sequence was detected. As the pattern can be considered | |
248 * ended by a line delimiter, the result of this method is <code>true</code> if the | |
249 * rule breaks on the end of the line, or if the EOF character is read. | |
250 * | |
251 * @param scanner the character scanner to be used | |
252 * @return <code>true</code> if the end sequence has been detected | |
253 */ | |
protected bool endSequenceDetected(ICharacterScanner scanner) {

    // Refresh the cached, length-sorted delimiter list only when the scanner's delimiters changed.
    char[][] originalDelimiters= scanner.getLegalLineDelimiters();
    int count= originalDelimiters.length;
    if (fLineDelimiters is null || fLineDelimiters.length !is count) {
        // No cache yet, or the cached set has a different size: allocate a matching target array.
        // The size check must look at the cached array; comparing the scanner's array length with
        // itself never detects a size change and leaves the sorted array with the wrong length.
        fSortedLineDelimiters= new char[count][];
    } else {
        // Same size: count drops to 0 only if every cached entry is the very same array.
        while (count > 0 && fLineDelimiters[count-1] is originalDelimiters[count-1])
            count--;
    }
    if (count !is 0) {
        fLineDelimiters= originalDelimiters;
        System.arraycopy(fLineDelimiters, 0, fSortedLineDelimiters, 0, fLineDelimiters.length);
        // Longest delimiter first, so a longer delimiter is tried before a shorter one it starts with.
        Arrays.sort(fSortedLineDelimiters, fLineDelimiterComparator);
    }

    // Starts at 1 to account for the read that finally returns EOF.
    // NOTE(review): characters consumed after an escape character are not counted here, so the
    // rewind below can fall short when escapes were seen - confirm against the scanner contract.
    int readCount= 1;
    int c;
    while ((c= scanner.read()) !is ICharacterScanner.EOF) {
        if (c is fEscapeCharacter) {
            // Skip escaped character(s)
            if (fEscapeContinuesLine) {
                // An escaped line delimiter is swallowed as a whole so it cannot end the pattern.
                c= scanner.read();
                for (int i= 0; i < fSortedLineDelimiters.length; i++) {
                    if (c is fSortedLineDelimiters[i][0] && sequenceDetected(scanner, fSortedLineDelimiters[i], fBreaksOnEOF))
                        break;
                }
            } else
                scanner.read();

        } else if (fEndSequence.length > 0 && c is fEndSequence[0]) {
            // Check if the specified end sequence has been found.
            // A sequence cut short by EOF only counts when the rule is allowed to break on EOF.
            if (sequenceDetected(scanner, fEndSequence, fBreaksOnEOF))
                return true;
        } else if (fBreaksOnEOL) {
            // Check for end of line since it can be used to terminate the pattern.
            for (int i= 0; i < fSortedLineDelimiters.length; i++) {
                if (c is fSortedLineDelimiters[i][0] && sequenceDetected(scanner, fSortedLineDelimiters[i], fBreaksOnEOF))
                    return true;
            }
        }
        readCount++;
    }

    // EOF reached without an end sequence: success only if EOF may terminate the pattern.
    if (fBreaksOnEOF)
        return true;

    // Otherwise rewind the characters counted above before reporting failure.
    for (; readCount > 0; readCount--)
        scanner.unread();

    return false;
}
306 | |
307 /** | |
308 * Returns whether the next characters to be read by the character scanner | |
309 * are an exact match with the given sequence. No escape characters are allowed | |
310 * within the sequence. If specified the sequence is considered to be found | |
311 * when reading the EOF character. | |
312 * | |
313 * @param scanner the character scanner to be used | |
314 * @param sequence the sequence to be detected | |
315 * @param eofAllowed indicates whether EOF terminates the pattern | |
316 * @return <code>true</code> if the given sequence has been detected | |
317 */ | |
protected bool sequenceDetected(ICharacterScanner scanner, char[] sequence, bool eofAllowed) {
    // Index 0 is skipped: the loop only compares the characters from index 1 onwards.
    for (int pos= 1; pos < sequence.length; pos++) {
        int ch= scanner.read();

        // Running into EOF counts as a match when the caller allows it.
        if (eofAllowed && ch is ICharacterScanner.EOF)
            return true;

        if (ch is sequence[pos])
            continue;

        // Mismatch: undo every read made in this call (pos of them), leaving the
        // first character of the sequence consumed.
        for (int pending= pos; pending > 0; pending--)
            scanner.unread();
        return false;
    }

    return true;
}
335 | |
336 /* | |
337 * @see IPredicateRule#evaluate(ICharacterScanner, bool) | |
338 * @since 2.0 | |
339 */ | |
public IToken evaluate(ICharacterScanner scanner, bool resume) {
    // Without a column constraint the rule is evaluated directly.
    if (fColumn is UNDEFINED)
        return doEvaluate(scanner, resume);

    // Peek at the next character without consuming it.
    int peeked= scanner.read();
    scanner.unread();

    // The column is only queried when the peeked character can start the pattern.
    if (peeked !is fStartSequence[0])
        return Token.UNDEFINED;
    if (fColumn !is scanner.getColumn())
        return Token.UNDEFINED;

    return doEvaluate(scanner, resume);
}
350 | |
351 /* | |
352 * @see IPredicateRule#getSuccessToken() | |
353 * @since 2.0 | |
354 */ | |
public IToken getSuccessToken() {
    // The token given at construction time is the one reported for a successful match.
    return this.fToken;
}
358 } |