129
|
1 /*******************************************************************************
|
|
2 * Copyright (c) 2000, 2007 IBM Corporation and others.
|
|
3 * All rights reserved. This program and the accompanying materials
|
|
4 * are made available under the terms of the Eclipse Public License v1.0
|
|
5 * which accompanies this distribution, and is available at
|
|
6 * http://www.eclipse.org/legal/epl-v10.html
|
|
7 *
|
|
8 * Contributors:
|
|
9 * IBM Corporation - initial API and implementation
|
|
10 * Christopher Lenz (cmlenz@gmx.de) - support for line continuation
|
|
11 * Port to the D programming language:
|
|
12 * Frank Benoit <benoit@tionex.de>
|
|
13 *******************************************************************************/
|
|
14
|
131
|
15
|
|
16 import dwtx.jface.text.rules.FastPartitioner; // packageimport
|
|
17 import dwtx.jface.text.rules.ITokenScanner; // packageimport
|
|
18 import dwtx.jface.text.rules.Token; // packageimport
|
|
19 import dwtx.jface.text.rules.RuleBasedScanner; // packageimport
|
|
20 import dwtx.jface.text.rules.EndOfLineRule; // packageimport
|
|
21 import dwtx.jface.text.rules.WordRule; // packageimport
|
|
22 import dwtx.jface.text.rules.WhitespaceRule; // packageimport
|
|
23 import dwtx.jface.text.rules.WordPatternRule; // packageimport
|
|
24 import dwtx.jface.text.rules.IPredicateRule; // packageimport
|
|
25 import dwtx.jface.text.rules.DefaultPartitioner; // packageimport
|
|
26 import dwtx.jface.text.rules.NumberRule; // packageimport
|
|
27 import dwtx.jface.text.rules.SingleLineRule; // packageimport
|
|
28 import dwtx.jface.text.rules.IWordDetector; // packageimport
|
|
29 import dwtx.jface.text.rules.RuleBasedDamagerRepairer; // packageimport
|
|
30 import dwtx.jface.text.rules.ICharacterScanner; // packageimport
|
|
31 import dwtx.jface.text.rules.IRule; // packageimport
|
|
32 import dwtx.jface.text.rules.DefaultDamagerRepairer; // packageimport
|
|
33 import dwtx.jface.text.rules.IToken; // packageimport
|
|
34 import dwtx.jface.text.rules.IPartitionTokenScanner; // packageimport
|
|
35 import dwtx.jface.text.rules.MultiLineRule; // packageimport
|
|
36 import dwtx.jface.text.rules.RuleBasedPartitioner; // packageimport
|
|
37 import dwtx.jface.text.rules.RuleBasedPartitionScanner; // packageimport
|
|
38 import dwtx.jface.text.rules.BufferedRuleBasedScanner; // packageimport
|
|
39 import dwtx.jface.text.rules.IWhitespaceDetector; // packageimport
|
|
40
|
129
|
41 module dwtx.jface.text.rules.PatternRule;
|
|
42
|
|
43 import dwt.dwthelper.utils;
|
|
44
|
|
45 import java.util.Arrays;
|
|
46 import java.util.Comparator;
|
|
47
|
|
48 import dwtx.core.runtime.Assert;
|
|
49
|
|
50
|
|
51
|
|
52
|
|
53
|
|
/**
 * Standard implementation of <code>IPredicateRule</code>.
 * It is capable of detecting a pattern which begins with a given start
 * sequence and ends with a given end sequence. If the end sequence is
 * not specified, it can be either end of line, end of file, or both. Additionally,
 * the pattern can be constrained to begin in a certain column. The rule can also
 * be used to check whether the text to scan covers half of the pattern, i.e. contains
 * the end sequence required by the rule.
 */
public class PatternRule : IPredicateRule {

    /**
     * Comparator that orders <code>char[]</code> in decreasing array lengths.
     *
     * @since 3.1
     */
    private static class DecreasingCharArrayLengthComparator : Comparator {
        // NOTE(review): Java-style casts are retained from the port; confirm they
        // are accepted by the D toolchain / helper layer used for this module.
        public int compare(Object o1, Object o2) {
            // Longer arrays sort first: a positive result when o2 is longer than o1.
            return ((char[]) o2).length - ((char[]) o1).length;
        }
    }

    /** Internal setting for the un-initialized column constraint */
    protected static final int UNDEFINED= -1;

    /** The token to be returned on success */
    protected IToken fToken;
    /** The pattern's start sequence */
    protected char[] fStartSequence;
    /** The pattern's end sequence (empty when no end sequence was given) */
    protected char[] fEndSequence;
    /** The pattern's column constraint */
    protected int fColumn= UNDEFINED;
    /** The pattern's escape character */
    protected char fEscapeCharacter;
    /**
     * Indicates whether the escape character continues a line
     * @since 3.0
     */
    protected bool fEscapeContinuesLine;
    /** Indicates whether end of line terminates the pattern */
    protected bool fBreaksOnEOL;
    /** Indicates whether end of file terminates the pattern */
    protected bool fBreaksOnEOF;

    /**
     * Line delimiter comparator which orders according to decreasing delimiter length.
     * @since 3.1
     */
    private Comparator fLineDelimiterComparator= new DecreasingCharArrayLengthComparator();
    /**
     * Cached line delimiters, as last returned by the scanner.
     * @since 3.1
     */
    private char[][] fLineDelimiters;
    /**
     * Cached sorted {@linkplain #fLineDelimiters}.
     * @since 3.1
     */
    private char[][] fSortedLineDelimiters;

    /**
     * Creates a rule for the given starting and ending sequence.
     * When these sequences are detected the rule will return the specified token.
     * Alternatively, the sequence can also be ended by the end of the line.
     * Any character which follows the given escapeCharacter will be ignored.
     *
     * @param startSequence the pattern's start sequence, must not be <code>null</code> or empty
     * @param endSequence the pattern's end sequence, <code>null</code> is a legal value
     *        as long as <code>breaksOnEOL</code> is set
     * @param token the token which will be returned on success, must not be <code>null</code>
     * @param escapeCharacter any character following this one will be ignored
     * @param breaksOnEOL indicates whether the end of the line also terminates the pattern
     */
    public PatternRule(String startSequence, String endSequence, IToken token, char escapeCharacter, bool breaksOnEOL) {
        Assert.isTrue(startSequence !is null && startSequence.length() > 0);
        Assert.isTrue(endSequence !is null || breaksOnEOL);
        Assert.isNotNull(token);

        fStartSequence= startSequence.toCharArray();
        // A missing end sequence is stored as an empty array so later code can test its length.
        fEndSequence= (endSequence is null ? new char[0] : endSequence.toCharArray());
        fToken= token;
        fEscapeCharacter= escapeCharacter;
        fBreaksOnEOL= breaksOnEOL;
    }

    /**
     * Creates a rule for the given starting and ending sequence.
     * When these sequences are detected the rule will return the specified token.
     * Alternatively, the sequence can also be ended by the end of the line or the end of the file.
     * Any character which follows the given escapeCharacter will be ignored.
     *
     * @param startSequence the pattern's start sequence
     * @param endSequence the pattern's end sequence, <code>null</code> is a legal value
     * @param token the token which will be returned on success
     * @param escapeCharacter any character following this one will be ignored
     * @param breaksOnEOL indicates whether the end of the line also terminates the pattern
     * @param breaksOnEOF indicates whether the end of the file also terminates the pattern
     * @since 2.1
     */
    public PatternRule(String startSequence, String endSequence, IToken token, char escapeCharacter, bool breaksOnEOL, bool breaksOnEOF) {
        this(startSequence, endSequence, token, escapeCharacter, breaksOnEOL);
        fBreaksOnEOF= breaksOnEOF;
    }

    /**
     * Creates a rule for the given starting and ending sequence.
     * When these sequences are detected the rule will return the specified token.
     * Alternatively, the sequence can also be ended by the end of the line or the end of the file.
     * Any character which follows the given escapeCharacter will be ignored. An end of line
     * immediately after the given escape character will not cause the
     * pattern to terminate even if <code>breaksOnEOL</code> is set to true, provided
     * <code>escapeContinuesLine</code> is set.
     *
     * @param startSequence the pattern's start sequence
     * @param endSequence the pattern's end sequence, <code>null</code> is a legal value
     * @param token the token which will be returned on success
     * @param escapeCharacter any character following this one will be ignored
     * @param breaksOnEOL indicates whether the end of the line also terminates the pattern
     * @param breaksOnEOF indicates whether the end of the file also terminates the pattern
     * @param escapeContinuesLine indicates whether the specified escape character is used for line
     *        continuation, so that an end of line immediately after the escape character does not
     *        terminate the pattern, even if <code>breaksOnEOL</code> is set
     * @since 3.0
     */
    public PatternRule(String startSequence, String endSequence, IToken token, char escapeCharacter, bool breaksOnEOL, bool breaksOnEOF, bool escapeContinuesLine) {
        this(startSequence, endSequence, token, escapeCharacter, breaksOnEOL, breaksOnEOF);
        fEscapeContinuesLine= escapeContinuesLine;
    }

    /**
     * Sets a column constraint for this rule. If set, the rule's token
     * will only be returned if the pattern is detected starting at the
     * specified column. If the column is smaller than 0, the column
     * constraint is considered removed.
     *
     * @param column the column in which the pattern starts
     */
    public void setColumnConstraint(int column) {
        // Any negative value is normalized to the single "no constraint" marker.
        if (column < 0)
            column= UNDEFINED;
        fColumn= column;
    }


    /**
     * Evaluates this rule without considering any column constraints.
     *
     * @param scanner the character scanner to be used
     * @return the token resulting from this evaluation
     */
    protected IToken doEvaluate(ICharacterScanner scanner) {
        return doEvaluate(scanner, false);
    }

    /**
     * Evaluates this rule without considering any column constraints. Resumes
     * detection, i.e. looks only for the end sequence required by this rule, if the
     * <code>resume</code> flag is set.
     *
     * @param scanner the character scanner to be used
     * @param resume <code>true</code> if detection should be resumed, <code>false</code> otherwise
     * @return the success token if the pattern was detected, <code>Token.UNDEFINED</code> otherwise
     * @since 2.0
     */
    protected IToken doEvaluate(ICharacterScanner scanner, bool resume) {

        if (resume) {

            // Resuming: the start sequence is not looked for again.
            if (endSequenceDetected(scanner))
                return fToken;

        } else {

            int c= scanner.read();
            if (c is fStartSequence[0]) {
                // sequenceDetected checks the remaining characters of the start sequence.
                if (sequenceDetected(scanner, fStartSequence, false)) {
                    if (endSequenceDetected(scanner))
                        return fToken;
                }
            }
        }

        // No match: step back one character before reporting failure.
        scanner.unread();
        return Token.UNDEFINED;
    }

    /*
     * @see IRule#evaluate(ICharacterScanner)
     */
    public IToken evaluate(ICharacterScanner scanner) {
        return evaluate(scanner, false);
    }

    /**
     * Returns whether the end sequence was detected. As the pattern can be considered
     * ended by a line delimiter, the result of this method is <code>true</code> if the
     * rule breaks on the end of the line, or if the EOF character is read and the rule
     * breaks on the end of the file.
     * <p>
     * If the scanner is exhausted without a match and the rule does not break on
     * end of file, the scanner is rewound by the number of reads performed by the
     * main loop before <code>false</code> is returned.
     * </p>
     *
     * @param scanner the character scanner to be used
     * @return <code>true</code> if the end sequence has been detected
     */
    protected bool endSequenceDetected(ICharacterScanner scanner) {

        // Refresh the sorted delimiter cache only when the scanner's delimiters changed.
        char[][] originalDelimiters= scanner.getLegalLineDelimiters();
        int count= originalDelimiters.length;
        // Compare against the CACHED array's length. The previous check compared
        // originalDelimiters.length with itself and therefore never fired, leaving
        // fSortedLineDelimiters with a stale size when the delimiter count changed.
        if (fLineDelimiters is null || fLineDelimiters.length !is count) {
            fSortedLineDelimiters= new char[count][];
        } else {
            // Same length: walk backwards while the entries are the identical arrays.
            // count ends at 0 only if every entry is unchanged.
            while (count > 0 && fLineDelimiters[count-1] is originalDelimiters[count-1])
                count--;
        }
        if (count !is 0) {
            fLineDelimiters= originalDelimiters;
            System.arraycopy(fLineDelimiters, 0, fSortedLineDelimiters, 0, fLineDelimiters.length);
            // Longest delimiters first, so the longest candidate is tried before its prefixes.
            Arrays.sort(fSortedLineDelimiters, fLineDelimiterComparator);
        }

        // Counts the reads done by the loop condition; starts at 1 to cover the
        // final read that returns EOF. Characters consumed by the escape handling
        // below are not included in this count.
        int readCount= 1;
        int c;
        while ((c= scanner.read()) !is ICharacterScanner.EOF) {
            if (c is fEscapeCharacter) {
                // Skip escaped character(s)
                if (fEscapeContinuesLine) {
                    c= scanner.read();
                    // If the escaped character starts a line delimiter, consume the whole delimiter.
                    for (int i= 0; i < fSortedLineDelimiters.length; i++) {
                        if (c is fSortedLineDelimiters[i][0] && sequenceDetected(scanner, fSortedLineDelimiters[i], true))
                            break;
                    }
                } else
                    scanner.read();

            } else if (fEndSequence.length > 0 && c is fEndSequence[0]) {
                // Check if the specified end sequence has been found.
                if (sequenceDetected(scanner, fEndSequence, true))
                    return true;
            } else if (fBreaksOnEOL) {
                // Check for end of line since it can be used to terminate the pattern.
                for (int i= 0; i < fSortedLineDelimiters.length; i++) {
                    if (c is fSortedLineDelimiters[i][0] && sequenceDetected(scanner, fSortedLineDelimiters[i], true))
                        return true;
                }
            }
            readCount++;
        }

        if (fBreaksOnEOF)
            return true;

        // No terminator found: rewind the reads counted above.
        for (; readCount > 0; readCount--)
            scanner.unread();

        return false;
    }

    /**
     * Returns whether the next characters to be read by the character scanner
     * are an exact match with the given sequence. The first character of the
     * sequence is assumed to have been read and matched by the caller; matching
     * starts at index 1. No escape characters are allowed
     * within the sequence. If specified the sequence is considered to be found
     * when reading the EOF character.
     *
     * @param scanner the character scanner to be used
     * @param sequence the sequence to be detected
     * @param eofAllowed indicates whether EOF terminates the pattern
     * @return <code>true</code> if the given sequence has been detected
     */
    protected bool sequenceDetected(ICharacterScanner scanner, char[] sequence, bool eofAllowed) {
        for (int i= 1; i < sequence.length; i++) {
            int c= scanner.read();
            if (c is ICharacterScanner.EOF && eofAllowed) {
                return true;
            } else if (c !is sequence[i]) {
                // Non-matching character detected, rewind the scanner back to the start.
                // Do not unread the first character.
                scanner.unread();
                for (int j= i-1; j > 0; j--)
                    scanner.unread();
                return false;
            }
        }

        return true;
    }

    /*
     * @see IPredicateRule#evaluate(ICharacterScanner, bool)
     * @since 2.0
     */
    public IToken evaluate(ICharacterScanner scanner, bool resume) {
        if (fColumn is UNDEFINED)
            return doEvaluate(scanner, resume);

        // Peek at the next character without consuming it.
        int c= scanner.read();
        scanner.unread();
        if (c is fStartSequence[0])
            return (fColumn is scanner.getColumn() ? doEvaluate(scanner, resume) : Token.UNDEFINED);
        return Token.UNDEFINED;
    }

    /*
     * @see IPredicateRule#getSuccessToken()
     * @since 2.0
     */
    public IToken getSuccessToken() {
        return fToken;
    }
}
|