Mercurial > projects > dwt-addons

diff dwtx/jface/text/FindReplaceDocumentAdapter.d @ 129:eb30df5ca28b
Added JFace Text sources
author: Frank Benoit <benoit@tionex.de>
date: Sat, 23 Aug 2008 19:10:48 +0200
children: c4fb132a086c
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/dwtx/jface/text/FindReplaceDocumentAdapter.d	Sat Aug 23 19:10:48 2008 +0200
@@ -0,0 +1,644 @@
+/*******************************************************************************
+ * Copyright (c) 2000, 2008 IBM Corporation and others.
+ * All rights reserved. This program and the accompanying materials
+ * are made available under the terms of the Eclipse Public License v1.0
+ * which accompanies this distribution, and is available at
+ * http://www.eclipse.org/legal/epl-v10.html
+ *
+ * Contributors:
+ *     IBM Corporation - initial API and implementation
+ *     Cagatay Calli <ccalli@gmail.com> - [find/replace] retain caps when replacing - https://bugs.eclipse.org/bugs/show_bug.cgi?id=28949
+ *     Cagatay Calli <ccalli@gmail.com> - [find/replace] define & fix behavior of retain caps with other escapes and text before \C - https://bugs.eclipse.org/bugs/show_bug.cgi?id=217061
+ * Port to the D programming language:
+ *     Frank Benoit <benoit@tionex.de>
+ *******************************************************************************/
+module dwtx.jface.text.FindReplaceDocumentAdapter;
+
+import dwt.dwthelper.utils;
+
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+import java.util.regex.PatternSyntaxException;
+
+import dwtx.core.runtime.Assert;
+
+
+/**
+ * Provides search and replace operations on
+ * {@link dwtx.jface.text.IDocument}.
+ * <p>
+ * Replaces
+ * {@link dwtx.jface.text.IDocument#search(int, String, bool, bool, bool)}.
+ *
+ * @since 3.0
+ */
+public class FindReplaceDocumentAdapter : CharSequence {
+
+    /**
+     * Internal type for operation codes.
+     * <p>
+     * The class declares no members; its instances act purely as distinct
+     * markers that this adapter tells apart by identity comparison.
+     */
+    private static class FindReplaceOperationCode {
+    }
+
+    // Find/replace operation codes.
+    // Each constant is a unique instance; findReplace compares the requested
+    // operation and the remembered state against them by identity.
+    private static final FindReplaceOperationCode FIND_FIRST= new FindReplaceOperationCode();
+    private static final FindReplaceOperationCode FIND_NEXT= new FindReplaceOperationCode();
+    private static final FindReplaceOperationCode REPLACE= new FindReplaceOperationCode();
+    private static final FindReplaceOperationCode REPLACE_FIND_NEXT= new FindReplaceOperationCode();
+
+    /**
+     * Retain case mode constants. The current mode is kept in
+     * <code>fRetainCaseMode</code> and decides how replacement characters are
+     * written to the output buffer.
+     * @since 3.4
+     */
+    // RC_MIXED: characters are appended unchanged.
+    private static final int RC_MIXED= 0;
+    // RC_UPPER: every character is converted to upper case.
+    private static final int RC_UPPER= 1;
+    // RC_LOWER: every character is converted to lower case.
+    private static final int RC_LOWER= 2;
+    // RC_FIRSTUPPER: the next character is converted to upper case, then the mode falls back to RC_MIXED.
+    private static final int RC_FIRSTUPPER= 3;
+
+
+    /**
+     * The adapted document. Set once in the constructor; all searches read
+     * from it and all replacements are written to it.
+     */
+    private IDocument fDocument;
+
+    /**
+     * State for findReplace: the operation code of the most recent
+     * findReplace call that got past validation, or <code>null</code> if
+     * there was none yet. A replace is only permitted while this is
+     * FIND_FIRST or FIND_NEXT.
+     */
+    private FindReplaceOperationCode fFindReplaceState= null;
+
+    /**
+     * The matcher used in findReplace. Created by a FIND_FIRST operation over
+     * this adapter (as character sequence) and kept for reuse as long as the
+     * pattern string and flags of the next FIND_FIRST are the same.
+     */
+    private Matcher fFindReplaceMatcher;
+
+    /**
+     * The match offset from the last findReplace call. Initialized with the
+     * start offset of a FIND_FIRST operation; a backward search uses it as
+     * the upper bound for matches and then stores the offset of the match it
+     * found (or -1).
+     */
+    private int fFindReplaceMatchOffset;
+    
+    /**
+     * Retain case mode: one of the <code>RC_*</code> constants. Reset to
+     * RC_MIXED at the start of every replace-pattern interpretation and
+     * changed when a <code>\C</code> escape is encountered.
+     */
+    private int fRetainCaseMode;
+
+    /**
+     * Creates an adapter that performs find and replace operations on the
+     * given document.
+     *
+     * @param document the document to adapt, must not be <code>null</code>
+     */
+    public FindReplaceDocumentAdapter(IDocument document) {
+        Assert.isNotNull(document);
+        this.fDocument= document;
+    }
+
+    /**
+     * Searches this adapter's document for the given string and returns the
+     * region of the first match according to the search criteria.
+     * <p>
+     * A start offset of <code>-1</code> has a special meaning: a forward
+     * search then starts at offset 0, a backward search at the last
+     * character of the document.
+     *
+     * @param startOffset document offset at which search starts, or <code>-1</code>
+     * @param findString the string to find
+     * @param forwardSearch <code>true</code> to search forward, <code>false</code> to search backward
+     * @param caseSensitive indicates whether lower and upper case should be distinguished
+     * @param wholeWord indicates whether only whole-word occurrences of findString should be found.
+     *          Must not be used in combination with <code>regExSearch</code>.
+     * @param regExSearch if <code>true</code> findString represents a regular expression.
+     *          Must not be used in combination with <code>wholeWord</code>.
+     * @return the region of the match or <code>null</code> if there was no match
+     * @throws BadLocationException if startOffset is an invalid document offset
+     * @throws PatternSyntaxException if a regular expression has invalid syntax
+     */
+    public IRegion find(int startOffset, String findString, bool forwardSearch, bool caseSensitive, bool wholeWord, bool regExSearch) throws BadLocationException {
+        // the two modes are mutually exclusive
+        Assert.isTrue(!regExSearch || !wholeWord);
+
+        // translate the special offset -1 into the real start position for the chosen direction
+        if (startOffset is -1)
+            startOffset= forwardSearch ? 0 : length() - 1;
+
+        return findReplace(FIND_FIRST, startOffset, findString, null, forwardSearch, caseSensitive, wholeWord, regExSearch);
+    }
+
+    /**
+     * Stateful findReplace executes a FIND_FIRST, FIND_NEXT, REPLACE or REPLACE_FIND_NEXT operation.
+     * In case of REPLACE and REPLACE_FIND_NEXT it sends a <code>DocumentEvent</code> to all
+     * registered <code>IDocumentListener</code>.
+     * <p>
+     * A FIND_FIRST operation compiles the search pattern (or reuses the matcher of the
+     * previous search if pattern and flags are unchanged). A non-regex find string is
+     * always quoted, also for whole-word searches, so that regex metacharacters in it
+     * match themselves. The replace operations substitute the match the matcher currently
+     * points to.
+     *
+     * @param operationCode specifies what kind of operation is executed
+     * @param startOffset document offset at which search starts
+     *          this value is only used in the FIND_FIRST operation and otherwise ignored
+     * @param findString the string to find
+     *          this value is only used in the FIND_FIRST operation and otherwise ignored
+     * @param replaceText the string to replace the current match
+     *          this value is only used in the REPLACE and REPLACE_FIND_NEXT operations and otherwise ignored
+     * @param forwardSearch the search direction
+     * @param caseSensitive indicates whether lower and upper case should be distinguished
+     * @param wholeWord indicates whether the findString must be delimited by word boundaries
+     *          (regex <code>\b</code>). Must not be used in combination with <code>regExSearch</code>.
+     * @param regExSearch if <code>true</code> this operation represents a regular expression
+     *          Must not be used in combination with <code>wholeWord</code>.
+     * @return the find or replace region or <code>null</code> if there was no match
+     * @throws BadLocationException if startOffset is an invalid document offset
+     * @throws IllegalStateException if a REPLACE or REPLACE_FIND_NEXT operation is not preceded by a successful FIND operation
+     * @throws PatternSyntaxException if a regular expression has invalid syntax
+     */
+    private IRegion findReplace(final FindReplaceOperationCode operationCode, int startOffset, String findString, String replaceText, bool forwardSearch, bool caseSensitive, bool wholeWord, bool regExSearch) throws BadLocationException {
+
+        // Validate option combinations
+        Assert.isTrue(!(regExSearch && wholeWord));
+
+        // Validate state
+        if ((operationCode is REPLACE || operationCode is REPLACE_FIND_NEXT) && (fFindReplaceState !is FIND_FIRST && fFindReplaceState !is FIND_NEXT))
+            throw new IllegalStateException("illegal findReplace state: cannot replace without preceding find"); //$NON-NLS-1$
+
+        if (operationCode is FIND_FIRST) {
+            // Reset
+
+            if (findString is null || findString.length() is 0)
+                return null;
+
+            // Validate start offset
+            if (startOffset < 0 || startOffset >= length())
+                throw new BadLocationException();
+
+            int patternFlags= 0;
+
+            if (regExSearch) {
+                patternFlags |= Pattern.MULTILINE;
+                findString= substituteLinebreak(findString);
+            } else {
+                // Literal search text: quote it first so that characters such as '.', '(' or '*'
+                // are matched verbatim. This also covers whole-word mode, which previously
+                // passed the raw text to the regex compiler.
+                findString= asRegPattern(findString);
+            }
+
+            if (!caseSensitive)
+                patternFlags |= Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE;
+
+            // wholeWord implies !regExSearch (asserted above), so findString is already quoted here
+            if (wholeWord)
+                findString= "\\b" + findString + "\\b"; //$NON-NLS-1$ //$NON-NLS-2$
+
+            fFindReplaceMatchOffset= startOffset;
+            if (fFindReplaceMatcher !is null && fFindReplaceMatcher.pattern().pattern().equals(findString) && fFindReplaceMatcher.pattern().flags() is patternFlags) {
+                /*
+                 * Commented out for optimization:
+                 * The call is not needed since FIND_FIRST uses find(int) which resets the matcher
+                 */
+                // fFindReplaceMatcher.reset();
+            } else {
+                Pattern pattern= Pattern.compile(findString, patternFlags);
+                fFindReplaceMatcher= pattern.matcher(this);
+            }
+        }
+
+        // Set state
+        fFindReplaceState= operationCode;
+
+        if (operationCode is REPLACE || operationCode is REPLACE_FIND_NEXT) {
+            if (regExSearch) {
+                Pattern pattern= fFindReplaceMatcher.pattern();
+                String prevMatch= fFindReplaceMatcher.group();
+                try {
+                    // translate our escapes, then let the regex engine expand group references
+                    // by re-matching the pattern against the previously matched text only
+                    replaceText= interpretReplaceEscapes(replaceText, prevMatch);
+                    Matcher replaceTextMatcher= pattern.matcher(prevMatch);
+                    replaceText= replaceTextMatcher.replaceFirst(replaceText);
+                } catch (IndexOutOfBoundsException ex) {
+                    throw new PatternSyntaxException(ex.getLocalizedMessage(), replaceText, -1);
+                }
+            }
+
+            int offset= fFindReplaceMatcher.start();
+            int length= fFindReplaceMatcher.group().length();
+
+            if (fDocument instanceof IRepairableDocumentExtension
+                    && ((IRepairableDocumentExtension)fDocument).isLineInformationRepairNeeded(offset, length, replaceText)) {
+                String message= TextMessages.getString("FindReplaceDocumentAdapter.incompatibleLineDelimiter"); //$NON-NLS-1$
+                throw new PatternSyntaxException(message, replaceText, offset);
+            }
+
+            fDocument.replace(offset, length, replaceText);
+
+            if (operationCode is REPLACE) {
+                return new Region(offset, replaceText.length());
+            }
+        }
+
+        if (operationCode !is REPLACE) {
+            if (forwardSearch) {
+
+                bool found= false;
+                if (operationCode is FIND_FIRST)
+                    found= fFindReplaceMatcher.find(startOffset);
+                else
+                    found= fFindReplaceMatcher.find();
+
+                if (operationCode is REPLACE_FIND_NEXT)
+                    fFindReplaceState= FIND_NEXT;
+
+                // empty matches are not reported in forward direction
+                if (found && fFindReplaceMatcher.group().length() > 0)
+                    return new Region(fFindReplaceMatcher.start(), fFindReplaceMatcher.group().length());
+                return null;
+            }
+
+            // backward search: scan forward from the document start and remember the
+            // last match that ends at or before the start offset
+            bool found= fFindReplaceMatcher.find(0);
+            int index= -1;
+            int length= -1;
+            while (found && fFindReplaceMatcher.start() + fFindReplaceMatcher.group().length() <= fFindReplaceMatchOffset + 1) {
+                index= fFindReplaceMatcher.start();
+                length= fFindReplaceMatcher.group().length();
+                // Matcher.find(int) throws IndexOutOfBoundsException for a start beyond the input.
+                // A match located at the very end of the document is necessarily the last one,
+                // so stop instead of probing past the end.
+                found= index < fDocument.getLength() && fFindReplaceMatcher.find(index + 1);
+            }
+            fFindReplaceMatchOffset= index;
+            if (index > -1) {
+                // must set matcher to correct position
+                fFindReplaceMatcher.find(index);
+                return new Region(index, length);
+            }
+            return null;
+        }
+
+        return null;
+    }
+
+    /**
+     * Rewrites a regex find pattern so that every <code>\R</code> outside of a
+     * <code>\Q...\E</code> quotation is replaced by <code>(?>\r\n?|\n)</code>.
+     * All other characters and escape sequences are copied unchanged.
+     * 
+     * @param findString the original find pattern
+     * @return the transformed find pattern
+     * @throws PatternSyntaxException if \R is used at an illegal position, i.e. inside
+     *          a character set (<code>[...]</code>) or inside braces (<code>{...}</code>)
+     * @since 3.4
+     */
+    private String substituteLinebreak(String findString) throws PatternSyntaxException {
+        int length= findString.length();
+        StringBuffer result= new StringBuffer(length);
+        
+        int charGroupDepth= 0;
+        int braceDepth= 0;
+        bool quoted= false;
+        
+        int pos= 0;
+        while (pos < length) {
+            char current= findString.charAt(pos);
+            
+            // a backslash followed by another character forms a two-character escape
+            if (current is '\\' && pos + 1 < length) {
+                char escaped= findString.charAt(pos + 1);
+                if (!quoted && escaped is 'R') {
+                    if (charGroupDepth > 0 || braceDepth > 0) {
+                        String msg= TextMessages.getString("FindReplaceDocumentAdapter.illegalLinebreak"); //$NON-NLS-1$
+                        throw new PatternSyntaxException(msg, findString, pos);
+                    }
+                    result.append("(?>\\r\\n?|\\n)"); //$NON-NLS-1$
+                } else {
+                    // track entering (\Q) and leaving (\E) a quotation, copy the escape verbatim
+                    if (quoted) {
+                        if (escaped is 'E') {
+                            quoted= false;
+                        }
+                    } else if (escaped is 'Q') {
+                        quoted= true;
+                    }
+                    result.append(current).append(escaped);
+                }
+                pos+= 2;
+                continue;
+            }
+            
+            // everything else (including a trailing lone backslash) is copied as is
+            result.append(current);
+            
+            // brackets and braces only count outside of quotations;
+            // braces additionally only count outside of character sets
+            if (!quoted) {
+                if (current is '[') {
+                    charGroupDepth++;
+                } else if (current is ']') {
+                    charGroupDepth--;
+                } else if (charGroupDepth is 0) {
+                    if (current is '{') {
+                        braceDepth++;
+                    } else if (current is '}') {
+                        braceDepth--;
+                    }
+                }
+            }
+            pos++;
+        }
+        return result.toString();
+    }
+    
+    /**
+     * Appends the character <code>ch</code> to <code>buf</code> after adjusting its
+     * case according to the current Retain Case mode: upper-cased for RC_UPPER and
+     * RC_FIRSTUPPER, lower-cased for RC_LOWER, unchanged otherwise. In mode
+     * RC_FIRSTUPPER the mode is switched to RC_MIXED afterwards, so only one
+     * character gets capitalized.
+     * 
+     * @param buf the output buffer
+     * @param ch the character to process
+     * @since 3.4
+     */
+    private void interpretRetainCase(StringBuffer buf, char ch) {
+        final int mode= fRetainCaseMode;
+        char converted= ch;
+
+        if (mode is RC_UPPER || mode is RC_FIRSTUPPER) {
+            converted= Character.toUpperCase(ch);
+        } else if (mode is RC_LOWER) {
+            converted= Character.toLowerCase(ch);
+        }
+        buf.append(converted);
+
+        // capitalization applies to a single character only
+        if (mode is RC_FIRSTUPPER) {
+            fRetainCaseMode= RC_MIXED;
+        }
+    }
+
+    /**
+     * Translates the escape sequences of the given replace pattern into a pattern
+     * that can be handed to the regex engine. Backslash escapes are delegated to
+     * <code>interpretReplaceEscape</code>, <code>$</code> group references are
+     * copied (with a disambiguation for a leading zero), and all other characters
+     * pass through the current Retain Case mode.
+     * 
+     * @param replaceText the replace pattern
+     * @param foundText the found pattern to be replaced
+     * @return a replace pattern with escaped characters substituted by the respective characters
+     * @since 3.4
+     */
+    private String interpretReplaceEscapes(String replaceText, String foundText) {
+        int length= replaceText.length();
+        StringBuffer result= new StringBuffer(length);
+        
+        // nothing has been inspected yet, so start out in mixed mode
+        fRetainCaseMode= RC_MIXED;
+        
+        int pos= 0;
+        while (pos < length) {
+            final char current= replaceText.charAt(pos);
+            
+            if (current is '\\') {
+                if (pos + 1 is length) {
+                    // '\' as last character is invalid, but we still add it to get an error message
+                    result.append('\\');
+                    break;
+                }
+                // hand over the escaped character; the callee reports how far it consumed
+                pos= interpretReplaceEscape(replaceText.charAt(pos + 1), pos + 1, result, replaceText, foundText);
+                
+            } else if (current is '$') {
+                result.append(current);
+
+                /*
+                 * Feature in java.util.regex.Matcher#replaceFirst(String):
+                 * $00, $000, etc. are interpreted as $0 and
+                 * $01, $001, etc. are interpreted as $1, etc. .
+                 * If we support \0 as replacement pattern for capturing group 0,
+                 * it would not be possible any more to write a replacement pattern
+                 * that appends 0 to a capturing group (like $0\0).
+                 * The fix is to interpret \00 and $00 as $0\0, and
+                 * \01 and $01 as $0\1, etc.
+                 */
+                if (pos + 2 < length) {
+                    char first= replaceText.charAt(pos + 1);
+                    char second= replaceText.charAt(pos + 2);
+                    bool secondIsDigit= '0' <= second && second <= '9';
+                    if (first is '0' && secondIsDigit) {
+                        result.append("0\\"); //$NON-NLS-1$
+                        pos++; // consume the 0
+                    }
+                }
+                
+            } else {
+                interpretRetainCase(result, current);
+            }
+            pos++;
+        }
+        
+        return result.toString();
+    }
+
+    /**
+     * Interprets the escaped character <code>ch</code> at offset <code>i</code>
+     * of the <code>replaceText</code> and appends the interpretation to <code>buf</code>.
+     * <p>
+     * Supported escapes: <code>\r \n \t \f \a \e</code> (control characters),
+     * <code>\R</code> (the document's default line delimiter), <code>\0</code> to
+     * <code>\9</code> (capturing group references, emitted as <code>$n</code>),
+     * <code>\cX</code> (control character), <code>\xhh</code> and <code>\\uhhhh</code>
+     * (hexadecimal character codes) and <code>\C</code> (switch Retain Case mode
+     * according to the case of <code>foundText</code>). Any other escape is copied
+     * uninterpreted, including the backslash.
+     * 
+     * @param ch the escaped character
+     * @param i the offset of <code>ch</code> in <code>replaceText</code>
+     * @param buf the output buffer
+     * @param replaceText the original replace pattern
+     * @param foundText the found pattern to be replaced
+     * @return the new offset, i.e. the offset of the last character consumed by this escape
+     * @throws PatternSyntaxException if a <code>\c</code>, <code>\x</code> or <code>\\u</code>
+     *          escape is incomplete or contains characters other than hex digits
+     * @since 3.4
+     */
+    private int interpretReplaceEscape(final char ch, int i, StringBuffer buf, String replaceText, String foundText) {
+        int length= replaceText.length();
+        switch (ch) {
+            case 'r':
+                buf.append('\r');
+                break;
+            case 'n':
+                buf.append('\n');
+                break;
+            case 't':
+                buf.append('\t');
+                break;
+            case 'f':
+                buf.append('\f');
+                break;
+            case 'a':
+                buf.append('\u0007');
+                break;
+            case 'e':
+                buf.append('\u001B');
+                break;
+            case 'R': //see http://www.unicode.org/unicode/reports/tr18/#Line_Boundaries
+                buf.append(TextUtilities.getDefaultLineDelimiter(fDocument));
+                break;
+            /*
+             * \0 for octal is not supported in replace string, since it
+             * would conflict with capturing group \0, etc.
+             */
+            case '0':
+                buf.append('$').append(ch);
+                /*
+                 * See explanation in "Feature in java.util.regex.Matcher#replaceFirst(String)"
+                 * in interpretReplaceEscapes(String, String) above.
+                 */
+                if (i + 1 < length) {
+                    char ch1= replaceText.charAt(i + 1);
+                    if ('0' <= ch1 && ch1 <= '9') {
+                        // separate the group number from the following literal digit
+                        buf.append('\\');
+                    }
+                }
+                break;
+                
+            case '1':
+            case '2':
+            case '3':
+            case '4':
+            case '5':
+            case '6':
+            case '7':
+            case '8':
+            case '9':
+                buf.append('$').append(ch);
+                break;
+
+            case 'c':
+                if (i + 1 < length) {
+                    char ch1= replaceText.charAt(i + 1);
+                    // flipping bit 6 maps e.g. 'A' to the control character 0x01
+                    interpretRetainCase(buf, (char)(ch1 ^ 64));
+                    i++;
+                } else {
+                    String msg= TextMessages.getFormattedString("FindReplaceDocumentAdapter.illegalControlEscape", "\\c"); //$NON-NLS-1$ //$NON-NLS-2$
+                    throw new PatternSyntaxException(msg, replaceText, i);
+                }
+                break;
+                
+            case 'x':
+                // \xhh: exactly two hex digits must follow
+                if (i + 2 < length) {
+                    int parsedInt;
+                    try {
+                        parsedInt= parseUnsignedHex(replaceText.substring(i + 1, i + 3));
+                    } catch (NumberFormatException e) {
+                        String msg= TextMessages.getFormattedString("FindReplaceDocumentAdapter.illegalHexEscape", replaceText.substring(i - 1, i + 3)); //$NON-NLS-1$
+                        throw new PatternSyntaxException(msg, replaceText, i);
+                    }
+                    interpretRetainCase(buf, (char) parsedInt);
+                    i+= 2;
+                } else {
+                    String msg= TextMessages.getFormattedString("FindReplaceDocumentAdapter.illegalHexEscape", replaceText.substring(i - 1, length)); //$NON-NLS-1$
+                    throw new PatternSyntaxException(msg, replaceText, i);
+                }
+                break;
+                
+            case 'u':
+                // \\uhhhh: exactly four hex digits must follow
+                if (i + 4 < length) {
+                    int parsedInt;
+                    try {
+                        parsedInt= parseUnsignedHex(replaceText.substring(i + 1, i + 5));
+                    } catch (NumberFormatException e) {
+                        String msg= TextMessages.getFormattedString("FindReplaceDocumentAdapter.illegalUnicodeEscape", replaceText.substring(i - 1, i + 5)); //$NON-NLS-1$
+                        throw new PatternSyntaxException(msg, replaceText, i);
+                    }
+                    interpretRetainCase(buf, (char) parsedInt);
+                    i+= 4;
+                } else {
+                    String msg= TextMessages.getFormattedString("FindReplaceDocumentAdapter.illegalUnicodeEscape", replaceText.substring(i - 1, length)); //$NON-NLS-1$
+                    throw new PatternSyntaxException(msg, replaceText, i);
+                }
+                break;
+                
+            case 'C':
+                if(foundText.toUpperCase().equals(foundText)) // is whole match upper-case?
+                    fRetainCaseMode= RC_UPPER;
+                else if (foundText.toLowerCase().equals(foundText)) // is whole match lower-case?
+                    fRetainCaseMode= RC_LOWER;
+                else if(Character.isUpperCase(foundText.charAt(0))) // is first character upper-case?
+                    fRetainCaseMode= RC_FIRSTUPPER;
+                else
+                    fRetainCaseMode= RC_MIXED;
+                break;
+
+            default:
+                // unknown escape k: append uninterpreted \k
+                buf.append('\\').append(ch);
+                break;
+        }
+        return i;
+    }
+
+    /**
+     * Parses the given non-empty string as an unsigned hexadecimal number.
+     * <p>
+     * <code>Integer.parseInt</code> tolerates a leading sign character, which would
+     * let escapes such as <code>\x-0</code> pass as valid. A leading
+     * <code>'-'</code> or <code>'+'</code> is therefore rejected up front; without a
+     * sign the parsed value can not be negative.
+     *
+     * @param digits the digits of a hex or unicode escape, must not be empty
+     * @return the parsed, non-negative value
+     * @throws NumberFormatException if <code>digits</code> starts with a sign or is not a hexadecimal number
+     */
+    private static int parseUnsignedHex(String digits) {
+        char first= digits.charAt(0);
+        if (first is '-' || first is '+')
+            throw new NumberFormatException();
+        return Integer.parseInt(digits, 16);
+    }
+
+    /**
+     * Turns a plain search string into a regex pattern that matches exactly
+     * that string. Runs of ordinary characters are wrapped in
+     * <code>\Q...\E</code>; every backslash is emitted outside of such a
+     * quotation as an escaped backslash (<code>\\</code>).
+     *
+     * @param string the non-regex pattern
+     * @return the string converted to a regex pattern
+     */
+    private String asRegPattern(String string) {
+        final int count= string.length();
+        StringBuffer pattern= new StringBuffer(count);
+        bool insideQuote= false;
+
+        for (int pos= 0; pos < count; pos++) {
+            char current= string.charAt(pos);
+            if (current !is '\\') {
+                // ordinary character: make sure a quotation is open, then copy it
+                if (!insideQuote) {
+                    pattern.append("\\Q"); //$NON-NLS-1$
+                    insideQuote= true;
+                }
+                pattern.append(current);
+            } else {
+                // backslash: close a pending quotation and emit it escaped
+                if (insideQuote) {
+                    pattern.append("\\E"); //$NON-NLS-1$
+                    insideQuote= false;
+                }
+                pattern.append("\\\\"); //$NON-NLS-1$
+            }
+        }
+        if (insideQuote) {
+            pattern.append("\\E"); //$NON-NLS-1$
+        }
+
+        return pattern.toString();
+    }
+
+    /**
+     * Replaces the match of the preceding find operation with the given text.
+     * Sends a <code>DocumentEvent</code> to all registered <code>IDocumentListener</code>.
+     *
+     * @param text the substitution text
+     * @param regExReplace if <code>true</code> <code>text</code> is interpreted as a
+     *          regular expression replace pattern (escapes and group references)
+     * @return the region covered by the inserted text
+     * @throws BadLocationException propagated from the underlying replace operation
+     * @throws IllegalStateException if this call is not preceded by a successful find operation
+     * @throws PatternSyntaxException if a regular expression has invalid syntax
+     *
+     * @see DocumentEvent
+     * @see IDocumentListener
+     */
+    public IRegion replace(String text, bool regExReplace) throws BadLocationException {
+        // start offset, find string, direction, case sensitivity and whole-word flag are ignored for REPLACE
+        IRegion replaced= findReplace(REPLACE, -1, null, text, false, false, false, regExReplace);
+        return replaced;
+    }
+
+    // ---------- CharSequence implementation ----------
+
+    /*
+     * Returns the length of the adapted document.
+     *
+     * @see java.lang.CharSequence#length()
+     */
+    public int length() {
+        final int documentLength= fDocument.getLength();
+        return documentLength;
+    }
+
+    /*
+     * Returns the document character at the given index. An invalid index,
+     * reported by the document as BadLocationException, is rethrown as
+     * IndexOutOfBoundsException.
+     *
+     * @see java.lang.CharSequence#charAt(int)
+     */
+    public char charAt(int index) {
+        char result;
+        try {
+            result= fDocument.getChar(index);
+        } catch (BadLocationException invalidIndex) {
+            throw new IndexOutOfBoundsException();
+        }
+        return result;
+    }
+
+    /*
+     * Returns the document text between start (inclusive) and end (exclusive).
+     * An invalid range, reported by the document as BadLocationException, is
+     * rethrown as IndexOutOfBoundsException.
+     *
+     * @see java.lang.CharSequence#subSequence(int, int)
+     */
+    public CharSequence subSequence(int start, int end) {
+        // the document API works with offset and length rather than two offsets
+        final int count= end - start;
+        try {
+            return fDocument.get(start, count);
+        } catch (BadLocationException invalidRange) {
+            throw new IndexOutOfBoundsException();
+        }
+    }
+
+    /*
+     * Returns the complete content of the adapted document.
+     *
+     * @see java.lang.Object#toString()
+     */
+    public String toString() {
+        String content= fDocument.get();
+        return content;
+    }
+}
author	Frank Benoit <benoit@tionex.de>
date	Sat, 23 Aug 2008 19:10:48 +0200
parents
children	c4fb132a086c