92
|
1 /*******************************************************************************
|
|
2
|
|
3 @file USet.d
|
|
4
|
|
5 Copyright (c) 2004 Kris Bell
|
|
6
|
|
7 This software is provided 'as-is', without any express or implied
|
|
8 warranty. In no event will the authors be held liable for damages
|
|
9 of any kind arising from the use of this software.
|
|
10
|
|
11 Permission is hereby granted to anyone to use this software for any
|
|
12 purpose, including commercial applications, and to alter it and/or
|
|
13 redistribute it freely, subject to the following restrictions:
|
|
14
|
|
15 1. The origin of this software must not be misrepresented; you must
|
|
16 not claim that you wrote the original software. If you use this
|
|
17 software in a product, an acknowledgment within documentation of
|
|
18 said product would be appreciated but is not required.
|
|
19
|
|
20 2. Altered source versions must be plainly marked as such, and must
|
|
21 not be misrepresented as being the original software.
|
|
22
|
|
23 3. This notice may not be removed or altered from any distribution
|
|
24 of the source.
|
|
25
|
|
26 4. Derivative works are permitted, but they must carry this notice
|
|
27 in full and credit the original source.
|
|
28
|
|
29
|
|
30 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
|
31
|
|
32
|
|
33 @version Initial version, November 2004
|
|
34 @author Kris
|
|
35
|
|
36 Note that this package and documentation is built around the ICU
|
|
37 project (http://oss.software.ibm.com/icu/). Below is the license
|
|
38 statement as specified by that software:
|
|
39
|
|
40
|
|
41 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
|
42
|
|
43
|
|
44 ICU License - ICU 1.8.1 and later
|
|
45
|
|
46 COPYRIGHT AND PERMISSION NOTICE
|
|
47
|
|
48 Copyright (c) 1995-2003 International Business Machines Corporation and
|
|
49 others.
|
|
50
|
|
51 All rights reserved.
|
|
52
|
|
53 Permission is hereby granted, free of charge, to any person obtaining a
|
|
54 copy of this software and associated documentation files (the
|
|
55 "Software"), to deal in the Software without restriction, including
|
|
56 without limitation the rights to use, copy, modify, merge, publish,
|
|
57 distribute, and/or sell copies of the Software, and to permit persons
|
|
58 to whom the Software is furnished to do so, provided that the above
|
|
59 copyright notice(s) and this permission notice appear in all copies of
|
|
60 the Software and that both the above copyright notice(s) and this
|
|
61 permission notice appear in supporting documentation.
|
|
62
|
|
63 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
|
64 OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
|
65 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT
|
|
66 OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
|
|
67 HOLDERS INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL
|
|
68 INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING
|
|
69 FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
|
|
70 NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION
|
|
71 WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
|
72
|
|
73 Except as contained in this notice, the name of a copyright holder
|
|
74 shall not be used in advertising or otherwise to promote the sale, use
|
|
75 or other dealings in this Software without prior written authorization
|
|
76 of the copyright holder.
|
|
77
|
|
78 ----------------------------------------------------------------------
|
|
79
|
|
80 All trademarks and registered trademarks mentioned herein are the
|
|
81 property of their respective owners.
|
|
82
|
|
83 *******************************************************************************/
|
|
84
|
|
85 module dwtx.dwtxhelper.mangoicu.USet;
|
|
86
|
|
87 private import dwtx.dwtxhelper.mangoicu.ICU,
|
|
88 dwtx.dwtxhelper.mangoicu.UString;
|
|
89
|
|
/*******************************************************************************

        A mutable set of Unicode characters and multicharacter strings.

        Objects of this class represent character classes used in regular
        expressions. A character class specifies a subset of Unicode code
        points. Legal code points are U+0000 to U+10FFFF, inclusive.

        UnicodeSet supports two APIs. The first is the operand API that
        allows the caller to modify the value of a UnicodeSet object. It
        conforms to Java 2's java.util.Set interface, although UnicodeSet
        does not actually implement that interface. All methods of Set are
        supported, with the modification that they take a character range
        or single character instead of an Object, and they take a UnicodeSet
        instead of a Collection. The operand API may be thought of in terms
        of boolean logic: a boolean OR is implemented by add, a boolean AND
        is implemented by retain, a boolean XOR is implemented by complement
        taking an argument, and a boolean NOT is implemented by complement
        with no argument. In terms of traditional set theory function names,
        add is a union, retain is an intersection, remove is an asymmetric
        difference, and complement with no argument is a set complement with
        respect to the superset range MIN_VALUE-MAX_VALUE.

        The second API is the applyPattern()/toPattern() API from the
        java.text.Format-derived classes. Unlike the methods that add
        characters, add categories, and control the logic of the set,
        the method applyPattern() sets all attributes of a UnicodeSet
        at once, based on a string pattern.

        See <A HREF="http://oss.software.ibm.com/icu/apiref/uset_8h.html">
        this page</A> for full details.

*******************************************************************************/
|
|
123
|
|
class USet : ICU
{
        // Opaque ICU USet handle owned by this object; released in ~this().
        package Handle handle;

        /***********************************************************************

                Options accepted by the pattern-based constructor and by
                applyPattern(). The numeric values are handed to ICU
                unchanged as the 'options' argument.

        ***********************************************************************/

        enum Options
        {
                None            = 0,
                IgnoreSpace     = 1,
                CaseInsensitive = 2,
        }

        /***********************************************************************

                Creates a USet object that contains the range of characters
                start..end, inclusive.

                Both bounds are full Unicode code points (dchar), matching
                ICU's UChar32, so supplementary characters above U+FFFF
                can be expressed. wchar arguments convert implicitly.

        ***********************************************************************/

        this (dchar start, dchar end)
        {
                handle = uset_open (start, end);
        }

        /***********************************************************************

                Creates a set from the given pattern. See the UnicodeSet
                class description for the syntax of the pattern language.

                The ICU error code is checked through testError() with
                the message "failed to open pattern-based charset".

        ***********************************************************************/

        this (UStringView pattern, Options o = Options.None)
        {
                UErrorCode e;

                handle = uset_openPatternOptions (pattern.get.ptr, pattern.len, o, e);
                testError (e, "failed to open pattern-based charset");
        }

        /***********************************************************************

                Internal constructor invoked via UCollator. Adopts an
                existing ICU handle; the destructor will close it.

        ***********************************************************************/

        package this (Handle handle)
        {
                this.handle = handle;
        }

        /***********************************************************************

                Disposes of the storage used by a USet object

        ***********************************************************************/

        ~this ()
        {
                uset_close (handle);
        }

        /***********************************************************************

                Modifies the set to represent the set specified by the
                given pattern. See the UnicodeSet class description for
                the syntax of the pattern language. See also the User
                Guide chapter about UnicodeSet. Empties the set passed
                before applying the pattern.

                The ICU error code is checked through testError() with
                the message "failed to apply pattern".

        ***********************************************************************/

        void applyPattern (UStringView pattern, Options o = Options.None)
        {
                UErrorCode e;

                uset_applyPattern (handle, pattern.get.ptr, pattern.len, o, e);
                testError (e, "failed to apply pattern");
        }

        /***********************************************************************

                Writes a string representation of this set into dst. If
                the result is passed to a pattern-based constructor (ICU
                uset_openPattern()), it will produce another set that is
                equal to this one.

                The 'escape' flag is forwarded to ICU as its
                escapeUnprintable argument.

        ***********************************************************************/

        void toPattern (UString dst, bool escape)
        {
                // Callback handed to UString.format(): it supplies the
                // destination buffer and capacity, we forward them to ICU.
                uint fmt (wchar* p, uint len, inout UErrorCode e)
                {
                        return uset_toPattern (handle, p, len, escape, e);
                }

                dst.format (&fmt, "failed to convert charset to a pattern");
        }

        /***********************************************************************

                Adds the given character to this USet. After this call,
                contains (c) will return true.

        ***********************************************************************/

        void add (dchar c)
        {
                uset_add (handle, c);
        }

        /***********************************************************************

                Adds all of the elements in the specified set to this set
                if they're not already present. This operation effectively
                modifies this set so that its value is the union of the two
                sets. The behavior of this operation is unspecified if the
                specified collection is modified while the operation is in
                progress.

        ***********************************************************************/

        void addSet (USet other)
        {
                uset_addAll (handle, other.handle);
        }

        /***********************************************************************

                Adds the given range of characters to this USet. After
                this call, containsRange (start, end) will return true

        ***********************************************************************/

        void addRange (dchar start, dchar end)
        {
                uset_addRange (handle, start, end);
        }

        /***********************************************************************

                Adds the given string to this USet. After this call,
                containsString (t) will return true

        ***********************************************************************/

        void addString (UStringView t)
        {
                uset_addString (handle, t.get.ptr, t.len);
        }

        /***********************************************************************

                Removes the given character from this USet. After the
                call, contains (c) will return false

        ***********************************************************************/

        void remove (dchar c)
        {
                uset_remove (handle, c);
        }

        /***********************************************************************

                Removes the given range of characters from this USet.
                After the call, containsRange (start, end) will return
                false

        ***********************************************************************/

        void removeRange (dchar start, dchar end)
        {
                uset_removeRange (handle, start, end);
        }

        /***********************************************************************

                Removes the given string from this USet. After the call,
                containsString (t) will return false

        ***********************************************************************/

        void removeString (UStringView t)
        {
                uset_removeString (handle, t.get.ptr, t.len);
        }

        /***********************************************************************

                Inverts this set. This operation modifies this set so
                that its value is its complement. This operation does
                not affect the multicharacter strings, if any

        ***********************************************************************/

        void complement ()
        {
                uset_complement (handle);
        }

        /***********************************************************************

                Removes all of the elements from this set. This set will
                be empty after this call returns.

        ***********************************************************************/

        void clear ()
        {
                uset_clear (handle);
        }

        /***********************************************************************

                Returns true if this USet contains no characters and no
                strings

        ***********************************************************************/

        bool isEmpty ()
        {
                // ICU returns a UBool (byte); normalise it to a D bool
                return uset_isEmpty (handle) != 0;
        }

        /***********************************************************************

                Returns true if this USet contains the given character

        ***********************************************************************/

        bool contains (dchar c)
        {
                return uset_contains (handle, c) != 0;
        }

        /***********************************************************************

                Returns true if this USet contains all characters c where
                start <= c && c <= end

        ***********************************************************************/

        bool containsRange (dchar start, dchar end)
        {
                return uset_containsRange (handle, start, end) != 0;
        }

        /***********************************************************************

                Returns true if this USet contains the given string

        ***********************************************************************/

        bool containsString (UStringView t)
        {
                return uset_containsString (handle, t.get.ptr, t.len) != 0;
        }

        /***********************************************************************

                Returns the value reported by ICU's uset_size() for this
                set (per the ICU reference, the number of characters and
                strings it contains).

        ***********************************************************************/

        uint size ()
        {
                return uset_size (handle);
        }


        /***********************************************************************

                Bind the ICU functions from a shared library. This is
                complicated by the issues regarding D and DLLs on the
                Windows platform

        ***********************************************************************/

        private static void* library;

        /***********************************************************************

                C entry points resolved at startup by the static
                constructor. Code point arguments are declared dchar
                (32 bits) to match ICU's UChar32; string arguments stay
                wchar* because ICU's UChar is a UTF-16 code unit.

        ***********************************************************************/

        private static extern (C)
        {
                Handle function (dchar start, dchar end) uset_open;
                void   function (Handle) uset_close;
                Handle function (wchar* pattern, uint patternLength, uint options, inout UErrorCode e) uset_openPatternOptions;
                uint   function (Handle, wchar* pattern, uint patternLength, uint options, inout UErrorCode e) uset_applyPattern;
                uint   function (Handle, wchar* result, uint resultCapacity, byte escapeUnprintable, inout UErrorCode e) uset_toPattern;
                void   function (Handle, dchar c) uset_add;
                void   function (Handle, Handle additionalSet) uset_addAll;
                void   function (Handle, dchar start, dchar end) uset_addRange;
                void   function (Handle, wchar* str, uint strLen) uset_addString;
                void   function (Handle, dchar c) uset_remove;
                void   function (Handle, dchar start, dchar end) uset_removeRange;
                void   function (Handle, wchar* str, uint strLen) uset_removeString;
                void   function (Handle) uset_complement;
                void   function (Handle) uset_clear;
                byte   function (Handle) uset_isEmpty;
                byte   function (Handle, dchar c) uset_contains;
                byte   function (Handle, dchar start, dchar end) uset_containsRange;
                byte   function (Handle, wchar* str, uint strLen) uset_containsString;
                uint   function (Handle) uset_size;
        }

        /***********************************************************************

                Table pairing each function pointer above with the name
                of the symbol to be looked up in the shared library.

        ***********************************************************************/

        static FunctionLoader.Bind[] targets =
        [
                {cast(void**) &uset_open,               "uset_open"},
                {cast(void**) &uset_close,              "uset_close"},
                {cast(void**) &uset_openPatternOptions, "uset_openPatternOptions"},
                {cast(void**) &uset_applyPattern,       "uset_applyPattern"},
                {cast(void**) &uset_toPattern,          "uset_toPattern"},
                {cast(void**) &uset_add,                "uset_add"},
                {cast(void**) &uset_addAll,             "uset_addAll"},
                {cast(void**) &uset_addRange,           "uset_addRange"},
                {cast(void**) &uset_addString,          "uset_addString"},
                {cast(void**) &uset_remove,             "uset_remove"},
                {cast(void**) &uset_removeRange,        "uset_removeRange"},
                {cast(void**) &uset_removeString,       "uset_removeString"},
                {cast(void**) &uset_complement,         "uset_complement"},
                {cast(void**) &uset_clear,              "uset_clear"},
                {cast(void**) &uset_isEmpty,            "uset_isEmpty"},
                {cast(void**) &uset_contains,           "uset_contains"},
                {cast(void**) &uset_containsRange,      "uset_containsRange"},
                {cast(void**) &uset_containsString,     "uset_containsString"},
                {cast(void**) &uset_size,               "uset_size"},
        ];

        /***********************************************************************

                Binds every entry of 'targets' from the icuuc library
                and remembers the library reference for later unbinding.

        ***********************************************************************/

        static this ()
        {
                library = FunctionLoader.bind (icuuc, targets);
        }

        /***********************************************************************

                Releases the library reference obtained by the static
                constructor.

        ***********************************************************************/

        static ~this ()
        {
                FunctionLoader.unbind (library);
        }
}
|
|
472
|