001 /*
002 Copyright (c) 1996-2011, Damon Hart-Davis
003 All rights reserved.
004
005 Redistribution and use in source and binary forms, with or without
006 modification, are permitted provided that the following conditions are
007 met:
008
009 * Redistributions of source code must retain the above copyright
010 notice, this list of conditions and the following disclaimer.
011
012 * Redistributions in binary form must reproduce the above copyright
013 notice, this list of conditions and the following disclaimer in the
014 documentation and/or other materials provided with the
015 distribution.
016
017 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
018 IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
019 TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
020 PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
021 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
022 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
023 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
024 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
025 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
026 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
027 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
028 */
029 package org.hd.d.pg2k.svrCore;
030
031 import java.util.Collection;
032 import java.util.Comparator;
033 import java.util.SortedSet;
034 import java.util.TreeSet;
035
036 import org.w3c.dom.Element;
037 import org.w3c.dom.NamedNodeMap;
038 import org.w3c.dom.Node;
039
040 /**Some simple common text utilities of scope throughout the application.
041 */
042 public final class TextUtils
043 {
044 /**Prevent construction of an instance. */
045 private TextUtils() { }
046
047 /**Suffixes used by sizeAsText. */
048 private static final String sizeSuffixes[] = { "B", "kB", "MB", "GB", "TB", "EB" };
049 /**Renders size in bytes as text, abbreviated if requested.
050 * Can produce normal or abbreviated output.
051 */
052 public static final String sizeAsText(final long size, final boolean abbrev)
053 {
054 final StringBuilder sb = new StringBuilder(abbrev ? 5 : 16);
055 if(abbrev)
056 {
057 long divisor = 1;
058 for(int i = 0; i < sizeSuffixes.length; ++i, divisor *= 1024)
059 {
060 if(size < divisor * 1000)
061 {
062 // Takes 3 or less digits with this suffix, so show it!
063 sb.append((size + divisor/2) / divisor);
064 sb.append(sizeSuffixes[i]);
065 return(sb.toString()); // Done.
066 }
067 }
068 // We may fall through to the default case...
069 }
070 sb.append(size);
071 sb.append("Byte");
072 return(sb.toString());
073 }
074
075 /**Returns true if given String is 7-bit clean, is is pure ASCII, or is null.
076 */
077 public static boolean isASCII7(final String s)
078 {
079 if(s == null) { return(true); }
080 for(int i = s.length(); --i >= 0; )
081 { if((s.charAt(i) & ~0x7f) != 0) { return(false); } }
082 return(true); // Seems OK.
083 }
084
085 /**Recursively copy second Node and contents into first node.
086 * This works even when the second Node is from a different Document to the first
087 * and importNode() does not work (eg due to JDK1.5 impl bug).
088 */
089 public static final void importCopy(final Node dest, final Node src)
090 {
091 if((dest == null) || (src == null))
092 { throw new IllegalArgumentException(); }
093
094 final Element cp = dest.getOwnerDocument().createElement(src.getNodeName());
095
096 // Deal with attrs, etc.
097 final boolean hasAttributes = (src.getNodeType() == Node.ELEMENT_NODE) &&
098 (src.getAttributes().getLength() > 0);
099 if(hasAttributes)
100 {
101 final NamedNodeMap attribs = src.getAttributes();
102 for(int i = 0; i < attribs.getLength(); i++)
103 { cp.setAttribute(attribs.item(i).getNodeName(), attribs.item(i).getNodeValue()); }
104 }
105
106 // Copy in any node value.
107 cp.setNodeValue(src.getNodeValue());
108
109 // Now add the new node to the destination tree.
110 dest.appendChild(cp);
111
112 // Recursively copy children.
113 for(Node child = src.getFirstChild(); child != null; child = child.getNextSibling())
114 { importCopy(cp, child); }
115 }
116
117 /**String to write as line terminator when converting to XML.
118 * A simple LF would suit UNIX-like systems
119 * though would probably be accepted by most.
120 * A bulkier CRLF is more like the Internet "standard".
121 */
122 private static final String XML_LINE_TERM = "\r\n";
123
124 /**Write DOM tree as XML/XHTML String; never "" nor null.
125 * Can write as terse/efficient single-line form or longer more human-friendly format.
126 * <p>
127 * When writing XHTML we make text nodes XML/HTML-safe, though allow embedded entities.
128 *
129 * @param node DOM tree root; never null
130 * @param toXHTML if true write with formatting suitable to include directly in HTML/XHTML for human consumption
131 * (eg using dl/ul/ol nested lists to produce formatted text representing the structure)
132 * rather than a pure XML representation of the Node tree itself
133 * @param terse if true write as compactly as possible, else make more human-readable if possible
134 */
135 public static final String toXML(final Node node, final boolean toXHTML, final boolean terse)
136 {
137 final StringBuilder result = new StringBuilder(64);
138 toXML(result, node, toXHTML, terse);
139 return(result.toString());
140 }
141
142 /**Write DOM tree as XML/XHTML String; appends to supplied StringBuilder.
143 * Can write as terse/efficient single-line form or longer more human-friendly format.
144 * <p>
145 * When writing XHTML we make text nodes XML/HTML-safe, though allow embedded entities.
146 *
147 * @param node DOM tree root; never null
148 * @param toXHTML if true write with formatting suitable to include directly in HTML/XHTML for human consumption
149 * (eg using dl/ul/ol nested lists to produce formatted text representing the structure)
150 * rather than a pure XML representation of the Node tree itself
151 * @param terse if true write as compactly as possible, else make more human-readable if possible
152 */
153 public static final void toXML(final StringBuilder result,
154 final Node node, final boolean toXHTML, final boolean terse)
155 {
156 if((node == null) || (result == null))
157 { throw new IllegalArgumentException(); }
158
159 final String nodeValue = node.getNodeValue();
160
161 final boolean hasValue = (node.getNodeValue() != null);
162
163 final boolean hasChildren = (node.getFirstChild() != null);
164
165 final boolean hasAttributes = (node.getNodeType() == Node.ELEMENT_NODE) &&
166 (node.getAttributes().getLength() > 0);
167
168 if(toXHTML) // XHTML/HTML formatted text.
169 {
170 result.append("<dl compact><dt>");
171 result.append("<b>").append(node.getNodeName()).append("</b>");
172
173 if(hasAttributes)
174 {
175 final NamedNodeMap attribs = node.getAttributes();
176 for(int i = 0; i < attribs.getLength(); i++)
177 {
178 result.append(' ');
179 result.append(attribs.item(i).getNodeName());
180 result.append("=\"");
181 result.append(escapeHTMLMetaChars(attribs.item(i).getNodeValue()));
182 result.append('"');
183 }
184 }
185
186 // Write any value with < > & " ' escaped...
187 if(hasValue) { result.append(" [").append(escapeHTMLMetaChars(nodeValue.trim())).append("] "); }
188
189 // Recursively deal with any child nodes...
190 if(hasChildren)
191 {
192 result.append("<dd>");
193 for(Node child = node.getFirstChild(); child != null; child = child.getNextSibling())
194 { toXML(result, child, toXHTML, terse); }
195 }
196
197 result.append("</dl>");
198 }
199 else // Pure XML.
200 {
201 result.append('<').append(node.getNodeName());
202
203 if(hasAttributes)
204 {
205 final NamedNodeMap attribs = node.getAttributes();
206 for(int i = 0; i < attribs.getLength(); i++)
207 {
208 result.append(' ');
209 result.append(attribs.item(i).getNodeName());
210 result.append("=\"");
211 result.append(escapeHTMLMetaChars(attribs.item(i).getNodeValue()));
212 result.append('"');
213 }
214 }
215
216 // If we have no value or children
217 // then we can use the <.../> format with no separate closing tag.
218 final boolean unitag = (!hasValue && !hasChildren);
219
220 if(unitag) { result.append('/'); }
221
222 result.append('>');
223
224 if(!unitag)
225 {
226 if(hasValue) { result.append(escapeHTMLMetaChars(nodeValue.trim())); }
227
228 for(Node child = node.getFirstChild(); child != null; child = child.getNextSibling())
229 { result.append(toXML(child, toXHTML, terse)); }
230
231 result.append("</").append(node.getNodeName()).append('>');
232 }
233 }
234
235 if(!terse) { result.append(XML_LINE_TERM); } // End the line for readability...
236 }
237
238 /**Rewrite HTML so that it displays as "raw" text and is safe to use in attribute values.
239 * Meta-characters (<, >, & " ') are rewritten
240 * to entity codes so that when shown on screen it looks like
241 * the raw HTML text when displayed in-line in HTML,
242 * and so as to avoid problems when embedded in XML and HTML.
243 * <p>
244 * If the input text does not contain these meta-characters
245 * then it is returned unchanged.
246 * <p>
247 * Additionally, all characters < 32 (ASCII control characters)
248 * are converted to spaces.
249 * <p>
250 * If null is passed in, then this returns null,
251 * to simplify its use in some cases where a null might be present.
252 *
253 * @param in String possibly containing HTML/XML meta-characters
254 */
255 public static String escapeHTMLMetaChars(final String in)
256 {
257 if(in == null) { return(null); }
258
259 // Check for "meta" characters in input.
260 // If none found then we can return the input String as-is.
261 boolean foundMetaChar = false;
262 for(int i = in.length(); --i >= 0; )
263 {
264 final char c = in.charAt(i);
265 if((c < 32) || ("<>\"'&".indexOf(c) != -1))
266 {
267 foundMetaChar = true;
268 break;
269 }
270 }
271 if(!foundMetaChar) { return(in); }
272
273 // Take a working writable copy of the input.
274 final StringBuilder work = new StringBuilder(in);
275
276 // Work backwards through the string
277 // (for efficiency and simplicity)
278 // carefully escaping the meta-characters.
279 for(int i = work.length(); --i >= 0; )
280 {
281 final char c = work.charAt(i);
282 String replacement;
283 switch(c)
284 {
285 case '<': { replacement = "<"; break; }
286 case '>': { replacement = ">"; break; }
287 case '&': { replacement = "&"; break; }
288 case '"': { replacement = """; break; }
289 case '\'': { replacement = "'"; break; }
290 default:
291 {
292 // Replace ASCII control codes...
293 if(c < 32) { replacement = " "; break; }
294
295 // No replacement needed.
296 continue;
297 }
298 }
299
300 // Delete the old character.
301 work.deleteCharAt(i);
302 // Insert the new string
303 // (we could merge these two ops for efficiency).
304 work.insert(i, replacement);
305 }
306
307 return(work.toString());
308 }
309
310 /**Private to encode8To6()/decode8To6(); automagically created on first access. */
311 private static final class Base64Cache
312 {
313 /**Prevent construction of any instance. */
314 private Base64Cache() {}
315
316 /**Private to encode8To6()/decode8To6(). */
317 private static final char[] alphabet = {
318 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', // 0 to 7
319 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', // 8 to 15
320 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', // 16 to 23
321 'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f', // 24 to 31
322 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', // 32 to 39
323 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', // 40 to 47
324 'w', 'x', 'y', 'z', '0', '1', '2', '3', // 48 to 55
325 '4', '5', '6', '7', '8', '9', '+', '/'}; // 56 to 63
326
327 /**Index/value of each legit 7-bit-ASCII character from the alphabet.
328 * All other (ie illegal) values map to -1.
329 * <p>
330 * This is effectively the reverse mapping for alphabet[].
331 */
332 private static final int valueOf[] = new int[128];
333
334 /**Initialise valueOf[]. */
335 static
336 {
337 for(int i = valueOf.length; --i >= 0; ) { valueOf[i] = -1; }
338 for(int i = alphabet.length; --i >= 0; ) { valueOf[alphabet[i]] = i; }
339 }
340
341 /**Gets valueOf given char; non-negative.
342 * @throws IllegalArgumentException for invalid char (not from allowed alphabet)
343 * @return value in range 0--63 inclusive
344 */
345 public static int getValueOf(final char c)
346 {
347 if(c > 128) { throw new IllegalArgumentException(); }
348 final int v = valueOf[c];
349 if(v == -1) { throw new IllegalArgumentException(); }
350 return(v);
351 }
352 }
353
354 /**Decode to a byte array (8 bit) from ASCII Base-64 (6 bit); never null.
355 * The input data must always be a multiple of 4 characters,
356 * and all character must come from the standard Base64 set [A-Za-z0-9+/=].
357 *
358 * @param base64Text data in base-46, eg as encoded by encode8To6(); never null
359 * @return binary data; never null
360 */
361 public static byte[] decode8To6(final String base64Text)
362 {
363 if(base64Text == null) { throw new IllegalArgumentException(); }
364 final int inputLen = base64Text.length();
365 // Handle empty input specially.
366 if(inputLen == 0) { return(new byte[0]); }
367 // Check input is legitimate length (eg appropriately padded with trailing '='s).
368 if((inputLen & 3) != 0)
369 { throw new IllegalArgumentException("input length must be multiple of 4: was "+base64Text.length()); }
370
371 // Compute the maximum result length (absent trailing '='s).
372 final int maxResultSize = 3 * (inputLen / 4);
373 // Compute the actual result size.
374 final boolean oneTrailingEquals = (base64Text.charAt(inputLen-1) == '=');
375 final boolean twoTrailingEquals = oneTrailingEquals && (base64Text.charAt(inputLen-2) == '=');
376 final int resultSize = twoTrailingEquals ? (maxResultSize-2) :
377 (oneTrailingEquals ? (maxResultSize-1) :
378 maxResultSize);
379 // Make the array for the final result...
380 final byte result[] = new byte[resultSize];
381
382 // Decoding TO block of 3 bytes at index i in the result
383 // FROM block of 4 characters at index j in the source.
384 for(int i = 0, j = 0; i < resultSize; i += 3, j += 4)
385 {
386 // Special treatment for the last block if it is partial.
387 final boolean partialLastBlock = ((i+3) > resultSize);
388
389 // Always the first two chars of input block must be real encoded data.
390 final int v0 = Base64Cache.getValueOf(base64Text.charAt(j+0));
391 final int v1 = Base64Cache.getValueOf(base64Text.charAt(j+1));
392 // Always at least one output byte.
393 result[i+0] = (byte)((v0 << 2) + (v1 >>> 4)); // b0 = v0v0v0v0 v0v0v1v1
394
395 if(!partialLastBlock)
396 {
397 // Full block; two more bytes to decode from three more input chars.
398 final int v2 = Base64Cache.getValueOf(base64Text.charAt(j+2));
399 final int v3 = Base64Cache.getValueOf(base64Text.charAt(j+3));
400 result[i+1] = (byte)(((v1 & 0xf) << 4) + (v2 >>> 2)); // b1 = v1v1v1v1 v2v2v2v2
401 result[i+2] = (byte)(((v2 & 3) << 6) + v3 ); // b2 = v2v2v3v3 v3v3v3v3
402 }
403 else if(!twoTrailingEquals)
404 {
405 // Partial last block with more than one byte to decode from one more input char.
406 final int v2 = Base64Cache.getValueOf(base64Text.charAt(j+2));
407 result[i+1] = (byte)(((v1 & 0xf) << 4) + (v2 >>> 2)); // b1 = v1v1v1v1 v2v2v2v2
408 }
409 }
410
411 return(result);
412 }
413
414 /**Encode a byte array (8 bit) in ASCII Base-64 (6 bit); never null.
415 * With thanks to Apache.
416 * @param data8Bit binary input data; never null
417 * @return ASCII base-64 representation as String; never null
418 */
419 public static String encode8To6(final byte[] data8Bit)
420 {
421 final StringBuilder sb = new StringBuilder(data8Bit.length * 2);
422
423 int i = 0; // Index in data8Bit[].
424 while((i + 3) <= data8Bit.length)
425 {
426 int bits24 = (data8Bit[i++] & 0xFF) << 16;
427 bits24 |= (data8Bit[i++] & 0xFF) << 8;
428 bits24 |= (data8Bit[i++] & 0xFF);
429 sb.append(Base64Cache.alphabet[(bits24 & 0x00FC0000) >> 18]);
430 sb.append(Base64Cache.alphabet[(bits24 & 0x0003F000) >> 12]);
431 sb.append(Base64Cache.alphabet[(bits24 & 0x00000FC0) >> 6]);
432 sb.append(Base64Cache.alphabet[(bits24 & 0x0000003F)]);
433 }
434
435 if(data8Bit.length - i == 2)
436 {
437 int bits24 = (data8Bit[i] & 0xFF) << 16;
438 bits24 |= (data8Bit[i + 1] & 0xFF) << 8;
439 sb.append(Base64Cache.alphabet[(bits24 & 0x00FC0000) >> 18]);
440 sb.append(Base64Cache.alphabet[(bits24 & 0x0003F000) >> 12]);
441 sb.append(Base64Cache.alphabet[(bits24 & 0x00000FC0) >> 6]);
442 sb.append('=');
443 }
444 else if(data8Bit.length - i == 1)
445 {
446 final int bits24 = (data8Bit[i] & 0xFF) << 16;
447 sb.append(Base64Cache.alphabet[(bits24 & 0x00FC0000) >> 18]);
448 sb.append(Base64Cache.alphabet[(bits24 & 0x0003F000) >> 12]);
449 sb.append('=');
450 sb.append('=');
451 }
452
453 return(sb.toString());
454 }
455
456 /**Sanitise text for XHTML/HTML/WML/XML use.
457 * The transformations are:
458 * <ul>
459 * <li>A null argument is converted to an empty string.
460 * <li>Whitespace is trimmed from the ends.
461 * <li>Converts any ``<'' to ``{,'' ``>'' to ``}''.
462 * <li>Unless in "allowEntities" modes, converts any ``&'' to ``+'',
463 * else leaves syntactically-correct entities intact and counts
464 * as a single character.
465 * <li>Any non-ASCII characters are converted to a safe value;
466 * if entity codes are permitted then these values are converted
467 * to entity codes.
468 * <li>The result is limited to at most the specified number of characters.
469 * </ul>
470 * <p>
471 * Our simple definition of a syntactically-correct entity is that
472 * it may contain any selection of ASCII digits and letters and optionally
473 * a leading hash (`#'), but certainly no whitespace. We may enforce a
474 * length limit.
475 * <p>
476 * If the input string is OK it is returned untouched.
477 * <p>
478 * TODO: Needs version with extra argument to allow entity sequences
479 * (of the form '&'xxx;) to be treated as single characters
480 * and to delete any '<'...'>' sequences,
481 * ie to do a more sophisticated job of sanitising XML/HTML
482 * that has some simple markup (primarily entity codes needed for i18n).
483 * TODO: jUnit tests
484 *
485 * @param maxLen if positive the output is limited to at most
486 * this number of characters;
487 * if >= 3 truncated text is marked with a trailing ``...''
488 * in the last three positions
489 * @param allowEntities if true, treats HTML/XML entity sequences
490 * as if single characters; the entities are vaguely tested for
491 * correct syntax and may not be allowed if invalid
492 */
493 public static String sanitiseForXML(final String input,
494 final int maxLen,
495 final boolean allowEntities)
496 {
497 if(maxLen < 0) { throw new IllegalArgumentException(); }
498
499 if(input == null) { return(""); }
500
501 // First trim whitespace from either end.
502 String in = input.trim();
503
504 // Always remove potentially-unsafe tag meta-characters.
505 in = in.replace('<', '{').replace('>', '}');
506
507 // If non-null, the input string has had all entities replaced with
508 // just the leading '&' with the entity code
509 // (not including the trailing ';')
510 // at the matching index in this array,
511 // so all entities will appear as a single character.
512 String entities[] = null;
513
514 // If not allowing entities zap all '&' chars...
515 if(!allowEntities)
516 { in = in.replace('&', '+'); }
517 // If we are allowing entities, and there are any,
518 // convert them all to placeholders.
519 else if(in.indexOf('&') >= 0)
520 {
521 // in may get truncated but not longer while this array is in use.
522 entities = new String[in.length()];
523
524 // Break out the (valid) entities one at a time...
525 for(int nextAmp = -1; (nextAmp = in.indexOf('&', nextAmp + 1)) != -1; )
526 {
527 // Look for the next semicolon...
528 final int nextSc = in.indexOf(';', nextAmp + 1);
529 if(nextSc == -1)
530 {
531 // Whoops; if no more semicolons, then
532 // all ampersands from current are bogus and we can stop.
533 in = ((nextAmp >= 0) ? in.substring(0, nextAmp) : "") +
534 (in.substring(nextAmp).replace('&', '+'));
535 break;
536 }
537
538
539 // TODO: do more validity checking of a putative entity:
540 // check that it letters and digits or a hash and all digits.
541
542
543 // Now assume that the entity is valid,
544 // so break it out into the entities array
545 // leaving the '&' behind
546 // and putting the body (not the trailing ';')
547 // in the entities[] array at the '&' index.
548 entities[nextAmp] = in.substring(nextAmp+1, nextSc);
549 in = in.substring(0, nextAmp+1) +
550 in.substring(nextSc+1);
551 }
552 }
553
554 // Truncate if necessary while
555 // any entities that we care about are "broken out"...
556 if((maxLen >= 0) && (in.length() > maxLen))
557 {
558 if(maxLen >= 3) { in = in.substring(0, maxLen-3) + "..."; }
559 else { in = in.substring(0, maxLen); }
560 }
561
562 // If we broke out any entities,
563 // put any remaining ones back in-line.
564 if(entities != null)
565 {
566 final StringBuilder sb = new StringBuilder(in.length()); // At least this long.
567
568 final int len = in.length();
569 for(int i = 0; i < len; ++i)
570 {
571 final char c = in.charAt(i);
572 sb.append(c);
573 if(c != '&')
574 { continue; } // Usual case; no entity involved.
575
576 // Expand an entity.
577 assert(entities[i] != null);
578 sb.append(entities[i]).append(';');
579 }
580
581 // Convert back to expanded string.
582 in = sb.toString();
583
584 // entities = null; // Allow GC.
585 }
586
587 // Look for any char outwith range 32--126.
588 // If one is found then replace every occurrence with ' ' and retrim at end,
589 // unless allowing entities in which case replace with an entity code.
590 for(int i = in.length(); --i >= 0; )
591 {
592 final char c = in.charAt(i);
593 if((c < 32) || (c > 126))
594 {
595 // Whole string needs fixing...
596 final StringBuilder sb = new StringBuilder(2 * in.length());
597 for(int j = 0; j < in.length(); ++j)
598 {
599 final char d = in.charAt(j);
600 if((d < 32) || (d > 126))
601 {
602 // If allowing entities,
603 // convert non-ASCII chars to entities.
604 if(!allowEntities) { sb.append(' '); }
605 else { sb.append("&#").append(0xffff & d).append(';'); }
606 }
607 else { sb.append(d); }
608 }
609 in = sb.toString();
610 if(!allowEntities) { in = in.trim(); }
611 break;
612 }
613 }
614
615 if(input.equals(in)) { return(input); } // Avoid making copies!
616 return(in);
617 }
618
619
620 /**Extension for a CharSequence that holds only 8-bit character values.
621 * The implementation of this may often hold the underlying data as a byte array.
622 */
623 public static interface CharSequence8Bit extends CharSequence
624 {
625 /**Return private copy (8-bit) text with each byte corresponding to one char; never null.
626 * May be far more efficient than converting via a String
627 * if the aim is to write directly to a file for example.
628 */
629 public byte[] toByteArray();
630 }
631
632 /**Orders CharSequence objects as if by String.compareTo(); not null.
633 * If both arguments to compare() are the same reference (even null) this returns 0 immediately.
634 */
635 public static final Comparator<CharSequence> CASE_SENSITIVE_ORDER = new Comparator<CharSequence>(){
636 public int compare(final CharSequence cs1, final CharSequence cs2)
637 {
638 if(cs1 == cs2) { return(0); } // Optimisation...
639 final int l1 = cs1.length();
640 final int l2 = cs2.length();
641 final int l = Math.min(l1, l2);
642 for(int i = 0; i < l; ++i)
643 {
644 final char c1 = cs1.charAt(i);
645 final char c2 = cs2.charAt(i);
646 if(c1 != c2)
647 { return(c1 - c2); }
648 }
649 return(l1 - l2);
650 }
651 };
652
653 /**Orders CharSequence objects as if by String.compareToIgnoreCase(); not null.
654 * If both arguments to compare() are the same reference (even null) this returns 0 immediately.
655 */
656 public static final Comparator<CharSequence> CASE_INSENSITIVE_ORDER = new Comparator<CharSequence>(){
657 public int compare(final CharSequence cs1, final CharSequence cs2)
658 {
659 if(cs1 == cs2) { return(0); } // Optimisation...
660 final int l1 = cs1.length();
661 final int l2 = cs2.length();
662 final int l = Math.min(l1, l2);
663 for(int i = 0; i < l; ++i)
664 {
665 char c1 = cs1.charAt(i);
666 char c2 = cs2.charAt(i);
667 if(c1 != c2)
668 {
669 c1 = Character.toUpperCase(c1);
670 c2 = Character.toUpperCase(c2);
671 if(c1 != c2)
672 {
673 c1 = Character.toLowerCase(c1);
674 c2 = Character.toLowerCase(c2);
675 if (c1 != c2)
676 { return(c1 - c2); }
677 }
678 }
679 }
680 return(l1 - l2);
681 }
682 };
683
684 /**First index of specified character in given (non-null) CharSequence as for String.
685 * @return index of first occurrence of c, else -1 if no occurrence of c
686 */
687 public static final int indexOf(final CharSequence cs, final char c)
688 { return(TextUtils.indexOf(cs, c, 0)); }
689
690 /**First index of specified character in given (non-null) CharSequence from/after specified index as for String.
691 * @return index of first occurrence of c, else -1 if no occurrence of c
692 */
693 public static final int indexOf(final CharSequence cs, final char c, final int fromIndex)
694 {
695 if(cs == null) { throw new IllegalArgumentException(); }
696 final int len = cs.length();
697 for(int i = Math.max(0, fromIndex); i < len; ++i)
698 { if(c == cs.charAt(i)) { return(i); } }
699 return(-1);
700 }
701
702 /**Last index of specified character in given (non-null) CharSequence as for String.
703 * @return index of last occurrence of c, else -1 if no occurrence of c
704 */
705 public static final int lastIndexOf(final CharSequence cs, final char c)
706 { return(TextUtils.lastIndexOf(cs, c, Integer.MAX_VALUE)); }
707
708 /**Last index of specified character in given (non-null) CharSequence from/before specified start index as for String.
709 * @return index of last occurrence of c, else -1 if no occurrence of c
710 */
711 public static final int lastIndexOf(final CharSequence cs, final char c, final int fromIndex)
712 {
713 if(cs == null) { throw new IllegalArgumentException(); }
714 final int len = cs.length();
715 for(int i = ((fromIndex >= len) ? len - 1 : fromIndex); --i >= 0; )
716 { if(c == cs.charAt(i)) { return(i); } }
717 return(-1);
718 }
719
720 /**Checks that the specified region of two (non-null) CharSequences matches exactly (as for String).
721 */
722 public static boolean regionMatches(final CharSequence cs1, final int start1,
723 final CharSequence cs2, final int start2,
724 final int len)
725 {
726 if((null == cs1) || (null == cs2)) { throw new IllegalArgumentException(); }
727 if((start1 < 0) || (start2 < 0)) { throw new IllegalArgumentException(); }
728 if((start1 + len > cs1.length()) || (start2 + len > cs2.length())) { throw new IllegalArgumentException(); }
729 for(int i = len; --i >= 0; )
730 { if(cs1.charAt(start1 + i) != cs2.charAt(start2 + i)) { return(false); } }
731 return(true);
732 }
733
734 /**Checks that two (non-null) CharSequences represent the same sequence of chars.
735 */
736 public static boolean contentEquals(final CharSequence cs1, final CharSequence cs2)
737 {
738 if((null == cs1) || (null == cs2)) { throw new IllegalArgumentException(); }
739 if(cs1 == cs2) { return(true); } // Optimisation.
740 final int len = cs1.length();
741 if(len != cs2.length()) { return(false); }
742 for(int i = len; --i >= 0; )
743 { if(cs1.charAt(i) != cs2.charAt(i)) { return(false); } }
744 return(true);
745 }
746
747 /**Checks that two (non-null) CharSequences represent the same sequence of chars, ignoring case. */
748 public static boolean contentEqualsIgnoreCase(final CharSequence cs1, final CharSequence cs2)
749 {
750 if((null == cs1) || (null == cs2)) { throw new IllegalArgumentException(); }
751 if(cs1 == cs2) { return(true); } // Optimisation.
752 final int len = cs1.length();
753 if(len != cs2.length()) { return(false); }
754 // Use our case-insensitive comparator for consistency.
755 return(0 == CASE_INSENSITIVE_ORDER.compare(cs1, cs2));
756 }
757
758 /**Compares two (non-null) CharSequences for lexical order. */
759 public static int compare(final CharSequence cs1, final CharSequence cs2)
760 {
761 if((null == cs1) || (null == cs2)) { throw new IllegalArgumentException(); }
762 final int l1 = cs1.length();
763 final int l2 = cs2.length();
764 final int l = Math.min(l1, l2);
765 for(int i = 0; i < l; ++i)
766 {
767 final char c1 = cs1.charAt(i);
768 final char c2 = cs2.charAt(i);
769 if(c1 != c2) { return(c1 - c2); }
770 }
771 return(l1 - l2);
772 }
773
774 /**Returns true if the first sequence starts with the second, else false. */
775 public static boolean startsWith(final CharSequence mainText,
776 final CharSequence putativePrefix)
777 {
778 final int ppl = putativePrefix.length();
779 return((mainText.length() >= ppl) &&
780 regionMatches(mainText, 0, putativePrefix, 0, ppl));
781 }
782
783 /**Returns true if the first sequence ends with the second, else false. */
784 public static boolean endsWith(final CharSequence mainText,
785 final CharSequence putativeSuffix)
786 {
787 final int psl = putativeSuffix.length();
788 final int ml = mainText.length();
789 return((ml >= psl) &&
790 regionMatches(mainText, ml-psl, putativeSuffix, 0, psl));
791 }
792
793 /**Create and populate a SortedSet of CharSequence in natural/total case-sensitive order that will work with any mix of immutable CharSequence keys; never null.
794 * This may not be consistent with equals() if keys are of mixed types.
795 * <p>
796 * The result is not thread-safe and is mutable.
797 *
798 * @param csc initial collection to populate from; can be null to avoid initial population
799 */
800 public static <T extends CharSequence> SortedSet<T> createCharSequenceSortedSet(final Collection<T> csc)
801 { return(createCharSequenceSortedSet(csc, TextUtils.CASE_SENSITIVE_ORDER)); }
802
803 /**Create and populate a SortedSet of CharSequence in specified order that will work with any mix of immutable CharSequence keys; never null.
804 * This may not be consistent with equals() if keys are of mixed types.
805 * <p>
806 * The result is not thread-safe and is mutable.
807 *
808 * @param csc initial collection to populate from; can be null to avoid initial population
809 * @param comparator used to order the items; never null
810 */
811 public static <T extends CharSequence> SortedSet<T> createCharSequenceSortedSet(final Collection<T> csc, final Comparator<CharSequence> comparator)
812 {
813 if(null == comparator) { throw new IllegalArgumentException(); }
814 final SortedSet<T> result = new TreeSet<T>(comparator);
815 if(null != csc) { result.addAll(csc); }
816 return(result);
817 }
818 }