001 /*
002 Copyright (c) 1996-2011, Damon Hart-Davis
003 All rights reserved.
004
005 Redistribution and use in source and binary forms, with or without
006 modification, are permitted provided that the following conditions are
007 met:
008
009 * Redistributions of source code must retain the above copyright
010 notice, this list of conditions and the following disclaimer.
011
012 * Redistributions in binary form must reproduce the above copyright
013 notice, this list of conditions and the following disclaimer in the
014 documentation and/or other materials provided with the
015 distribution.
016
017 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
018 IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
019 TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
020 PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
021 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
022 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
023 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
024 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
025 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
026 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
027 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
028 */
029 package org.hd.d.pg2k.svrCore;
030
031 import java.io.IOException;
032 import java.io.InvalidObjectException;
033 import java.io.ObjectInputStream;
034 import java.io.ObjectInputValidation;
035 import java.io.ObjectOutputStream;
036 import java.io.ObjectStreamException;
037 import java.io.Serializable;
038 import java.lang.ref.SoftReference;
039 import java.lang.ref.WeakReference;
040 import java.security.MessageDigest;
041 import java.security.NoSuchAlgorithmException;
042 import java.util.Comparator;
043 import java.util.Iterator;
044 import java.util.LinkedList;
045 import java.util.List;
046 import java.util.Set;
047
048 import org.hd.d.pg2k.svrCore.TextUtils.CharSequence8Bit;
049
050 import ORG.hd.d.IsDebug;
051
052 // TODO: Allow sharing of the underlying byte[] between any two non-prev equal-content Name types.
053
054 /**Compact immutable in-memory representation of a (usually-short, sharable-prefix/suffix) 8-bit ASCII String.
055 * This can share a common prefix and suffix with another instance,
056 * avoiding having to store the shared elements itself.
057 * <p>
058 * This is particularly suitable for densely-packed sorted lists of values,
059 * such as sets of exhibit names which with tend to have by necessity long shared prefixes.
060 * <p>
061 * This is suitable for names but also other usually-short 8-bit text values,
062 * in particular those that don't have the sort of internal structure and repetition
063 * that Compact7BitString can take advantage of.
064 * <p>
065 * This supports random access reasonably efficiently
066 * (eg through the CharSequence interface)
067 * but some elements will be slower to access than others.
068 * (chars at either end will tend to be the slowest to access.)
069 * <p>
070 * Converting to and from this form should be relatively resource-light,
071 * eg with low CPU and memory requirements,
072 * though all instances are automatically intern()ed which has some cost.
073 * <p>
074 * Where there is a conflict between memory consumption and CPU,
075 * this implementation is biased to minimise memory use at the possible expense of CPU.
076 * <p>
077 * Internally this uses a byte[] to store the data
078 * (a basic halving of in-memory size in most JVMs),
079 * and use of a common prefix with a similar
080 * (nominally lexically previous close value)
081 * should help further trim memory use.
082 * <p>
083 * (This implementation internally attempts to minimise the number of objects kept live
084 * and that need to be traversed to complete construction.)
085 * <p>
086 * These classes are not publicly constructible in order to help control instance count,
087 * and indeed may use MemoryTools.intern() to eliminate duplicates.
088 * <p>
089 * This uses an internal cache to attempt to find ad-hoc prefix matches with existing instances
090 * if no 'prev' value is offered during creation of a new instance.
091 * This cache is memory-sensitive and will be silently sacrificed
092 * if the system is short of memory.
093 * <p>
094 * Note that MemoryTools.intern() may not release a 'duplicate'
095 * if there remain other instances hanging from it.
096 * <p>
097 * This is not final though all its constructors are private
098 * to allow only inner-class derived types in order to preserve immutability.
099 * <p>
100 * As this is performance-critical code, and may have to run on slow/non-JIT JVMs,
101 * most assert()s that check purely-internally-imposed invariants
102 * are entirely eliminated from release code;
103 * those that might still be triggered by runtime issues external to this class,
104 * such as run-time presence of hashing algorithms,
105 * remain present to enabled at runtime if required.
106 */
107 public class Name implements CharSequence8Bit, Comparable<Name>,
108 Serializable, ObjectInputValidation,
109 MemoryTools.Internable // Not currently supporting InternableRank
110 {
111 /**Create a Name instance; never null unless input is null.
112 * It is often best to attempt to share a prefix with the lexically previous (or next) item.
113 * <p>
114 * If passed a null this returns a null.
115 * <p>
116 * If passed a Name (but not including sub-classes) this returns it untouched.
117 * @param text the 8-bit text to store; if null then null is returned
118 */
119 public static Name create(final CharSequence text)
120 { return(create(text, null)); }
121
122 /**Create a Name instance with a previous value to attempt to share a prefix with; never null unless input is null.
123 * It is often best to attempt to share a prefix with the lexically previous (or next) item.
124 * <p>
125 * If passed a null this returns a null.
126 * <p>
127 * If passed a Name (but not including sub-classes) this returns it untouched.
128 *
129 * @param text the 8-bit text to store; if null then null is returned
130 * @param prev previous value with which to attempt to share a prefix; null if none
131 */
132 public static Name create(final CharSequence text,
133 final Name prev)
134 {
135 if(null == text) { return(null); }
136
137 if(Name.class.equals(text.getClass()))
138 {
139 // Optimisation: if this is already a Name (and not a sub-class) then return it as-is
140 // if no prev has been offered (that might share a longer prefix).
141 if(prev == null) { return((Name) text); }
142 // Optimisation: if prev content equals this then return prev to save a visit to intern(), etc.
143 else if(prev.contentEquals(text)) { return(prev); }
144 }
145
146 final AdHocPrefixCache<Integer> adHocPrefixCache = getAdHocPrefixCache();
147 // If no prev was specified we look for one in the ad-hoc cache.
148 final Name prevToUse;
149 if(prev != null) { prevToUse = prev; }
150 else
151 {
152 // If we find an exact match with the right class in the ad-hoc cache then return it.
153 prevToUse = adHocPrefixCache.findPrev(text);
154 if((prevToUse != null) && (Name.class.equals(prevToUse.getClass())) && prevToUse.contentEquals(text))
155 { return(prevToUse); }
156 }
157 // Return intern()ed value to eliminate accidental duplicates.
158 // Pass it through the ad-hoc prefix cache too...
159 final Name result = adHocPrefixCache.offerNewInstance(MemoryTools.intern(new Name(text, prevToUse)));
160 if(EXTRA_ASSERTS) { assert(Name.class.equals(result.getClass())); }
161 return(result);
162 }
163
164 /**Attempts to create a Name instance, but falls back to returning an intern()ed String instance otherwise.
165 * This copes will possible non-8-bit values by returning a String rather than a Name.
166 * <p>
167 * Essentially this returns the most memory-efficient representation reasonably possible.
168 *
169 * @return null if text is null, else Name if 8-bit text, else intern()ed String instance
170 */
171 public static CharSequence createOrStringFallback(final CharSequence text, final Name prev)
172 {
173 try { return(create(text, prev)); }
174 catch(final IllegalArgumentException e) { return(MemoryTools.intern(text.toString())); }
175 }
176
177 /**Shared empty instance; not null. */
178 public static final Name EMPTY = create("");
179
180 /**Maximum 'prev' chain length that we will support; non-negative.
181 * Limits the amount of recursion and CPU involved in processing 'nested'/'chained' prefixes,
182 * at the possible expense of some lost compression (ie memory savings).
183 * <p>
184 * This limit also helps avoid blocking GC through overly tangled and straggly data structures.
185 * <p>
186 * Usually positive and in the range (say) 1 to 32 to allow compression,
187 * though higher values also hurt things such as sorting
188 * which forces lots of chain-chasing while comparing prefixes.
189 * <p>
190 * As at 2009/08/02 the maximum actual value when creating fresh ExhibitFull values is 18,
191 * ie 18 is sufficient to yield maximum compression/sharing.
192 * <p>
193 * We may need to raise or lower this limit even though serialised data already exists,
194 * so we gently coerce deserialised Name instances to cap this value where necessary.
195 */
196 private static final int MAX_PREV_CHAIN_LENGTH = 18;
197
198 /**If true, always try to search back through prev values for an as-good or better match.
199 * This in principle allows less redundant intermediate values to be retained in memory
200 * and postpones any forced truncation of the chain.
201 */
202 private static final boolean ALWAYS_LOOK_FOR_EARLIER_PREV = true;
203
204 /**Number of bits for prefix length (unsigned) value.
205 * Should usually be greater than the number of suffix bits,
206 * since the primary compaction mechanism is prefix sharing.
207 * <p>
208 * 9 is needed for maximal compression on 20090707 test set,
209 * but compression at 8 is very close.
210 */
211 private static final int PREFIX_BITS = 9;
212
213 /**Mask for prefix bits (at bottom of word).
214 * Broken out to help pre-JIT execution performance.
215 */
216 private static final int PREFIX_MASK = ((1<<PREFIX_BITS)-1);
217
218 /**Number of bits for suffix length (unsigned) value.
219 * This uses whatever bits are left after prefix bits have been taken
220 * eg from 16 bits in a shared short.
221 */
222 private static final int SUFFIX_BITS = 16 - PREFIX_BITS;
223
224 /**Mask for suffix bits (at top of word).
225 * Broken out to help pre-JIT execution performance.
226 */
227 private static final int SUFFIX_MASK = ((1<<SUFFIX_BITS)-1);
228
229 /**Max shared prefix length in characters; strictly positive.
230 * An actual match greater than this will be capped to this.
231 */
232 private static final int MAX_PREFIX_LENGTH = PREFIX_MASK;
233
234 /**Max shared prefix length in characters; strictly positive.
235 * An actual match greater than this will be capped to this.
236 */
237 private static final int MAX_SUFFIX_LENGTH = SUFFIX_MASK;
238
239 /**Minimum terminus match length; strictly positive.
240 * This helps eliminate completely spurious linkages that may hurt GC
241 * and trail rubbish in the serialised form of such spuriously-linked values.
242 * <p>
243 * We won't share very short prefixes/suffixes.
244 * <p>
245 * We avoid matches of "" (that should be dealt with by intern()) when this is >0.
246 * We should probably avoid picking up single letters such as trailing full-stops when this is >1.
247 * We should probably allow everything of at least length 3 or 4
248 * (such as ".jpg" or ".au" for example)
249 * which is probably roughly the difference in bytes in a serialised stream
250 * of adaptively encoding a null vs a non-null prev pointer.
251 */
252 private static final int MIN_TERMINUS_MATCH_LENGTH = 3;
253
254 /**Minimum prefix/suffix total share length to use a prev; strictly positive.
255 * If 1 then we always try to use a 'prev' when there is even one character matching,
256 * but this may entrain lots of 'dead wood' that might otherwise be garbage.
257 * <p>
258 * A higher value may ultimately result in lower memory consumption
259 * (and no loss of compactness with or without compression of the serialised form)
260 * though with some loss in the headline immediate 'characters saved' figure.
261 * <p>
262 * This does not need to be enforced on (old) deserialised data,
263 * since for example intern() may often resolve such issues silently,
264 * and in any case it is not a correctness issue.
265 * <p>
266 * For consistency we line this up with the minimum prefix/suffix match length.
267 */
268 private static final int MIN_SHARE_CHARS = MIN_TERMINUS_MATCH_LENGTH;
269
270 /**Create a new instance with a previous value to attempt to share a prefix with.
271 * Note that the 'prev' value may be any sub-type of Name.
272 * <p>
273 * Private in order to better manage instance control.
274 *
275 * @param text the 8-bit text to store and must not equal the prev; never null but can be empty
276 * @param putativePrev non-empty previous value with which to attempt to share a prefix; null if none
277 */
278 private Name(final CharSequence text, final Name putativePrev)
279 {
280 if(EXTRA_ASSERTS) { assert(null != text) : "factory method should already have screened out null text value"; }
281 // Verify all text is 7-bit.
282 final int length = text.length();
283 for(int i = length; --i >= 0; )
284 { if(text.charAt(i) > 255) { throw new IllegalArgumentException(); } }
285
286 // Condition use of the prev value.
287 Name p = putativePrev;
288 // If the prev chain is already maximum length
289 // then try going back at least one step
290 // which may still be of some help with compression.
291 // We may in any case always look back for the oldest good-enough prev
292 // to help trim memory use and preserve elbow room.
293 //
294 // This treats suffix matching as a secondary opportunistic task
295 // and we'll optimise any prefix by preference
296 // unless the initial prefix is shorter than the initial suffix.
297 if(null != p)
298 {
299 final int initialPrefixMatchLength = computeCommonPrefixLength(text, p);
300 final int initialSuffixMatchLength = computeCommonSuffixLength(text, p);
301
302 // Abandon it now now if the offered prev has no shared termini with us!
303 if((initialPrefixMatchLength == 0) && (initialSuffixMatchLength == 0))
304 { p = null; }
305 else
306 {
307 if(ALWAYS_LOOK_FOR_EARLIER_PREV)
308 {
309 // Prefer prefix matching if better or same as suffix length.
310 if(initialPrefixMatchLength >= initialSuffixMatchLength)
311 {
312 // Attempt to quickly find at-least-as-good recent prev
313 // when there is a prefix match available.
314 while((p != null) && (p.prev != null) &&
315 (computeCommonPrefixLength(text, p.prev) >= initialPrefixMatchLength))
316 { p = p.prev; }
317 }
318 else
319 {
320 // If no prefix match was available
321 // then try to find the shortest chain while retaining the current prefix.
322 while((p != null) && (p.prev != null) &&
323 (computeCommonSuffixLength(text, p.prev) >= initialSuffixMatchLength))
324 { p = p.prev; }
325 }
326 }
327
328 final int ppCL = p.getPrevChainLength();
329 if(EXTRA_ASSERTS) { assert(ppCL >= 0); }
330 if(EXTRA_ASSERTS) { assert(ppCL <= MAX_PREV_CHAIN_LENGTH); }
331 if(ppCL >= MAX_PREV_CHAIN_LENGTH)
332 {
333 if(DEBUG) { System.out.println("SharedPrefix7BitString: limiting chain length"); }
334 // Go back at least one step; further if it doesn't hurt prefix match length much.
335 // This should give headroom to build a new useful near-maximal-length prefix.
336 final int targetMatchLength = Math.max(initialPrefixMatchLength / 2, MIN_SHARE_CHARS);
337 do { p = p.prev; }
338 while((p != null) && (p.prev != null) &&
339 ((p.getPrevChainLength() >= MAX_PREV_CHAIN_LENGTH) ||
340 (computeCommonPrefixLength(text, p.prev) >= targetMatchLength)));
341 }
342 // Ensure that any prev that we now have has a short enough chain length to tag on to.
343 if(EXTRA_ASSERTS) { assert((p == null) || (p.getPrevChainLength() < MAX_PREV_CHAIN_LENGTH)); }
344 }
345 }
346 // Find how much of prev is actually in common with the new text.
347 int sharedPrefixLen = (null == p) ? 0 : computeCommonPrefixLength(text, p);
348 // Only store/use any putative prev if there is in fact some common prefix/suffix string.
349 if(null != p)
350 { p = ((sharedPrefixLen > 0) || (computeCommonSuffixLength(text, p) > 0)) ? p : null; }
351 if(EXTRA_ASSERTS) { assert((sharedPrefixLen == 0) || (p != null)); } // No shared prefix possible without a non-null prev.
352 if(EXTRA_ASSERTS) { assert(getPrevChainLength() <= MAX_PREV_CHAIN_LENGTH); }
353
354 // Now extract any matching suffix from the tail residue after extracting a maximal prefix.
355 int tailLen = length - sharedPrefixLen;
356 int residualSharedSuffixLen = (null == p) ? 0 : computeCommonSuffixLength(text.subSequence(sharedPrefixLen, length), p.subSequence(sharedPrefixLen, p.length()));
357 final int putativeSharedLength = sharedPrefixLen + residualSharedSuffixLen;
358
359 // If not enough shared length to justify a link to prev
360 // then avoid any such sharing...
361 if(putativeSharedLength < MIN_SHARE_CHARS)
362 {
363 p = null; // Reset to no sharing.
364 sharedPrefixLen = 0; // Reset to no sharing.
365 residualSharedSuffixLen = 0; // Reset to no sharing.
366 tailLen = length; // Reset to no sharing.
367 }
368
369 // No record the sharing that we are going to use.
370 prev = p;
371 // Now pack the termini lengths in together.
372 terminiLengths = (short) (sharedPrefixLen | (residualSharedSuffixLen << PREFIX_BITS));
373 // Set up the fast-access shadow prefix-length value.
374 _prefixLen = (short) sharedPrefixLen;
375
376 if(EXTRA_ASSERTS) { assert(getPrefixLen() == sharedPrefixLen) /* : "getPrefixLen()="+getPrefixLen()+", sharedPrefixLen="+sharedPrefixLen */ ; }
377 if(EXTRA_ASSERTS) { assert(getSuffixLen() == residualSharedSuffixLen); }
378 // We can't claim more shared chars than exist in the 'prev' value!
379 if(EXTRA_ASSERTS) { assert(sharedPrefixLen + residualSharedSuffixLen <= ((p == null) ? 0 : p.length())) : ("text="+text+", prev="+p); }
380
381 // Compute size of middle (unshared) portion.
382 final int midLen = tailLen - residualSharedSuffixLen;
383
384 if(EXTRA_ASSERTS) { assert(midLen >= 0); }
385 if(EXTRA_ASSERTS) { assert(sharedPrefixLen + midLen + residualSharedSuffixLen == text.length()); }
386
387 if(midLen == 0)
388 {
389 // Avoid any allocation for the tail if no middle bytes.
390 mid = EMPTIES.EMPTY_MID;
391 }
392 // Usual case: copy the non-shared middle bytes.
393 else
394 {
395 final byte t[] = new byte[midLen];
396 for(int i = midLen; --i >= 0; )
397 { t[i] = (byte) text.charAt(i + sharedPrefixLen); }
398 mid = t;
399 }
400
401 if(DEBUG) { System.out.println("prefixLen|midLen|suffixLen = " + sharedPrefixLen+"|"+midLen+"|"+residualSharedSuffixLen + " from "+text.subSequence(0, sharedPrefixLen)+"|"+text.subSequence(sharedPrefixLen, sharedPrefixLen+midLen)+"|"+text.subSequence(sharedPrefixLen+midLen, text.length())+", prevChainLength = " + getPrevChainLength()); }
402 //if(DEBUG && (sharedPrefixLen == 0) && (residualSharedSuffixLen != 0)) { System.out.println("***Shared-suffix-only (suffix len "+residualSharedSuffixLen+") Name instance created: "+text); }
403
404 if(EXTRA_ASSERTS) { assert((prev == null) || (sharedPrefixLen + residualSharedSuffixLen >= MIN_SHARE_CHARS)); }
405
406 // Verify what we've been given.
407 try { validateObject(); }
408 catch(final InvalidObjectException e)
409 { throw new IllegalArgumentException(e); }
410 }
411
412 /**Compute the shared/common prefix length. */
413 private static int computeCommonPrefixLength(final CharSequence text, final Name p)
414 {
415 int sharedPrefixLen = 0;
416 final int l = Math.min(Math.min(text.length(), p.length()), MAX_PREFIX_LENGTH);
417 for(int i = 0; i < l; ++i, ++sharedPrefixLen)
418 { if(p.charAt(i) != text.charAt(i)) { break; } }
419 if(EXTRA_ASSERTS) { assert(sharedPrefixLen >= 0); }
420 return(sharedPrefixLen);
421 }
422
423 /**Compute the shared/common suffix length.
424 * Note that we may have to match against some tail of the prev text
425 * (to avoid overlapping prefix/suffix parts)
426 * and thus the p parameter is the more general CharSequence.
427 */
428 private static int computeCommonSuffixLength(final CharSequence text, final CharSequence p)
429 {
430 int sharedSuffixLen = 0;
431 final int tLen = text.length();
432 final int pLen = p.length();
433 final int l = Math.min(Math.min(tLen, pLen), MAX_SUFFIX_LENGTH);
434 for(int i = 0; i < l; ++i, ++sharedSuffixLen)
435 { if(p.charAt(pLen - i - 1) != text.charAt(tLen - i - 1)) { break; } }
436 if(EXTRA_ASSERTS) { assert(sharedSuffixLen >= 0); }
437 if(EXTRA_ASSERTS) { assert((sharedSuffixLen == l) || (p.charAt(pLen - sharedSuffixLen - 1) != text.charAt(tLen - sharedSuffixLen - 1))) : "returned match must be maximal length"; }
438 return(sharedSuffixLen);
439 }
440
441 /**Cache of the computed hash code.
442 * Not part of the permanent/serialised state of the object since easy to recompute,
443 * but this is called so often by intern(), etc, that it is worth storing
444 * given that typical texts will be long.
445 */
446 private transient int hash; // Default to 0
447
448 /**The hash is computed over the entire text as for String.
449 * This does not depend on how the data is internally stored, eg using a prefix or not.
450 */
451 @Override
452 public final int hashCode()
453 {
454 int h = hash;
455 if(h == 0) // Probably not yet computed, so do so now.
456 {
457 // Hash needs to be (re)computed.
458 final int len = length();
459 for(int i = 0; i < len; i++)
460 { h = 31*h + charAt(i); }
461 hash = h; // Cache the result.
462 }
463 return(h);
464 }
465
466 /**Equality depends on the items being the same concrete (derived) type and lexically identical.
467 * This does not depend on how the data is internally stored, eg using a prefix or not.
468 * Specifically designed to work for derived classes too by default.
469 */
470 @Override
471 public final boolean equals(final Object obj)
472 {
473 if(this == obj) { return(true); }
474 if(null == obj) { return(false); }
475 if(!getClass().equals(obj.getClass())) { return(false); }
476 // Can't use 'fast' copies in order to avoid potential infinite recursion...
477 return(TextUtils.contentEquals(this, (Name) obj));
478 }
479
480 /**True iff this represents the same sequence of char values as the specified sequence.
481 * @param other the sequence to compare this {@code String} against
482 *
483 * @return {@code true} if this represents the same
484 * sequence of char values as the specified sequence,
485 * {@code false} otherwise
486 */
487 public final boolean contentEquals(final CharSequence o)
488 {
489 // Very quick length-equality check before any possible conversion, etc...
490 if(length() != o.length()) { return(false); }
491 // Use the 'fast' version if possible since much better for sorting/searching...
492 return(TextUtils.contentEquals((this),
493 (o instanceof Name) ? (((Name) o)) : o));
494 }
495
496 // /**The InternableRanking is lower/better the more characters we are already sharing (ie not having to hold locally).
497 // * We then aim to minimise the prev chain length.
498 // * <p>
499 // * The aim is to be left with the best (most compact) version intern()ed.
500 // */
501 // public final int internableRank(final InternableRank obj)
502 // {
503 // // Must only be called on items already equals().
504 //// assert(equals(obj));
505 //
506 // final Name other = (Name) obj;
507 //
508 // // See how much prefix/suffix we are sharing.
509 // final int shared = _prefixLen + getSuffixLen();
510 // final int sharedOther = other._prefixLen + other.getSuffixLen();
511 // // More sharing is better so lower rank value.
512 // if(shared > sharedOther) { return(-1); }
513 // if(shared < sharedOther) { return(+1); }
514 //
515 // // Break ties on the length of the prev chain: lower/shorter is better.
516 // return(getPrevChainLength() - other.getPrevChainLength());
517 // }
518
519
520 /**Non-empty item with which we share a prefix and/or suffix, or null if entire value is empty (equivalent to "") or if there is no shared part(s).
521 * May be non-empty if this new value is a prefix/suffix of the prev value.
522 * <p>
523 * Is protected since potentially to derived instances.
524 */
525 protected final Name prev;
526
527 /**Termini lengths (unsigned) packed into a single value for efficiency.
528 * The prefix bits are the low-order bits.
529 * <p>
530 * This is part of the compact serialised state.
531 */
532 private final short terminiLengths;
533
534 /**This is a shadow of the most-frequently accessed prefix length value; non-negative.
535 * This contains a copy of the prefix length.
536 * <p>
537 * Should not expand the memory footprint of an instance
538 * because it should slot in the gap left by the terminiLengths short value.
539 * <p>
540 * This is not part of the serialised state.
541 */
542 private transient short _prefixLen;
543
544 /**Middle portion of value, or a constant empty value iff there are no middle bytes; never null.
545 * We don't use default serialisation for this:
546 * it is written on the wire as raw bytes (if non-empty, else not at all)
547 * following a one-byte (positive) length if <= 127 chars,
548 * else a 4-byte (negated) length to be able to represent maximal-size CharSequences.
549 */
550 private transient /* final */ byte[] mid;
551
552 /**Used to hold values that may be needed before Name's own static initialisation can have completed.
553 * This can happen with circular initialisation dependencies.
554 */
555 private static final class EMPTIES
556 {
557 /**Constant/immutable/shareable empty mid value for any empty mid-section; never null. */
558 static final byte[] EMPTY_MID = new byte[0];
559
560 /**Constant/immutable/shareable empty cached termini; never null. */
561 static final SoftReference<byte[]> EMPTY_TERMINI = new SoftReference<byte[]>(null);
562 }
563
564 /**Cached termini, catenated to minimise memory overhead; never null.
565 * Held via a SoftReference to allow automatic release if memory is stressed.
566 * <p>
567 * Marked transient since redundant and not part of the persisted state.
568 * <p>
569 * Marked volatile for lock-free thread-safe access.
570 * <p>
571 * When empty we can set it to the constant/empty shared value
572 * which takes up no extra space but saves a null test for this value.
573 */
574 private transient volatile SoftReference<byte[]> _cached_termini = EMPTIES.EMPTY_TERMINI;
575
576 /**Minimum length of name that we'll attempt to cache termini for; non-negative.
577 * Avoids silly memory overheads for very small instances.
578 */
579 private static final int MIN_TERMINI_CACHE_CHARS = 32;
580
581 // /**If true then we can cache the termini of this instance if we wish.
582 // * Generally this will if the shared termini are long enough
583 // * to amortise the memory overheads of a SoftReference and another array
584 // * (which implies that we're not the root of a chain for example)
585 // * and if we're not close to a root or another cached value
586 // * that would have a low-enough recursion cost to reach.
587 // * <p>
588 // * This reduces the maximum memory burden even when everything possible is cached,
589 // * but without hurting speed too much.
590 // * <p>
591 // * Should be quick to compute as potentially accessed on each charAt().
592 // */
593 // private boolean canCacheTermini()
594 // {
595 // // Can't/shouldn't cache if at a root.
596 // if(prev == null) { return(false); }
597 //
598 // // Can't cache if shared chars are too few to be worth the memory overheads.
599 // final int sharedChars = _prefixLen + getSuffixLen();
600 // if(sharedChars < MIN_TERMINI_CACHE_CHARS) { return(false); }
601 //
602 // // Looks OK to cache...
603 // return(true);
604 // }
605
606 /**Make cached termini and return the cached value; never null.
607 * Computes the cached form of the termini and stores it wrapped in a SoftReference,
608 * discarding anything else already present.
609 * <p>
610 * This has the form of all the chars of the shared prefix
611 * immediately followed by all the chars of the shared suffix,
612 * stored as byte values with no padding.
613 */
614 private byte[] cacheTermini()
615 {
616 final int pL = _prefixLen;
617 final int sL = getSuffixLen();
618 final byte t[] = new byte[pL + sL];
619 if(EXTRA_ASSERTS) { assert(t.length > 0); } // Shouldn't be doing this without some shared state to be cached...
620 for(int i = pL; --i >= 0; )
621 { t[i] = (byte) prev.charAtInternal(i); }
622 final int l = prev.length();
623 for(int i = sL; --i >= 0; )
624 { t[i+pL] = (byte) prev.charAtInternal(l-sL + i); }
625 _cached_termini = new SoftReference<byte[]>(t);
626 return(t);
627 }
628
629 /**Chain depth, ie how deep our chain of prev values is, zero if this instance has no prev; non-negative.
630 * Can be used to help avoid getting silly tangles of non-GCable prev chains,
631 * and indeed might help avoid situations that might attempt to create circular references.
632 * <p>
633 * Not for normal use of this object!
634 */
635 public final int getPrevChainLength()
636 {
637 // if(null == prev) { return(0); }
638 // return(1 + prev.getPrevChainLength());
639 // Convert to iteration...
640 int len = 0;
641 for(Name p = this; null != (p = p.prev); ++len) { }
642 return(len);
643 }
644
645 /**Length of common prefix with prev item if any; non-negative.
646 * Only of minimal interest outside the class and may be withdrawn from the API.
647 */
648 public final int getPrefixLen()
649 {
650 return(_prefixLen); // return(terminiLengths & PREFIX_MASK); // Bottom bits.
651 }
652
653 /**Length of common suffix with prev item if any; non-negative.
654 * No one really needs to see this outside the class...
655 * <p>
656 * Being private may encourage a static optimiser such as ProGuard to inline it.
657 */
658 private final int getSuffixLen()
659 {
660 return((terminiLengths >>> PREFIX_BITS) & SUFFIX_MASK); // Uses all remaining most-significant bits.
661 }
662
663 /**Convert to String containing same char sequence.
664 * We do not expensively intern() the result as it may be short-lived..
665 */
666 @Override public String toString() { return((new StringBuilder(length())).append(this).toString()); }
667
668 /**Writes the name as ASCII bytes to the given (non-null) byte array starting at the given offset.
669 * The array must be long enough (from the given offset) else an exception will be thrown.
670 * <p>
671 * Aims to be efficient (in CPU and memory footprint).
672 */
673 public final void writeToByteArray(final byte[] buf, final int offset)
674 {
675 if(null == buf) { throw new IllegalArgumentException(); }
676 if(offset < 0) { throw new IllegalArgumentException(); }
677 final int length = length();
678 if(offset + length > buf.length) { throw new IllegalArgumentException(); }
679
680 // Copy in the shared prefix, if any.
681 final short pLen = _prefixLen;
682 if(pLen != 0)
683 {
684 // Use the cache if already present (but don't force creation of one).
685 final SoftReference<byte[]> cacheSR = _cached_termini;
686 final byte[] cache = cacheSR.get();
687 if(null != cache)
688 { System.arraycopy(cache, 0, buf, offset, pLen); }
689 else // No cache: fetch directly from prev Name.
690 {
691 for(int i = pLen; --i >= 0; )
692 { buf[i + offset] = prev.charAtInternal(i); }
693 }
694 }
695 // Copy in the unshared mid, if any.
696 final int mLen = mid.length;
697 if(mLen != 0)
698 { System.arraycopy(mid, 0, buf, pLen + offset, mLen); }
699 // Copy in the shared suffix, if any.
700 final int sStart = pLen + mLen;
701 if(sStart < length)
702 {
703 // Use the cache if already present (but don't force creation of one).
704 final SoftReference<byte[]> cacheSR = _cached_termini;
705 final byte[] cache = cacheSR.get();
706 if(null != cache)
707 { System.arraycopy(cache, pLen, buf, sStart + offset, length-sStart); }
708 else // No cache: fetch directly from prev Name.
709 {
710 for(int i = length; --i >= sStart; )
711 {
712 final int reverseIndex = length - i;
713 buf[i + offset] = prev.charAtInternal(prev.length() - reverseIndex);
714 }
715 }
716 }
717 }
718
719 /**Create private byte[] copy for caller; never null.
720 * Aims to be efficient (in CPU and memory footprint).
721 */
722 public final byte[] toByteArray()
723 {
724 final int length = length();
725 final byte[] result = new byte[length];
726 writeToByteArray(result, 0);
727 return(result);
728 }
729
730 /**Internal version of charAt() with bounds checking assumed done, etc.
731 * This has no further cacheing...
732 */
733 private byte charAtInternal(final int index)
734 {
735 // Extract from the shared prefix.
736 final int pLen = _prefixLen;
737 if(index < pLen)
738 {
739 // Recurse to fetch from further up the chain.
740 return(prev.charAtInternal(index));
741 }
742
743 // Extract from the shared suffix.
744 final int mLen = mid.length;
745 final int sStart = pLen + mLen;
746 if(index >= sStart)
747 {
748 // Recurse to fetch from further up the chain.
749 final int length = sStart + getSuffixLen(); // length();
750 if(EXTRA_ASSERTS) { assert(index < length); }
751 final int reverseIndex = length - index;
752 if(EXTRA_ASSERTS) { assert(reverseIndex > 0); }
753 return(prev.charAtInternal(prev.length() - reverseIndex));
754 }
755
756 // Extract from the (unshared) mid text, which is nice and fast...
757 return(mid[index - pLen]);
758 }
759
760 //@Override
761 public final char charAt(final int index)
762 {
763 // Lower bounds check on public interface.
764 if(index < 0) { throw new IllegalArgumentException(); }
765
766 // Use/create the cache here if possible
767 // so that the inner loop implemented by charAtInternal() is as tight as possible.
768
769 // If the requested char is in the unshared mid portion
770 // then we need not touch the cache at all.
771 final int pLen = _prefixLen;
772 final boolean notInSharedPrefix = (index >= pLen);
773 final int mLen = (mid.length);
774 final int mOffset;
775 if(notInSharedPrefix && (mLen > (mOffset = (index - pLen))))
776 { return((char) (mid[mOffset] & 0xff)); }
777
778 // Now do the upper bound check in the input...
779 final int sLen = getSuffixLen();
780 final int length = pLen + mLen + sLen; // Quicker than having length() recompute it...
781 if(index >= length) { throw new IllegalArgumentException(); }
782
783 // If we get here then this instance is not a root.
784 // (Else it would have been satisfied from the mid section which is the whole value.)
785 if(EXTRA_ASSERTS) { assert(prev != null); }
786
787 // Get value from cache if permitted and available.
788 // This must have enough unshared chars to be worth the SoftReference overheads,
789 // and not so near to a root as to not save much time.
790 if(((pLen + sLen) >= MIN_TERMINI_CACHE_CHARS) && (prev.prev != null) && (prev.prev.prev != null))
791 {
792 final SoftReference<byte[]> cacheSR = _cached_termini;
793 final byte[] cache = cacheSR.get();
794 if(null != cache)
795 {
796 // If not in prefix then must be in suffix...
797 final int offset = notInSharedPrefix ? (index - mLen) : index;
798 return((char) (cache[offset] & 0xff));
799 }
800 // If no cache available and we may create one then do so, and use it immediately.
801 else // if(canCacheTermini())
802 {
803 final int offset = notInSharedPrefix ? (index - mLen) : index;
804 return((char) ((cacheTermini())[offset] & 0xff));
805 }
806 }
807
808 // Not from the mid section, and no cache available...
809 return((char) (charAtInternal(index) & 0xff));
810 }
811
812 //@Override
813 public final int length()
814 {
815 return(_prefixLen + mid.length + getSuffixLen());
816 }
817
818 /**Effectively-immutable light-weight wrapper around the underlying text.
819 * Can be intern()ed to help eliminate some expensive duplicates.
820 */
821 private static class CSWrapper<T extends Name> implements CharSequence8Bit,
822 Comparable<CharSequence>,
823 MemoryTools.Internable
824
825 {
826 /**Underlying instance; non-null.
827 * Definitely not intended to be serialised.
828 */
829 transient final T underlying;
830 /**Start offset (inclusive) on underlying impl; non-negative. */
831 final int start;
832 /**End offset (exclusive) on underlying impl; non-negative. */
833 final int end;
834
835 /**Create an instance.
836 * Only called from trusted classes so arg-checking can be by assert() to save release-build CPU cycles.
837 * @param underlying the Name instance that this provides a view of; never null
838 * @param start inclusive start index; in range [0,end]
839 * @param end exclusive end index; in range [start,underlying.length()]
840 */
841 CSWrapper(final T underlying, final int start, final int end)
842 {
843 if(EXTRA_ASSERTS)
844 {
845 assert(null != underlying);
846 assert(start >= 0);
847 assert(end <= underlying.length());
848 assert(start <= end);
849 }
850
851 this.underlying = underlying;
852 this.start = start;
853 this.end = end;
854 }
855
856 /* (non-Javadoc)
857 * @see java.lang.CharSequence#charAt(int)
858 */
859 //@Override
860 public char charAt(final int index)
861 {
862 if(index < 0) { throw new IllegalArgumentException(); }
863 if(index >= (end-start)) { throw new IllegalArgumentException(); }
864 return(underlying.charAt(index + start));
865 }
866
867 /* (non-Javadoc)
868 * @see java.lang.CharSequence#length()
869 */
870 //@Override
871 public int length()
872 {
873 return(end-start);
874 }
875
876 /* (non-Javadoc)
877 * @see java.lang.CharSequence#subSequence(int, int)
878 */
879 //@Override
880 public CharSequence subSequence(final int start, final int end)
881 {
882 // We validate the parameters here since our constructor will not.
883 if(start < 0) { throw new IllegalArgumentException(); }
884 if(start > end) { throw new IllegalArgumentException(); }
885 if(end > length()) { throw new IllegalArgumentException(); }
886 if(EXTRA_ASSERTS) { assert(this.start + end <= underlying.length()); }
887 // Creates new wrapper around original underlying text for efficiency,
888 // ie not chained through this CSWrapper instance.
889 return(new CSWrapper<T>(underlying, this.start + start, this.start + end));
890 }
891
892 /**Equality checks for the same type and content. */
893 @Override public boolean equals(final Object obj)
894 {
895 if(this == obj) { return(true); }
896 if(!(obj instanceof CSWrapper)) { return(false); }
897 return(TextUtils.contentEquals(this, (CharSequence)obj));
898 }
899
900 /**The hash code is based on a fixed-time sampling of the content for speed. */
901 @Override public int hashCode()
902 {
903 final int l = length();
904 int h = l;
905 final int step = (l >>> 3);
906 for(int i = l; --i >= 0; i -= step)
907 { h = 37*h + charAt(i); }
908 h -= l;
909 return(h);
910 }
911
912 //@Override
913 public int compareTo(final CharSequence other)
914 { return(TextUtils.compare(this, other)); }
915
916 /**Convert to String containing same char sequence.
917 * We don't expensively intern() the result as it may be short-lived.
918 */
919 @Override public String toString() { return((new StringBuilder(length())).append(this).toString()); }
920
921 /**Create private byte[] copy for caller; never null. */
922 public final byte[] toByteArray()
923 {
924 final int length = length();
925 final byte[] result = new byte[length];
926 // Too hideous to fully optimise (yet) so do the simple way.
927 for(int i = length; --i >= 0; )
928 { result[i] = (byte) underlying.charAt(i + start); } // May make use of cache...
929 return(result);
930 }
931 }
932
933 /**Maximum number of chars to take a copy of the data. */
934 private static final int MAX_TRIVIAL_COPY = 4;
935
936 /**Generate a light-weight wrapper, or for very small amounts of data, a copy; never null.
937 * Not necessarily Serializable.
938 */
939 //@Override
940 public final CharSequence subSequence(final int start, final int end)
941 {
942 if(start < 0) { throw new IndexOutOfBoundsException(); }
943 final int length = length();
944 if(end > length) { throw new IndexOutOfBoundsException(); }
945 if(start > end) { throw new IndexOutOfBoundsException(); }
946
947 final int newLength = end - start;
948
949 // If this is the whole existing value then return it.
950 if((start == 0) && (length == newLength)) { return(this); }
951
952 // In trivial empty case return fixed empty value.
953 if(newLength == 0) { return(EMPTY); }
954
955 // For trivial small subsequences,
956 // make a copy of the relevant data,
957 // eg where we wouldn't want to pin down the possibly-large original
958 // for something not even worthy of a pointer.
959 // We don't intern() so as to avoid high overheads.
960 if(newLength <= MAX_TRIVIAL_COPY)
961 { return(new CS8Bit(this, start, end)); }
962
963 // TODO: optimisation: if this is a prefix then return as a prefix instance.
964
965 // GENERAL CASE: make a light-weight wrapper.
966 return(new CSWrapper<Name>(this, start, end));
967 }
968
969
970 /**Validate fields/state.
971 * Called in the constructor and possibly after deserialising.
972 * <p>
973 * Barf if something bad is found.
974 */
975 public void validateObject()
976 throws InvalidObjectException
977 {
978 // Test some invariants that duff external data should not be able to influence.
979 if(EXTRA_ASSERTS) { assert(null != _cached_termini); }
980 if(EXTRA_ASSERTS) { assert(null != mid); }
981 if(EXTRA_ASSERTS) { assert((mid.length != 0) || (mid == EMPTIES.EMPTY_MID)); }
982
983 // We should use a fixed single-instance array rather than an empty one.
984 final int prefixLen = _prefixLen;
985 if(EXTRA_ASSERTS) { assert(getPrefixLen() == _prefixLen); }
986 final int suffixLen = getSuffixLen();
987 if(EXTRA_ASSERTS) { assert((prefixLen >= 0) && (suffixLen >= 0)); }
988 final int sharedLen = prefixLen + suffixLen;
989 if(prev != null)
990 {
991 if(sharedLen > prev.length())
992 { throw new InvalidObjectException("bad object: prefix/suffix too long for prev: shared="+sharedLen+" vs prev.length()="+prev.length()); }
993 if(EXTRA_ASSERTS) { assert(getPrevChainLength() >= 0); }
994 if(getPrevChainLength() > MAX_PREV_CHAIN_LENGTH)
995 { throw new InvalidObjectException("bad object: overlong prevChainLength: "+getPrevChainLength()); }
996 // Prev must only be non-null if we have some sharing of prefix/suffix.
997 // Note persisted values may have hash a different MIN_SHARE_CHARS at creation
998 // so don't reject something simply because of 'not enough' sharing by current standards.
999 if(sharedLen == 0)
1000 { throw new InvalidObjectException("bad object: invalid unused prev"); }
1001 }
1002 else
1003 {
1004 if(EXTRA_ASSERTS) { assert(getPrevChainLength() == 0); }
1005 if(sharedLen != 0)
1006 { throw new InvalidObjectException("bad object: missing prev needed for shared text"); }
1007 }
1008 if(Integer.MAX_VALUE - prefixLen - suffixLen < length())
1009 { throw new InvalidObjectException("bad object: overlong components"); }
1010 }
1011
1012 /**Deserialise.
1013 * Expects validation to happen in readResolve().
1014 */
1015 private void readObject(final ObjectInputStream in)
1016 throws IOException, ClassNotFoundException
1017 {
1018 // Read in most of the fields in the usual way.
1019 in.defaultReadObject();
1020
1021 // Set up the fast-access shadow prefix-length value.
1022 _prefixLen = (short) (terminiLengths & PREFIX_MASK);
1023
1024 // Fix up cache to be non-null (even if not with any cached data for now)...
1025 _cached_termini = EMPTIES.EMPTY_TERMINI;
1026
1027 // Read in the mid text with a variable-length 'length' prefix.
1028 // This mechanism also ensures that our 'mid' element is unshared.
1029 final byte l0 = in.readByte();
1030 final int length;
1031 if(l0 >= 0) { length = l0; }
1032 else
1033 {
1034 final byte l1 = in.readByte();
1035 final byte l2 = in.readByte();
1036 final byte l3 = in.readByte();
1037 length = -((l0 << 24) + ((l1 & 0xff) << 16) + ((l2 & 0xff) << 8) + (l3 & 0xff));
1038 }
1039 // Create and read the text field; don't create a new empty byte[] for size 0.
1040 if(length == 0) { mid = EMPTIES.EMPTY_MID; }
1041 else
1042 {
1043 mid = new byte[length];
1044 in.readFully(mid);
1045 }
1046
1047 // IMPORTANT: this instance is now safe to use
1048 // but may not be fully valid (eg prev chain length may be too long).
1049 // such issues have to be handled in read-resolve.
1050 }
1051
1052 /**Write out a minimally-redundant form of our internal information.
1053 * The more-efficient on-the-wire format also makes defensive
1054 * reading easier.
1055 */
1056 private void writeObject(final ObjectOutputStream oos)
1057 throws IOException
1058 {
1059 // Check if we're about to persist a bogus value.
1060 if(EXTRA_ASSERTS) { validateObject(); }
1061
1062 // Write out most of the fields in the usual way.
1063 oos.defaultWriteObject();
1064
1065 // Write the length in a variable-length format
1066 // (very short strings require fewer length bytes).
1067 // For very small strings use a byte-value length.
1068 if(mid.length <= Byte.MAX_VALUE) { oos.writeByte(mid.length); }
1069 // Write larger text sizes using a negative (int, 4-byte) length
1070 // so that reading the first byte will give a negative value.
1071 else { oos.writeInt(-mid.length); }
1072 // Now write the text bytes (if any) directly, unwrapped.
1073 if(0 != mid.length) { oos.write(mid); }
1074 }
1075
1076 /**Deserialise: validate this instance, coerce mildly-errant values, and eliminate duplicates.
1077 * Has to handle derived classes too.
1078 */
1079 protected Object readResolve()
1080 throws ObjectStreamException
1081 {
1082 // In the case where the prev chain length on the wire is too large for current parameters,
1083 // we generate a new valid replacement here.
1084 if(getPrevChainLength() > MAX_PREV_CHAIN_LENGTH)
1085 {
1086 System.err.println("WARNING: replacing deserialised Name item that has an overly-long chain: "+getPrevChainLength()+" vs "+MAX_PREV_CHAIN_LENGTH);
1087 // Attempt to retain some of the sharing if possible.
1088 Name p;
1089 for(p = prev; (p != null) && (p.getPrevChainLength() >= MAX_PREV_CHAIN_LENGTH); p = p.prev)
1090 { }
1091 if(EXTRA_ASSERTS) { assert((p == null) || (p.getPrevChainLength() < MAX_PREV_CHAIN_LENGTH)); }
1092
1093 // FIXME: nasty interdependency between base and derived classes here...
1094 if(ExhibitFull.class.equals(getClass()))
1095 { return(ExhibitFull.create(new CS8Bit(this), p)); }
1096 return(Name.create(new CS8Bit(this), p));
1097 }
1098
1099 // Check that this instance is valid.
1100 validateObject();
1101
1102 // Eliminate duplicates while deserialising this and closely-coupled child classes.
1103 return(MemoryTools.intern(this));
1104 }
1105
1106 /**Unique serialisation ID. */
1107 private static final long serialVersionUID = 2798109831174584816L;
1108
1109
1110 /**ExhibitShortName immutable CharSequence light-weight wrapper around underlying ExhibitFullName data.
1111 * Though slow, these can be used as Map/Set/etc elements,
1112 * as they have a working (though inefficient) consistent hashCode() and equals(),
1113 * but it is much preferred (more efficient/fast) to use ExhibitFull as a key.
1114 */
1115 public static final class ExhibitShort extends CSWrapper<ExhibitFull>
1116 {
1117 /**Create an instance.
1118 * Only intended to be created by an ExhibitFull instance.
1119 */
1120 private ExhibitShort(final ExhibitFull underlying, final int start, final int end)
1121 {
1122 super(underlying, start, end);
1123 if(EXTRA_ASSERTS) { assert(ExhibitName.validNameFinalComponentSyntax(this)); }
1124 }
1125
1126 /**Retrieve full name for this short name; never null. */
1127 public ExhibitFull getFullName() { return(underlying); }
1128
1129 /**Extract the main words (stem) component; never null nor empty.
1130 * As ExhibitName.getMainWordsComponent() but optimised for this common case,
1131 * with the result content identical to that from getMainWordsComponent(getFullName()).
1132 * <p>
1133 * The result is intern()able to help conflate identical values if required,
1134 * ie has a working hashCode() and equals(),
1135 * which may save space and accelerate searching for example.
1136 *
1137 * @param allAttrWords a Set of all legal attribute words (String values);
1138 * may be empty but not null
1139 */
1140 public CharSequence8Bit getMainWordsComponent(final Set<String> allAttrWords)
1141 {
1142 final int endOfAttrWords = ExhibitName.getEndOfAttrWords(this);
1143 final int endOfMainWords = ExhibitName.getEndOfMainWords(this, -1, endOfAttrWords, allAttrWords);
1144 final CharSequence8Bit result = new CSWrapper<ExhibitFull>(underlying, start, start + endOfMainWords);
1145 // Should be identical result to getMainWordsComponent() on full name.
1146 // assert(TextUtils.contentEquals(result, ExhibitName.getMainWordsComponent(underlying, allAttrWords)));
1147 return(result);
1148 }
1149
1150 /**Equality checks for the same type and content.
1151 * Will return true in the case of an identical short name of a different full name.
1152 */
1153 @Override public boolean equals(final Object obj)
1154 {
1155 if(this == obj) { return(true); }
1156 if(!(obj instanceof Name.ExhibitShort)) { return(false); }
1157 return(TextUtils.contentEquals(this, (Name.ExhibitShort)obj));
1158 }
1159
1160 /**The hash code is based on a fixed-time sampling of the content for speed. */
1161 @Override public int hashCode()
1162 {
1163 final int l = length();
1164 int h = l;
1165 final int step = (l >>> 3);
1166 for(int i = l; --i >= 0; i -= step)
1167 { h = 37*h + charAt(i); }
1168 h -= l;
1169 return(h);
1170 }
1171
1172 /**Convert to String containing same char sequence.
1173 * We automatically intern() the short exhibit name result to avoid duplication.
1174 */
1175 @Override public String toString() { return(MemoryTools.intern((new StringBuilder(length())).append(this).toString())); }
1176
1177 /**Unique leading printable-ASCII character for persistableKey(), not a valid exhibit name char. */
1178 public static final char PERSISTABLE_NAME_LEADING_CHAR = '!';
1179
1180 /**Length of persistable key in chars; strictly positive. */
1181 public static final char PERSISTABLE_NAME_LENGTH = 8;
1182
1183 /**Persistable unique printable-ASCII immutable CharSequence key; never null.
1184 * This generates a key:
1185 * <ul>
1186 * <li>that is of fixed (small) length</li>
1187 * <li>that should be shorter than most actual exhibit short names thus representing a reduced memory footprint</li>
1188 * <li>that is quickly and easily distinguished from (and can be unambiguously mixed with) exhibit short (and full) names, by leading PERSISTABLE_NAME_LEADING_CHAR</li>
1189 * <li>that should in practice be unique across all short exhibit names and thus can be used as a substitute for a short name</li>
1190 * <li>depends only on the short name, not any other part of the full name</li>
1191 * <li>has a small amount of verification built in (second char of key is first char of short name)</li>
1192 * <li>does not contain any ASCII control characters or whitespace (≤32)</li>
1193 * <li><em>not</em> intern()ed here because a key is small enough for the overhead to be potentially prohibitive</li>
1194 * </ul>
1195 * <p>
1196 * This may be slow to compute.
1197 */
1198 public CharSequence persistableKey()
1199 {
1200 final StringBuilder sb = new StringBuilder(PERSISTABLE_NAME_LENGTH);
1201 sb.append(PERSISTABLE_NAME_LEADING_CHAR); // Unique prefix that guarantees it not a valid exhibit name.
1202 sb.append(charAt(0)); // Allows some cross-checking with original short name.
1203
1204 // Extract a strong SHA-1 digest/hash of the bytes of the short name.
1205 // (We can exclude the first char already explicitly in the key.)
1206 final MessageDigest digestor;
1207 try { digestor = MessageDigest.getInstance(CoreConsts.HASH_SHA1); }
1208 catch(final NoSuchAlgorithmException e) { throw new Error("Unable to locate hash impl: "+CoreConsts.HASH_SHA1); }
1209 for(int i = length(); --i > 0; ) { digestor.update((byte) charAt(i)); }
1210 final byte digest[] = digestor.digest();
1211 if(EXTRA_ASSERTS) { assert(digest != null); }
1212 assert(digest.length >= 4);
1213 // Convert digest to printable ~6.5-bits-per-char form
1214 // to retain as much digest as possible and thus minimise chance of key collision.
1215 for(int i = 0; sb.length() < PERSISTABLE_NAME_LENGTH; i++)
1216 {
1217 int ib = digest[i] & 0x7f; // Basic 7-bit form.
1218 // If result would be control/whitespace (or DEL),
1219 // then translate it into the printable region,
1220 // and incorporate the otherwise-discarded digest bit.
1221 if(ib <= ' ')
1222 { ib += (((digest[i] & 0x80) != 0) ? 64 : 94); }
1223 else if(ib == 127) // Avoid DEL code.
1224 { ib = ((digest[i] & 0x80) != 0) ? 63 : 93; }
1225 if(EXTRA_ASSERTS) { assert((ib > ' ') && (ib < 127)); }
1226 sb.append((char) ib); // Append digest char.
1227 }
1228
1229 if(EXTRA_ASSERTS) { assert(sb.length() == PERSISTABLE_NAME_LENGTH); }
1230 return(new CS8Bit(sb)); // Convert to immutable compact form.
1231 }
1232 }
1233
1234 /**ExhibitFullName implementation on top of SharedTermini8BitString.
1235 * This will only allow construction of instances representing a valid full exhibit name.
1236 * <p>
1237 * This carries no significant extra data over SharedTermini8BitString,
1238 * and mainly represents a guarantee that the data is a valid full exhibit name,
1239 * plus some utility methods/types for conveniently and efficiently accessing components of that name.
1240 * <p>
1241 * The constructors are private and only accessible via factory methods
1242 * to help with instance control,
1243 * and all instances are automatically intern()ed to eliminate duplicates.
1244 */
1245 public static final class ExhibitFull extends Name
1246 {
1247 private ExhibitFull(final CharSequence name, final Name prev)
1248 {
1249 super(name, prev); // Encode/store the text.
1250
1251 // Verify what we've been given.
1252 try { validateObject(); }
1253 catch(final InvalidObjectException e)
1254 { throw new IllegalArgumentException(e.getMessage()); }
1255 }
1256
1257 /**Create an ExhibitFull instance; never null.
1258 * It is often best to attempt to share a prefix with the lexically previous (or next) item.
1259 * <p>
1260 * If passed an ExhibitFull (but not including sub-classes) this returns it untouched.
1261 *
1262 * @param fullName the (8-bit) syntactically-valid full exhibit name; never null nor empty
1263 *
1264 * @throws IllegalArgumentException in the case of a null or invalid putative full name
1265 */
1266 public static ExhibitFull create(final CharSequence fullName)
1267 { return(create(fullName, null)); }
1268
1269 /**Create an ExhibitFull instance with a previous value to attempt to share a prefix with; never null.
1270 * It is often best to attempt to share a prefix with the lexically previous (or next) item.
1271 * <p>
1272 * If passed an ExhibitFull (but not including sub-classes) this returns it untouched.
1273 *
1274 * @param fullName the (8-bit) syntactically-valid full exhibit name; never null nor empty
1275 * @param prev previous Name with which to attempt to share a prefix; null if none
1276 *
1277 * @throws IllegalArgumentException in the case of a null or invalid putative full name
1278 */
1279 public static ExhibitFull create(final CharSequence fullName,
1280 final Name prev)
1281 {
1282 if(null == fullName) { throw new IllegalArgumentException("ExhibitFull Name cannot be null"); }
1283
1284 if(ExhibitFull.class.equals(fullName.getClass()))
1285 {
1286 // Optimisation: if this is already an ExhibitFull (and not a sub-class) then return it as-is
1287 // if no prev has been offered (that might share a longer prefix).
1288 if(prev == null) { return((ExhibitFull) fullName); }
1289 // Optimisation: if prev content equals this then return prev to save a visit to intern(), etc.
1290 else if(prev.contentEquals(fullName)) { return((ExhibitFull) prev); }
1291 }
1292
1293 final AdHocPrefixCache<Integer> adHocPrefixCache = getAdHocPrefixCache();
1294 // If no prev was specified we look for one in the ad-hoc cache.
1295 final Name prevToUse;
1296 if(prev != null) { prevToUse = prev; }
1297 else
1298 {
1299 // If we find an exact match with the right class in the ad-hoc cache then return it.
1300 prevToUse = adHocPrefixCache.findPrev(fullName);
1301 if((prevToUse != null) && (ExhibitFull.class.equals(prevToUse.getClass())) && prevToUse.contentEquals(fullName))
1302 { return((ExhibitFull) prevToUse); }
1303 }
1304 // Return intern()ed value to eliminate accidental duplicates.
1305 // Pass it through the ad-hoc prefix cache too...
1306 final ExhibitFull result = adHocPrefixCache.offerNewInstance(MemoryTools.intern(new ExhibitFull(fullName, prevToUse)));
1307 if(EXTRA_ASSERTS) { assert(ExhibitFull.class.equals(result.getClass())); }
1308 return(result);
1309 }
1310
1311 /**Shared dummy (valid, small, alphabetically-late) instance with no prev; not null.
1312 * Can help to avoid picking up random dependencies on other ExhibitFull instances,
1313 * eg by blocking use of ad-hoc prefix sharing,
1314 * which may be useful for data due to be serialised.
1315 */
1316 public static final Name.ExhibitFull DUMMY = MemoryTools.intern(create("z/z-Z.z", null));
1317
1318 /**Cache of link to parent, computed on demand.
1319 * Only accessed under instance lock so as to eliminate access races
1320 * and thus guarantee avoidance of creation of duplicates.
1321 * <p>
1322 * Not serialisable (because recomputable).
1323 */
1324 private transient ExhibitShort shortName;
1325
1326 /**Extract strongly-typed ExhibitShortName, ie the filename component; never null.
1327 * This is a light-weight, non-intern()able CharSequence wrapper around the existing data.
1328 * <p>
1329 * This guarantees that only one ShortName instance can be generated for each FullName.
1330 * Thus intern()ing is neither needed nor supported.
1331 */
1332 public synchronized ExhibitShort getShortName()
1333 {
1334 ExhibitShort result = shortName;
1335 if(null != shortName) { return(result); }
1336 shortName = result = new ExhibitShort(this, TextUtils.lastIndexOf(this, '/') + 1, length());
1337 return(result);
1338 }
1339
1340 /**Create a "virtual" full name consisting of the category directory and then the file component; never null.
1341 * The results in a syntactically-valid full exhibit name,
1342 * but omits the internal directory "noise".
1343 * <p>
1344 * Returns this instance as-is if already 'virtual'.
1345 */
1346 public Name.ExhibitFull getVirtualExhibitName()
1347 {
1348 // Optimisation:
1349 // if there is only one slash, the name is already virtual,
1350 // so the name can be returned untouched.
1351 final int firstSlash = TextUtils.indexOf(this, '/');
1352 final int lastSlash = TextUtils.lastIndexOf(this, '/');
1353 if(firstSlash == lastSlash) { return(this); }
1354
1355 final int length = length();
1356 // FIXME: avoid inefficient conversion via StringBuilder/char[].
1357 return(create(new StringBuilder(length - (lastSlash-firstSlash))
1358 .append(subSequence(0, firstSlash+1))
1359 .append(subSequence(lastSlash+1, length)),
1360 this)); // Prefix and suffix should be entirely shareable with this...
1361 }
1362
1363 /**Convert to String containing same char sequence; never null.
1364 * We automatically intern() the full-exhibit-name result to avoid duplication.
1365 */
1366 // @Override public String toString() { return(MemoryTools.intern((new StringBuilder(length())).append(this).toString())); }
1367 @Override public String toString() { return(MemoryTools.intern((new StringBuilder(length())).append((CharSequence8Bit) (this)).toString())); }
1368
1369 /**Validate fields/state.
1370 * Called in the constructor and possibly after de-serialising.
1371 * <p>
1372 * Barf if something bad is found.
1373 * (Maybe allow some extra info in debug version.)
1374 */
1375 @Override
1376 public void validateObject()
1377 throws InvalidObjectException
1378 {
1379 // Check Name invariants.
1380 super.validateObject();
1381
1382 // Verify that this is a valid full Exhibit name!
1383 if(!ExhibitName.validNameSyntax(this))
1384 { throw new InvalidObjectException("bad object: not a valid full exhibit name: "+this); }
1385 }
1386
1387 /**Unique Serialisation class ID generated by http://random.hd.org/. */
1388 private static final long serialVersionUID = 8993934283490189577L;
1389 }
1390
1391 /**Provide natural sort order for this and derived classes. */
1392 public final int compareTo(final CharSequence o)
1393 {
1394 if(this == o) { return(0); }
1395 // Use the 'fast' version if possible since much better for sorting/searching...
1396 return(TextUtils.compare((this),
1397 (o instanceof Name) ? (((Name) o)) : o));
1398 }
1399
1400 /**Provide natural sort order for this and derived classes. */
1401 public final int compareTo(final Name o)
1402 {
1403 // Same instance compares equal...
1404 if(this == o) { return(0); }
1405 // Use the 'fast' version if possible since much better for sorting/searching...
1406 return(TextUtils.compare(this, o));
1407 }
1408
1409 /**Compute the Integer prefix key for the supplied CharSequence; never null.
1410 * This assumes that the top 8 bits of each char are unimportant
1411 * since if the sequence is to be turned into a Name they must be zero.
1412 * <p>
1413 * We construct the prefix key from the first four characters of the putative Name
1414 * (or fewer if the name is shorter).
1415 */
1416 @SuppressWarnings("unused") private static Integer computeIntegerPrefixKey(final CharSequence cs)
1417 {
1418 if(EXTRA_ASSERTS) { assert(null != cs); }
1419 final int l = cs.length();
1420 if((MIN_TERMINUS_MATCH_LENGTH <= 0) && (l == 0)) { return(0); }
1421 int result = cs.charAt(0);
1422 if((MIN_TERMINUS_MATCH_LENGTH <= 1) && (l == 1)) { return(result); }
1423 result |= (((int) cs.charAt(1)) << 8);
1424 if((MIN_TERMINUS_MATCH_LENGTH <= 2) && (l == 2)) { return(result); }
1425 result |= (((int) cs.charAt(2)) << 16);
1426 if((MIN_TERMINUS_MATCH_LENGTH <= 3) && (l == 3)) { return(result); }
1427 result |= (((int) cs.charAt(3)) << 24);
1428 return(result);
1429 }
1430
1431 /**Compute the Integer suffix key for the supplied CharSequence; never null.
1432 * This assumes that the top 8 bits of each char are unimportant
1433 * since if the sequence is to be turned into a Name they must be zero.
1434 * <p>
1435 * We construct the suffix key from the last four characters of the putative Name
1436 * (or fewer if the name is shorter).
1437 */
1438 @SuppressWarnings("unused") private static Integer computeIntegerSuffixKey(final CharSequence cs)
1439 {
1440 if(EXTRA_ASSERTS) { assert(null != cs); }
1441 final int l = cs.length();
1442 if((MIN_TERMINUS_MATCH_LENGTH <= 0) && (l == 0)) { return(0); }
1443 int result = cs.charAt(l - 1);
1444 if((MIN_TERMINUS_MATCH_LENGTH <= 1) && (l == 1)) { return(result); }
1445 result |= (((int) cs.charAt(l - 2)) << 8);
1446 if((MIN_TERMINUS_MATCH_LENGTH <= 2) && (l == 2)) { return(result); }
1447 result |= (((int) cs.charAt(l - 3)) << 16);
1448 if((MIN_TERMINUS_MATCH_LENGTH <= 3) && (l == 3)) { return(result); }
1449 result |= (((int) cs.charAt(l - 4)) << 24);
1450 return(result);
1451 }
1452
1453 /**Contains lookup cache by prefix for all Name instances.
1454 * This is used to try to find a prefix for a newly-constructed Name
1455 * where no prefix has been supplied at all.
1456 * <p>
1457 * This looks up in a short list of recent/useful Name values with the same fixed-length prefix
1458 * for something of the same concrete type and the longest available prefix match,
1459 * returning the candidate 'prev' item (or null if none)
1460 * which then can be used in the usual way.
1461 * <p>
1462 * The Name instances are held via WeakReference wrappers
1463 * which should ensure that the cache does not keep them from being garbage collected.
1464 * <p>
1465 * This would usually be invoked by the factory method or constructor
1466 * where no 'prev' has been supplied.
1467 * <p>
1468 * This cache should grow as needed to get a decent hit rate,
1469 * and shrink to release memory when the hit rate is high or the memory subsystem is stressed.
1470 * <p>
1471 * Thread-safe but does not support concurrency (fully synchronized).
1472 * <p>
1473 * A lock can be held on instances of this object to make compound operations atomic.
1474 *
1475 * @param <TerminusKey> type of key for prefix of Name instances
1476 * must be immutable and support consistent hashCode() and equals()
1477 */
1478 private static final class AdHocPrefixCache<TerminusKey extends Number>
1479 {
1480 /**The lookup from prefix key to list of candidate same-prefix instances; never null.
1481 * On each list may be items all of exactly one prefix
1482 * or whose prefix hashes to the same value,
1483 * depending on how the prefix key value is constructed.
1484 * <p>
1485 * Empty lists (or those effectively empty because all the WeakReferences have been dropped)
1486 * will be purged (ie empty lists are not retained).
1487 * <p>
1488 * The lists values are of type LinkedList, with 'best' items first.
1489 * <p>
1490 * (In future, where a list is of length 1 and where the maximum length is currently 1,
1491 * a list may be stored as a singleton for efficiency.)
1492 * <p>
1493 * We hope for a reasonable minimum size to have a sensible chance of any match at all
1494 * across a large text base.
1495 * <p>
1496 * We size the maximum based on total heap capacity,
1497 * and allow everything to be discarded in case of extreme memory stress.
1498 */
1499 private final MemoryTools.CacheMiniMap<TerminusKey, LinkedList<WeakReference<Name>>> prefixStore =
1500 MemoryTools.SimpleLRUMapAutoSizeForHitRate.<TerminusKey, LinkedList<WeakReference<Name>>>create(
1501 0, computeCacheMaxSize(), "prefixStore");
1502
1503 /**The lookup from suffix key to list of candidate same-suffix instances; never null.
1504 * Lighter-weight than the prefixStore, and smaller, since suffix matching is a secondary activity.
1505 */
1506 private final MemoryTools.CacheMiniMap<TerminusKey, LinkedList<WeakReference<Name>>> suffixStore =
1507 MemoryTools.SimpleProbabilisticCache.<TerminusKey, LinkedList<WeakReference<Name>>>create(
1508 computeCacheMaxSize() >> 2, // Smaller heap for suffixes.
1509 "suffixStore");
1510
1511 /**Compute maximum (prefix) cache size, partly based on heap size; strictly positive.
1512 * Aim to allow ~10000 entries per 1GB of heap, with a minimum of several hundred.
1513 */
1514 private static int computeCacheMaxSize()
1515 { return(Math.max(1<<9, (int) Math.min(1<<16, Runtime.getRuntime().totalMemory() >> 17))); }
1516
1517
1518 /**Returns a suitable prev (with a shared prefix/suffix) for the supplied non-null Name of any Name subclass, else null if none found.
1519 * May trim state, especially dead references.
1520 */
1521 synchronized Name findPrev(final CharSequence putativeName)
1522 {
1523 if(EXTRA_ASSERTS) { assert(null != putativeName); }
1524
1525 final int len = putativeName.length();
1526
1527 // Don't try for very short matches.
1528 if(len < MIN_TERMINUS_MATCH_LENGTH) { return(null); }
1529
1530 // Find and return the best match, if any.
1531 // If we find any prefix match then we don't try for a suffix match.
1532 for(final boolean doPrefix : new boolean[]{true, false})
1533 {
1534 // Choose which store to operate on.
1535 final MemoryTools.CacheMiniMap<TerminusKey, LinkedList<WeakReference<Name>>> store =
1536 doPrefix ? prefixStore : suffixStore;
1537 // Compute the correct key for the selected store.
1538 final TerminusKey key = (TerminusKey)
1539 (doPrefix ? computeIntegerPrefixKey(putativeName) : computeIntegerSuffixKey(putativeName));
1540
1541 // Get the existing list for this prefix if any...
1542 final LinkedList<WeakReference<Name>> list = store.get(key);
1543 // If no list then there is no available match.
1544 if(null == list) { return(null); } // Nothing found.
1545
1546 Name result = null; // Nothing found yet.
1547 int savedCharsBest = 0; // Best shared-characters length (prefix and suffix) found so far.
1548 WeakReference<Name> eBest = null; // Container for the best item.
1549 boolean first = true;
1550 for(final Iterator<WeakReference<Name>> it = list.iterator(); it.hasNext(); first = false)
1551 {
1552 final WeakReference<Name> e = it.next();
1553 final Name cachedName = e.get();
1554 // If we find a dead entry then zap it and continue...
1555 if(null == cachedName)
1556 {
1557 it.remove();
1558 continue;
1559 }
1560 // Find common prefix/suffix length: skip if zero.
1561 // Can be > len if sharable prefix and suffix overlap.
1562 final int savedChars = Name.computeCommonPrefixLength(putativeName, cachedName) + Name.computeCommonSuffixLength(putativeName, cachedName);
1563 if(savedChars == 0)
1564 { continue; }
1565 // If this is best so far than capture it.
1566 if(savedChars > savedCharsBest)
1567 {
1568 if(DEBUG /*|| !doPrefix*/) { System.out.println("***Name ad-hoc "+(doPrefix?"prefix":"suffix")+" match found "+((result==null) ? "" : "better ")+"match with shared chars "+savedChars); }
1569 result = cachedName;
1570 savedCharsBest = savedChars;
1571 if(!first) { eBest = e; }
1572 // If we already have a maximal match (no unshared characters) then stop looking.
1573 if(savedChars >= len) { break; }
1574 }
1575 }
1576 // Move best item to the front to mark it as 'best' (if not already there).
1577 if(null != eBest)
1578 {
1579 list.remove(eBest);
1580 list.addFirst(eBest);
1581 }
1582
1583 // Trim the list iff now too long (eg because we are now memory-stressed).
1584 trimListLength(list, doPrefix);
1585 // If the list is now empty then remove it entirely.
1586 if(list.isEmpty()) { store.remove(key); }
1587
1588 // If we found something then don't move on to the next store...
1589 if(result != null) { return(result); }
1590 }
1591
1592 return(null); // Nothing found...
1593 }
1594
1595 /**Offer a newly constructed Name instance to the cache; returns the value passed in.
1596 * Rejected if already at maximum chain length.
1597 * @return the input name unchanged
1598 */
1599 synchronized <N extends Name> N offerNewInstance(final N name)
1600 {
1601 if(null == name) { return(null); }
1602
1603 // Don't clutter our tables with very short values.
1604 if(name.length() < MIN_TERMINUS_MATCH_LENGTH) { return(name); }
1605
1606 // If too long to share termini with, espeically ad-hoc, then reject.
1607 if(name.getPrevChainLength() >= MAX_PREV_CHAIN_LENGTH) { return(name); }
1608
1609 for(final boolean doPrefix : new boolean[]{true, false})
1610 {
1611 // Choose which store to operate on.
1612 final MemoryTools.CacheMiniMap<TerminusKey, LinkedList<WeakReference<Name>>> store =
1613 doPrefix ? prefixStore : suffixStore;
1614 // Compute the correct key for the selected store.
1615 final TerminusKey key = (TerminusKey)
1616 (doPrefix ? computeIntegerPrefixKey(name) : computeIntegerSuffixKey(name));
1617
1618 // Get the existing list for this prefix if any...
1619 LinkedList<WeakReference<Name>> list = store.get(key);
1620 // Create (and store) a new list if need be.
1621 if(null == list)
1622 {
1623 list = new LinkedList<WeakReference<Name>>();
1624 store.put(key, list);
1625 }
1626 // Trim out any dead items (ie weak references gone away).
1627 trimDeadRefs(list);
1628 // If something already equivalent to the supplied Name is on the list
1629 // (ie same content and concrete class)
1630 // then move it to the front (unless already at the front!)
1631 // rather than creating a new WeakReference instance
1632 // and/or filling up the list with duplicates for example.
1633 boolean foundNameAlreadyOnList = false;
1634 boolean first = true;
1635 for(final Iterator<WeakReference<Name>> it = list.iterator(); it.hasNext(); first = false)
1636 {
1637 final WeakReference<Name> e = it.next();
1638 final Name cachedName = e.get();
1639 if(name.equals(cachedName))
1640 {
1641 foundNameAlreadyOnList = true;
1642 if(!first)
1643 {
1644 if(DEBUG) { System.out.println("Name.AHPC: moved up to first position: "+name); }
1645 it.remove(); // Remove from this position...
1646 list.addFirst(e); // Re-insert at start (without constructing a new WeakRef).
1647 }
1648 else { if(DEBUG) { System.out.println("Name.AHPC: found already in first position: "+name); } }
1649 break;
1650 }
1651 }
1652 // Else stuff us on the front of the list as 'newest' and possibly best.
1653 if(!foundNameAlreadyOnList)
1654 {
1655 list.addFirst(new WeakReference<Name>(name));
1656 if(DEBUG) { System.out.println("Name.AHPC: inserting new at first position: "+name); }
1657 }
1658
1659 // Trim the list iff now too long.
1660 trimListLength(list, doPrefix);
1661 }
1662
1663 return(name); // Return the input name.
1664 }
1665
1666 /**Trim dead references from the supplied non-null list. */
1667 private void trimDeadRefs(final List<WeakReference<Name>> l)
1668 {
1669 for(final Iterator<WeakReference<Name>> it = l.iterator(); it.hasNext(); )
1670 {
1671 final WeakReference<Name> e = it.next();
1672 // Test if ref is alive and zap entry if not.
1673 // Note that get() doesn't keep ref artificially alive unlike for SoftReference...
1674 if(null == e.get())
1675 { it.remove(); }
1676 }
1677 }
1678
1679 /**Trim the supplied non-null list to size if too long. */
1680 private void trimListLength(final LinkedList<WeakReference<Name>> l, final boolean isPrefix)
1681 {
1682 // Cannot be overlong unless longer than smallest permitted max length...
1683 if(l.size() > STRESSED_LIST_LEN)
1684 {
1685 final int targetLen = getMaxListLen(isPrefix);
1686 while(l.size() > targetLen)
1687 { l.removeLast(); }
1688 }
1689 }
1690
1691 /**Maximum list length under normal circumstances; strictly positive.
1692 * Borrowing from GZIP '-6'/default experience a good value might be ~6.
1693 */
1694 private static final int MAX_LIST_LEN = 6;
1695
1696 /**Maximum secondary list length under normal circumstances; strictly positive.
1697 * Since suffix matching is secondary goal this is shorter than the general limit
1698 * but should still probably be greater than 1.
1699 */
1700 private static final int MAX_SECONDARY_LIST_LEN = ((MAX_LIST_LEN >> 1) | 1);
1701
1702 /**Maximum list length under memory stress; strictly positive, less than MAX_LIST_LEN.
1703 * This will do a less good job of finding matches (ie will probably do less compaction)
1704 * but will consume less memory in the ad-hoc cache.
1705 * <p>
1706 * Under stress, any entries we touch are capped to this length,
1707 * thus allowing us to gracefully and incrementally free space.
1708 * <p>
1709 * Borrowing from GZIP '-1/'-fast' experience a good value might be 1.
1710 */
1711 private static final int STRESSED_LIST_LEN = 1;
1712
1713 /**Maximum same-prefix list length; strictly positive.
1714 * This will drop to a smaller measure if memory is under stress.
1715 */
1716 /* private */ int getMaxListLen(final boolean isPrefix)
1717 { return(MemoryTools.isMemoryStressed() ? STRESSED_LIST_LEN : (isPrefix ? MAX_LIST_LEN : MAX_SECONDARY_LIST_LEN)); }
1718 }
1719
1720 /**Get instance of AdHocPrefixCache; never null. */
1721 private synchronized static final AdHocPrefixCache<Integer> getAdHocPrefixCache()
1722 {
1723 // If cache has not been discarded then return it.
1724 final SoftReference<AdHocPrefixCache<Integer>> cache = _adHocPrefixCache;
1725 AdHocPrefixCache<Integer> result = null;
1726 if((null != cache) && (null != (result = cache.get())))
1727 { return(result); }
1728 // Construct and cache and return a new instance.
1729 result = new AdHocPrefixCache<Integer>();
1730 _adHocPrefixCache = new SoftReference<AdHocPrefixCache<Integer>>(result);
1731 return(result);
1732 }
1733
1734 /**Memory-sensitive cache of Name instances indexed by fixed-length prefix and suffix; initially null, not thereafter.
1735 * Borrows a few techniques from GZIP-like algorithms.
1736 * <p>
1737 * This uses Integer as a key for up to 4 bytes of prefix/suffix lookup.
1738 * <p>
1739 * Only accessed under the class lock, by getAdHocPrefixCache().
1740 * <p>
1741 * Held via a StaticReference to allow the entire cache to be discarded automatically
1742 * under extreme memory stress.
1743 * <p>
1744 * Accessed under the class lock.
1745 */
1746 private static SoftReference<AdHocPrefixCache<Integer>> _adHocPrefixCache;
1747
1748 /**Case-insensitive Comparator for Name equivalent to TextUtils.CASE_INSENSITIVE_ORDER; not null.
1749 * May be able to operate more efficiently than generic TextUtils.CASE_INSENSITIVE_ORDER.
1750 */
1751 public static final Comparator<Name> CASE_INSENSITIVE_ORDER = new Comparator<Name>(){
1752 public int compare(final Name n1, final Name n2)
1753 { return(TextUtils.CASE_INSENSITIVE_ORDER.compare(n1, n2)); }
1754 };
1755
1756 /**Set true for extra assert()s for internal invariants, usually only in non-release code. */
1757 private static final boolean EXTRA_ASSERTS = IsDebug.isDebug;
1758
1759 /**Set true for extra debug/development logging output. */
1760 private static final boolean DEBUG = false;
1761 }