001 /*
002 Copyright (c) 1996-2012, Damon Hart-Davis
003 All rights reserved.
004
005 Redistribution and use in source and binary forms, with or without
006 modification, are permitted provided that the following conditions are
007 met:
008
009 * Redistributions of source code must retain the above copyright
010 notice, this list of conditions and the following disclaimer.
011
012 * Redistributions in binary form must reproduce the above copyright
013 notice, this list of conditions and the following disclaimer in the
014 documentation and/or other materials provided with the
015 distribution.
016
017 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
018 IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
019 TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
020 PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
021 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
022 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
023 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
024 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
025 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
026 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
027 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
028 */
029 package org.hd.d.pg2k.svrCore;
030
031 import java.io.IOException;
032 import java.io.InvalidObjectException;
033 import java.io.ObjectInputStream;
034 import java.io.ObjectInputValidation;
035 import java.io.ObjectOutputStream;
036 import java.io.ObjectStreamException;
037 import java.io.Serializable;
038 import java.lang.ref.SoftReference;
039 import java.lang.ref.WeakReference;
040 import java.security.MessageDigest;
041 import java.security.NoSuchAlgorithmException;
042 import java.util.Comparator;
043 import java.util.Iterator;
044 import java.util.LinkedList;
045 import java.util.List;
046 import java.util.Set;
047
048 import org.hd.d.pg2k.svrCore.TextUtils.CharSequence7Bit;
049 import org.hd.d.pg2k.svrCore.TextUtils.CharSequence8Bit;
050 import org.hd.d.pg2k.svrCore.collections.LRUMapAutoSizeForHitRate;
051 import org.hd.d.pg2k.svrCore.collections.SimpleProbabilisticCache;
052
053 import ORG.hd.d.IsDebug;
054
055 // TODO: Allow sharing of the underlying byte[] between any two non-prev equal-content Name types.
056
057 /**Compact immutable in-memory representation of a (usually-short, sharable-prefix/suffix) 8-bit ASCII String.
058 * This can share a common prefix and suffix with another instance,
059 * avoiding having to store the shared elements itself.
060 * <p>
061 * This is particularly suitable for densely-packed sorted lists of values,
062 * such as sets of exhibit names which with tend to have by necessity long shared prefixes.
063 * <p>
064 * This is suitable for names but also other usually-short 8-bit text values,
065 * in particular those that don't have the sort of internal structure and repetition
066 * that Compact7BitString can take advantage of.
067 * <p>
068 * This supports random access reasonably efficiently
069 * (eg through the CharSequence interface)
070 * but some elements will be slower to access than others.
071 * (chars at either end will tend to be the slowest to access.)
072 * <p>
073 * Converting to and from this form should be relatively resource-light,
074 * eg with low CPU and memory requirements,
075 * though all instances are automatically intern()ed which has some cost.
076 * <p>
077 * Where there is a conflict between memory consumption and CPU,
078 * this implementation is biased to minimise memory use at the possible expense of CPU.
079 * <p>
080 * Internally this uses a byte[] to store the data
081 * (a basic halving of in-memory size in most JVMs),
082 * and use of a common prefix with a similar
083 * (nominally lexically previous close value)
084 * should help further trim memory use.
085 * <p>
086 * (This implementation internally attempts to minimise the number of objects kept live
087 * and that need to be traversed to complete construction.)
088 * <p>
089 * These classes are not publicly constructible in order to help control instance count,
090 * and indeed may use MemoryTools.intern() to eliminate duplicates.
091 * <p>
092 * This uses an internal cache to attempt to find ad-hoc prefix matches with existing instances
093 * if no 'prev' value is offered during creation of a new instance.
094 * This cache is memory-sensitive and will be silently sacrificed
095 * if the system is short of memory.
096 * <p>
097 * Note that MemoryTools.intern() may not release a 'duplicate'
098 * if there remain other instances hanging from it.
099 * <p>
100 * This is not final though all its constructors are private
101 * to allow only inner-class derived types in order to preserve immutability.
102 * <p>
103 * As this is performance-critical code, and may have to run on slow/non-JIT JVMs,
104 * most assert()s that check purely-internally-imposed invariants
105 * are entirely eliminated from release code;
106 * those that might still be triggered by runtime issues external to this class,
107 * such as run-time presence of hashing algorithms,
108 * remain present to enabled at runtime if required.
109 */
110 public class Name implements CharSequence8Bit, Comparable<Name>,
111 Serializable, ObjectInputValidation,
112 MemoryTools.Internable // Not currently supporting InternableRank
113 {
114 /**Create a Name instance; never null unless input is null.
115 * It is often best to attempt to share a prefix with the lexically previous (or next) item.
116 * <p>
117 * If passed a null this returns a null.
118 * <p>
119 * If passed a Name (but not including sub-classes) this returns it untouched.
120 * @param text the 8-bit text to store; if null then null is returned
121 */
122 public static Name create(final CharSequence text)
123 { return(create(text, null)); }
124
125 /**Create a Name instance with an optional previous value to attempt to share a prefix with; never null unless input is null.
126 * It is often best to attempt to share a prefix with the lexically previous (or next) item.
127 * <p>
128 * If passed a null this returns a null.
129 * <p>
130 * If passed a Name (but not including sub-classes) this returns it untouched.
131 *
132 * @param text the 8-bit text to store; if null then null is returned
133 * @param prev previous value with which to attempt to share a prefix; null if none
134 */
135 public static Name create(final CharSequence text,
136 final Name prev)
137 {
138 if(null == text) { return(null); }
139
140 if(Name.class.equals(text.getClass()))
141 {
142 // Optimisation: if this is already a Name (and not a sub-class) then return it as-is
143 // if no prev has been offered (that might share a longer prefix).
144 if(prev == null) { return((Name) text); }
145 // Optimisation: if prev content equals this then return prev to save a visit to intern(), etc.
146 else if(prev.contentEquals(text)) { return(prev); }
147 }
148
149 final AdHocPrefixCache<Integer> adHocPrefixCache = getAdHocPrefixCache();
150 // If no prev was specified we look for one in the ad-hoc cache.
151 final Name prevToUse;
152 if(prev != null) { prevToUse = prev; }
153 else
154 {
155 // If we find an exact match with the right class in the ad-hoc cache then return it.
156 prevToUse = adHocPrefixCache.findPrev(text);
157 if((prevToUse != null) && (Name.class.equals(prevToUse.getClass())) && prevToUse.contentEquals(text))
158 { return(prevToUse); }
159 }
160 // Return intern()ed value to eliminate accidental duplicates.
161 // Pass it through the ad-hoc prefix cache too...
162 final Name result = adHocPrefixCache.offerNewInstance(MemoryTools.intern(new Name(text, prevToUse)));
163 if(EXTRA_ASSERTS) { assert(Name.class.equals(result.getClass())); }
164 return(result);
165 }
166
167 /**Attempts to create a Name instance, but falls back to returning an intern()ed String instance otherwise.
168 * This copes with possible non-8-bit values by returning a String rather than a Name.
169 * <p>
170 * Essentially this returns the most memory-efficient representation reasonably possible.
171 *
172 * @return null if text is null, else Name if 8-bit text, else intern()ed String instance
173 */
174 public static CharSequence createOrStringFallback(final CharSequence text, final Name prev)
175 {
176 try { return(create(text, prev)); }
177 catch(final IllegalArgumentException e) { return(MemoryTools.intern(text.toString())); }
178 }
179
180 /**Shared empty instance; not null. */
181 public static final Name EMPTY = create("");
182
183 /**Maximum 'prev' chain length that we will support; non-negative.
184 * Limits the amount of recursion and CPU involved in processing 'nested'/'chained' prefixes,
185 * at the possible expense of some lost compression (ie memory savings).
186 * <p>
187 * This limit also helps avoid blocking GC through overly tangled and straggly data structures.
188 * <p>
189 * Usually positive and in the range (say) 1 to 32 to allow compression,
190 * though higher values also hurt things such as sorting
191 * which forces lots of chain-chasing while comparing prefixes.
192 * <p>
193 * As at 2009/08/02 the maximum actual value when creating fresh ExhibitFull values is 18,
194 * ie 18 is sufficient to yield maximum compression/sharing.
195 * <p>
196 * We may need to raise or lower this limit even though serialised data already exists,
197 * so Name instances are gently coerced to this limit where necessary when deserialised.
198 * <p>
199 * With the 20100621 AEP/AEID name set (29531 names) run against code as at 20120312,
200 * adjusting MAX_PREV_CHAIN_LENGTH results in the following testNameOnAEP() output:
201 * <ul>
202 * <li>0: serialised in-order compact representation array bytes raw | compressed: 4086042 | 286207</li>
203 * <li>4: serialised in-order compact representation array bytes raw | compressed: 2936877 | 285002</li>
204 * <li>8: serialised in-order compact representation array bytes raw | compressed: 1477484 | 271888</li>
205 * <li>9: serialised in-order compact representation array bytes raw | compressed: 1296150 | 267751</li>
206 * <li>11: serialised in-order compact representation array bytes raw | compressed: 1155096 | 262664</li>
207 * <li>12: serialised in-order compact representation array bytes raw | compressed: 1058893 | 260514</li>
208 * <li>13: serialised in-order compact representation array bytes raw | compressed: 1080953 | 259716</li>
209 * <li>14: serialised in-order compact representation array bytes raw | compressed: 1043707 | 259408</li>
210 * <li>17+: serialised in-order compact representation array bytes raw | compressed: 1042888 | 259163</li>
211 * </ul>
212 * Using 8 the serialised form fails to be smaller than using native Strings, when compressed:
213 * "Serialised in-order String array bytes raw | compressed: 3752966 | 271266",
214 * and note the strange behaviour at 13 which results in a larger non-compressed output than at 12.
215 */
216 private static final int MAX_PREV_CHAIN_LENGTH = 14;
217
218 /**If true, always try to search back through prev values for an as-good or better match.
219 * This in principle allows less redundant intermediate values to be retained in memory
220 * and postpones any forced truncation of the chain.
221 */
222 private static final boolean ALWAYS_LOOK_FOR_EARLIER_PREV = true;
223
224 /**Number of bits for prefix length (unsigned) value.
225 * Should usually be greater than the number of suffix bits,
226 * since the primary compaction mechanism is prefix sharing.
227 * <p>
228 * 9 is needed for maximal compression on 20090707 test set,
229 * but compression at 8 is very close.
230 * <p>
231 * NOTE: cannot be changed without preventing deserialisation of old data.
232 */
233 private static final int PREFIX_BITS = 9;
234
235 /**Mask for prefix bits (at bottom of word).
236 * Broken out to help pre-JIT execution performance.
237 */
238 private static final int PREFIX_MASK = ((1<<PREFIX_BITS)-1);
239
240 /**Number of bits for suffix length (unsigned) value.
241 * This uses whatever bits are left after prefix bits have been taken
242 * eg from 16 bits in a shared short.
243 */
244 private static final int SUFFIX_BITS = 16 - PREFIX_BITS;
245
246 /**Mask for suffix bits (at top of word).
247 * Broken out to help pre-JIT execution performance.
248 */
249 private static final int SUFFIX_MASK = ((1<<SUFFIX_BITS)-1);
250
251 /**Max shared prefix length in characters; strictly positive.
252 * An actual match greater than this will be capped to this.
253 */
254 private static final int MAX_PREFIX_LENGTH = PREFIX_MASK;
255
256 /**Max shared prefix length in characters; strictly positive.
257 * An actual match greater than this will be capped to this.
258 */
259 private static final int MAX_SUFFIX_LENGTH = SUFFIX_MASK;
260
261 /**Minimum terminus match length; strictly positive.
262 * This helps eliminate completely spurious linkages that may hurt GC
263 * and trail rubbish in the serialised form of such spuriously-linked values.
264 * <p>
265 * We won't share very short prefixes/suffixes.
266 * <p>
267 * We avoid matches of "" (that should be dealt with by intern()) because this is >0.
268 * We should probably avoid picking up single letters such as trailing full-stops, ie with this is >1.
269 * We should probably allow everything of at least length 3 or 4
270 * (such as ".au" or ".jpg" for example)
271 * which is probably roughly the difference in bytes in a serialised stream
272 * of adaptively encoding a null vs a non-null prev pointer.
273 */
274 private static final int MIN_TERMINUS_MATCH_LENGTH = 3;
275
276 /**Minimum prefix/suffix total share length to use a prev; strictly positive.
277 * If 1 then we always try to use a 'prev' when there is even one character matching,
278 * but this may entrain lots of 'dead wood' that might otherwise be garbage.
279 * <p>
280 * A higher value may ultimately result in lower memory consumption
281 * (and no loss of compactness with or without compression of the serialised form)
282 * though with some loss in the headline immediate 'characters saved' figure.
283 * <p>
284 * This does not need to be enforced on (old) deserialised data,
285 * since for example intern() may often resolve such issues silently,
286 * and in any case it is not a correctness issue.
287 * <p>
288 * For consistency we line this up with the minimum prefix/suffix match length.
289 */
290 private static final int MIN_SHARE_CHARS = MIN_TERMINUS_MATCH_LENGTH;
291
292 /**If non-zero then the minimum number of characters in any initial prefix share block; non-negative.
293 * This is to try to avoid initial sequences of only one or two chars/bytes
294 * causing especially deep and inefficient (logical) recursion.
295 */
296 private static final int MIN_FIRST_BLOCK_CHARS = MIN_TERMINUS_MATCH_LENGTH;
297
298 /**Create a new instance with a previous value to attempt to share a prefix with.
299 * Note that the 'prev' value may be any sub-type of Name.
300 * <p>
301 * Private in order to better manage instance control.
302 *
303 * @param text the 8-bit text to store and must not equal the prev; never null but can be empty
304 * @param putativePrev non-empty previous value with which to attempt to share a prefix; null if none
305 */
306 private Name(final CharSequence text, final Name putativePrev)
307 {
308 if(EXTRA_ASSERTS) { assert(null != text) : "factory method should already have screened out null text value"; }
309 // Verify that all text is 8-bit if not already declared to be (and if fibbing no harm should actually happen).
310 final int l = text.length();
311 if(!(text instanceof CharSequence8Bit))
312 {
313 for(int i = l; --i >= 0; )
314 { if(text.charAt(i) > 255) { throw new IllegalArgumentException(); } }
315 }
316
317 this._length = l;
318
319 // Condition use of the prev value.
320 Name p = putativePrev;
321 // If the prev chain is already maximum length
322 // then try going back at least one step
323 // which may still be of some help with compression.
324 // We may in any case always look back for the oldest good-enough prev
325 // to help trim memory use and preserve elbow room.
326 //
327 // This treats suffix matching as a secondary opportunistic task
328 // and we'll optimise any prefix by preference
329 // unless the initial prefix is shorter than the initial suffix.
330 if(null != p)
331 {
332 final int initialPrefixMatchLength = computeCommonPrefixLengthPossibleMinInitialBlock(text, p);
333 final int initialSuffixMatchLength = computeCommonSuffixLength(text, p);
334
335 // Abandon it now now if the offered prev has no shared termini with us!
336 if((initialPrefixMatchLength == 0) && (initialSuffixMatchLength == 0))
337 { p = null; }
338 else
339 {
340 if(ALWAYS_LOOK_FOR_EARLIER_PREV)
341 {
342 // Prefer prefix matching if better or same as suffix length.
343 if(initialPrefixMatchLength >= initialSuffixMatchLength)
344 {
345 // Attempt to quickly find at-least-as-good recent prev
346 // when there is a prefix match available.
347 while((p != null) && (p.prev != null) &&
348 (computeCommonPrefixLengthPossibleMinInitialBlock(text, p.prev) >= initialPrefixMatchLength))
349 { p = p.prev; }
350 }
351 else
352 {
353 // If no prefix match was available
354 // then try to find the shortest chain while retaining the current prefix.
355 while((p != null) && (p.prev != null) &&
356 (computeCommonSuffixLength(text, p.prev) >= initialSuffixMatchLength))
357 { p = p.prev; }
358 }
359 }
360
361 final int ppCL = p.getPrevChainLength();
362 if(EXTRA_ASSERTS) { assert(ppCL >= 0); }
363 if(EXTRA_ASSERTS) { assert(ppCL <= MAX_PREV_CHAIN_LENGTH); }
364 if(ppCL >= MAX_PREV_CHAIN_LENGTH)
365 {
366 if(DEBUG) { System.out.println("limiting chain length"); }
367 // Go back at least one step; further if it doesn't hurt prefix match length much.
368 // This should give headroom to build a new useful near-maximal-length prefix.
369 final int targetMatchLength = Math.max(initialPrefixMatchLength / 2, MIN_SHARE_CHARS);
370 do { p = p.prev; }
371 while((p != null) && (p.prev != null) &&
372 ((p.getPrevChainLength() >= MAX_PREV_CHAIN_LENGTH) ||
373 (computeCommonPrefixLength(text, p.prev) >= targetMatchLength)));
374 }
375 // Ensure that any prev that we now have has a short enough chain length to tag on to.
376 if(EXTRA_ASSERTS) { assert((p == null) || (p.getPrevChainLength() < MAX_PREV_CHAIN_LENGTH)); }
377 }
378 }
379 // Find how much of prev is actually in common with the new text.
380 int sharedPrefixLen = (null == p) ? 0 : computeCommonPrefixLength(text, p);
381 // Only store/use any putative prev if there is in fact some common prefix/suffix string.
382 if(null != p)
383 { p = ((sharedPrefixLen > 0) || (computeCommonSuffixLength(text, p) > 0)) ? p : null; }
384 if(EXTRA_ASSERTS) { assert((sharedPrefixLen == 0) || (p != null)); } // No shared prefix possible without a non-null prev.
385 if(EXTRA_ASSERTS) { assert(getPrevChainLength() <= MAX_PREV_CHAIN_LENGTH); }
386
387 // Now extract any matching suffix from the tail residue after extracting a maximal prefix.
388 int tailLen = l - sharedPrefixLen;
389 int residualSharedSuffixLen = (null == p) ? 0 : computeCommonSuffixLength(text.subSequence(sharedPrefixLen, l), p.subSequence(sharedPrefixLen, p._length));
390 final int putativeSharedLength = sharedPrefixLen + residualSharedSuffixLen;
391
392 // If not enough shared length to justify a link to prev
393 // then avoid any such sharing...
394 if(putativeSharedLength < MIN_SHARE_CHARS)
395 {
396 p = null; // Reset to no sharing.
397 sharedPrefixLen = 0; // Reset to no sharing.
398 residualSharedSuffixLen = 0; // Reset to no sharing.
399 tailLen = l; // Reset to no sharing.
400 }
401
402 // No record the sharing that we are going to use.
403 prev = p;
404 // Now pack the termini lengths in together.
405 terminiLengths = (short) (sharedPrefixLen | (residualSharedSuffixLen << PREFIX_BITS));
406 // Set up the fast-access shadow prefix-length value.
407 _prefixLen = (short) sharedPrefixLen;
408
409 if(EXTRA_ASSERTS) { assert(getPrefixLen() == sharedPrefixLen) /* : "getPrefixLen()="+getPrefixLen()+", sharedPrefixLen="+sharedPrefixLen */ ; }
410 if(EXTRA_ASSERTS) { assert(getSuffixLen() == residualSharedSuffixLen); }
411 // We can't claim more shared chars than exist in the 'prev' value!
412 if(EXTRA_ASSERTS) { assert(sharedPrefixLen + residualSharedSuffixLen <= ((p == null) ? 0 : p._length)) : ("text="+text+", prev="+p); }
413
414 // Compute size of middle (unshared) portion.
415 final int midLen = tailLen - residualSharedSuffixLen;
416
417 if(EXTRA_ASSERTS) { assert(midLen >= 0); }
418 if(EXTRA_ASSERTS) { assert(sharedPrefixLen + midLen + residualSharedSuffixLen == text.length()); }
419
420 if(midLen == 0)
421 {
422 // Avoid any allocation for the tail if no middle bytes.
423 mid = EMPTIES.EMPTY_MID;
424 }
425 // Usual case: copy the non-shared middle bytes.
426 else
427 {
428 final byte t[] = new byte[midLen];
429 for(int i = midLen; --i >= 0; )
430 { t[i] = (byte) text.charAt(i + sharedPrefixLen); }
431 mid = t;
432 }
433
434 if(DEBUG) { System.out.println("prefixLen|midLen|suffixLen = " + sharedPrefixLen+"|"+midLen+"|"+residualSharedSuffixLen + " from "+text.subSequence(0, sharedPrefixLen)+"|"+text.subSequence(sharedPrefixLen, sharedPrefixLen+midLen)+"|"+text.subSequence(sharedPrefixLen+midLen, text.length())+", prevChainLength = " + getPrevChainLength()); }
435 //if(DEBUG && (sharedPrefixLen == 0) && (residualSharedSuffixLen != 0)) { System.out.println("***Shared-suffix-only (suffix len "+residualSharedSuffixLen+") Name instance created: "+text); }
436
437 if(EXTRA_ASSERTS) { assert((prev == null) || (sharedPrefixLen + residualSharedSuffixLen >= MIN_SHARE_CHARS)); }
438
439 // Verify what we've been given.
440 try { validateObject(); }
441 catch(final InvalidObjectException e)
442 { throw new IllegalArgumentException(e); }
443 }
444
445 /**Compute the shared/common prefix length forced to zero if initial contiguous block not large enough.
446 * A minimum initial contiguous prefix may be insisted on for efficiency,
447 * by heuristically reducing (logical) recursion and increasing the chunk size that can be worked on,
448 * at least at the very start of a Name, eg for when doing equals()/compareTo() and the like,
449 * possibly at a slight cost in compression effectiveness.
450 */
451 private static int computeCommonPrefixLengthPossibleMinInitialBlock(final CharSequence text, final Name p)
452 {
453 final int sharedPrefixLen = computeCommonPrefixLength(text, p);
454 if(MIN_FIRST_BLOCK_CHARS > 0)
455 {
456 if(sharedPrefixLen < MIN_FIRST_BLOCK_CHARS) { return(0); }
457 if(byteAtInner(p, 0) < MIN_FIRST_BLOCK_CHARS) { return(0); }
458 }
459 return(sharedPrefixLen);
460 }
461
462 /**Compute the shared/common prefix length. */
463 private static int computeCommonPrefixLength(final CharSequence text, final Name p)
464 {
465 int sharedPrefixLen = 0;
466 final int l = Math.min(Math.min(text.length(), p._length), MAX_PREFIX_LENGTH);
467 for(int i = 0; i < l; ++i, ++sharedPrefixLen)
468 { if(p.charAt(i) != text.charAt(i)) { break; } }
469 if(EXTRA_ASSERTS) { assert(sharedPrefixLen >= 0); }
470 return(sharedPrefixLen);
471 }
472
473 /**Compute the shared/common suffix length.
474 * Note that we may have to match against some tail of the prev text
475 * (to avoid overlapping prefix/suffix parts)
476 * and thus the p parameter is the more general CharSequence.
477 */
478 private static int computeCommonSuffixLength(final CharSequence text, final CharSequence p)
479 {
480 int sharedSuffixLen = 0;
481 final int tLen = text.length();
482 final int pLen = p.length();
483 final int l = Math.min(Math.min(tLen, pLen), MAX_SUFFIX_LENGTH);
484 for(int i = 0; i < l; ++i, ++sharedSuffixLen)
485 { if(p.charAt(pLen - i - 1) != text.charAt(tLen - i - 1)) { break; } }
486 if(EXTRA_ASSERTS) { assert(sharedSuffixLen >= 0); }
487 if(EXTRA_ASSERTS) { assert((sharedSuffixLen == l) || (p.charAt(pLen - sharedSuffixLen - 1) != text.charAt(tLen - sharedSuffixLen - 1))) : "returned match must be maximal length"; }
488 return(sharedSuffixLen);
489 }
490
491 /**Cache of the computed hash code.
492 * Not part of the permanent/serialised state of the object since easy to recompute,
493 * but this is called so often by intern(), etc, that it is worth storing
494 * given that typical texts will be long.
495 */
496 private transient int hash; // Default to 0
497
498 /**The hash is computed over the entire text as for String.
499 * This does not depend on how the data is internally stored, eg using a prefix or not.
500 */
501 @Override
502 public final int hashCode()
503 {
504 int h = hash;
505 if(h == 0) // Probably not yet computed, so do so now.
506 {
507 // Hash needs to be (re)computed.
508 final int len = _length;
509 for(int i = 0; i < len; i++)
510 { h = 31*h + charAt(i); }
511 hash = h; // Cache the result.
512 }
513 return(h);
514 }
515
516 /**Equality depends on the items being the same concrete (derived) type and lexically identical.
517 * This does not depend on how the data is internally stored, eg using a prefix or not.
518 * Specifically designed to work for derived classes too by default.
519 */
520 @Override
521 public final boolean equals(final Object obj)
522 {
523 if(this == obj) { return(true); }
524 if(null == obj) { return(false); }
525 if(getClass() != obj.getClass()) { return(false); }
526 //return(TextUtils.contentEquals(this, (Name) obj));
527 final Name o = (Name) obj;
528 final int l = length();
529 if(o._length != l) { return(false); }
530
531 final int[] args = new int[2]; // Allocate just once to minimise heap churn.
532 for(int i = 0; i < l; )
533 {
534 // Do this in chunks for efficiency...
535 final byte[] raw1 = bytesAtInner(this, i, args);
536 final int offset1 = args[0];
537 final int length1 = args[1];
538 final byte[] raw2 = bytesAtInner(o, i, args);
539 final int offset2 = args[0];
540 final int length2 = args[1];
541
542 final int ll = Math.min(length1, length2);
543 for(int j = 0; j < ll; ++j)
544 { if(raw1[offset1 + j] != raw2[offset2 + j]) { return(false); } }
545
546 i += ll;
547 }
548
549 return(true); // Identical content.
550 }
551
552 /**True iff this represents the same sequence of char values as the specified sequence.
553 * @param other the sequence to compare this against; never null.
554 *
555 * @return {@code true} if this represents the same
556 * sequence of char values as the specified sequence,
557 * {@code false} otherwise
558 */
559 public final boolean contentEquals(final CharSequence8Bit o)
560 { return(TextUtils.contentEquals(this, o)); }
561
562 /**True iff this represents the same sequence of char values as the specified sequence.
563 * @param other the sequence to compare this against; never null.
564 *
565 * @return {@code true} if this represents the same
566 * sequence of char values as the specified sequence,
567 * {@code false} otherwise
568 */
569 public final boolean contentEquals(final CharSequence o)
570 { return(TextUtils.contentEquals(this, o)); }
571
572
573 /**Non-empty item with which we share a prefix and/or suffix, or null if entire value is empty (equivalent to "") or if there is no shared part(s).
574 * May be non-empty if this new value is a prefix/suffix of the prev value.
575 * <p>
576 * Is protected since potentially to derived instances.
577 */
578 protected final Name prev;
579
580 /**Termini lengths (unsigned) packed into a single value for efficiency.
581 * The prefix bits are the low-order bits.
582 * <p>
583 * This is part of the compact serialised state.
584 */
585 private final short terminiLengths;
586
587 /**This is a logically-final shadow of the most-frequently accessed prefix length value; non-negative.
588 * This contains a copy of the prefix length.
589 * <p>
590 * Should not expand the memory footprint of an instance
591 * because it should slot in the gap left by the terminiLengths short value.
592 * <p>
593 * This is not part of the serialised state.
594 */
595 private transient /* final */ short _prefixLen;
596
597 /**This is a logically-final shadow computed value for the very-frequently accessed length; non-negative.
598 * This is not part of the serialised state.
599 */
600 private transient /* final */ int _length;
601
602 /**Middle portion of value, or a constant empty value iff there are no middle bytes; never null.
603 * We don't use default serialisation for this:
604 * it is written on the wire as raw bytes (if non-empty, else not at all)
605 * following a one-byte (positive) length if <= 127 chars,
606 * else a 4-byte (negated) length to be able to represent maximal-size CharSequences.
607 */
608 private transient /* final */ byte[] mid;
609
610 /**Used to hold values that may be needed before Name's own static initialisation can have completed.
611 * This can happen with circular initialisation dependencies.
612 */
613 private static final class EMPTIES
614 {
615 /**Constant/immutable/shareable empty mid value for any empty mid-section; never null. */
616 static final byte[] EMPTY_MID = new byte[0];
617 }
618
619 /**Chain depth, ie how deep our chain of prev values is, zero if this instance has no prev; non-negative.
620 * Can be used to help avoid getting silly tangles of non-GC-able prev chains,
621 * and indeed might help avoid situations that might attempt to create circular references.
622 * <p>
623 * Public for white-box testing only.
624 */
625 public final int getPrevChainLength()
626 {
627 int len = 0;
628 for(Name p = this; null != (p = p.prev); ++len) { }
629 return(len);
630 }
631
632 /**Returns number of internal contiguous blocks to cover all the text, fewer being better; non-negative.
633 * Intended for performance tuning/testing only; may not be a permanent nor supported part of the API.
634 */
635 public int getBlockCount()
636 {
637 int count = 0;
638 final int l = _length;
639 final int[] args = new int[2]; // Allocate just once to minimise heap churn.
640 for(int i = 0; i < l; )
641 {
642 ++count;
643 // Do this in chunks for efficiency...
644 final byte[] raw1 = bytesAtInner(this, i, args);
645 final int offset1 = args[0];
646 final int length1 = args[1];
647 assert(length1 > 0);
648 i += length1;
649 }
650 return(count);
651 }
652
653 /**Human-readable diagnostic of internal format; never null.
654 * Intended for performance tuning only; not a permanent nor supported part of the API.
655 */
656 public String showInternalStructure()
657 {
658 final StringBuilder sb = new StringBuilder(128);
659 final int l = _length;
660 sb.append("length=").append(l).append(", p=").append(_prefixLen).append(", s=").append(getSuffixLen()).
661 append(", depth=").append(getPrevChainLength()).
662 append(", prev=").append(System.identityHashCode(prev)).append("; ");
663 final int[] args = new int[2]; // Allocate just once to minimise heap churn.
664 for(int i = 0; i < l; )
665 {
666 // Do this in chunks for efficiency...
667 final byte[] raw1 = bytesAtInner(this, i, args);
668 final int offset1 = args[0];
669 final int length1 = args[1];
670 assert(length1 > 0);
671 sb.append("segment @").append(i).append(", length=").append(length1).append(", offset=").append(offset1).append(", ");
672 sb.append("text=>>>").append(new CS8Bit(raw1, offset1, offset1+length1)).append("<<<, ");
673 i += length1;
674 }
675 return(sb.toString());
676 }
677
678 /**Length of common prefix with prev item if any; non-negative.
679 * Only of minimal interest outside the class (for white-box testing) and may be withdrawn from the API.
680 */
681 public final int getPrefixLen()
682 {
683 return(_prefixLen); // return(terminiLengths & PREFIX_MASK); // Bottom bits.
684 }
685
686 /**Length of common suffix with prev item if any; non-negative.
687 * Only of minimal interest outside the class (for white-box testing) and may be withdrawn from the API.
688 */
689 public final int getSuffixLen()
690 {
691 return((terminiLengths >>> PREFIX_BITS) & SUFFIX_MASK); // Uses all remaining most-significant bits.
692 }
693
694 /**Convert to String containing same char sequence.
695 * We do not expensively intern() the result as it may be short-lived.
696 */
697 //@Override public String toString() { return((new StringBuilder(length())).append(this).toString()); }
698 @Override public String toString()
699 {
700 final int l = _length;
701 final char[] text = new char[l];
702 final int[] args = new int[2]; // Allocate just once to minimise heap churn.
703 for(int i = 0; i < l; )
704 {
705 // Do this in chunks for efficiency...
706 final byte[] raw1 = bytesAtInner(this, i, args);
707 final int offset1 = args[0];
708 final int length1 = args[1];
709 for(int j = 0; j < length1; ++j, ++i)
710 { text[i] = (char) (raw1[offset1 + j] & 0xff); }
711 }
712 return(new String(text));
713 }
714
715 /**Writes the Name as ASCII bytes to the given (non-null) byte array starting at the given offset.
716 * The array must be long enough (from the given offset) else an exception will be thrown.
717 * <p>
718 * Aims to be efficient (in CPU and memory footprint).
719 */
720 public final void writeToByteArray(final byte[] buf, final int offset)
721 {
722 if(null == buf) { throw new IllegalArgumentException(); }
723 if(offset < 0) { throw new IllegalArgumentException(); }
724 final int l = _length;
725 if(offset + l > buf.length) { throw new IllegalArgumentException(); }
726
727 final int[] args = new int[2]; // Allocate just once to minimise heap churn.
728 for(int i = 0; i < l; )
729 {
730 // Do this in chunks for efficiency...
731 final byte[] raw1 = bytesAtInner(this, i, args);
732 final int offset1 = args[0];
733 final int length1 = args[1];
734 System.arraycopy(raw1, offset1, buf, offset+i, length1);
735 i += length1;
736 }
737 }
738
739 /**Create private byte[] copy for caller; never null.
740 * Aims to be efficient (in CPU and memory footprint).
741 */
742 public final byte[] toByteArray()
743 {
744 final byte[] result = new byte[_length];
745 writeToByteArray(result, 0);
746 return(result);
747 }
748
749 /**At the given index returns the raw byte array and index and usable length for contiguous bytes from that index; never null.
750 * This gives direct access to underlying byte/character data in the largest lumps available,
751 * to facilitate more efficient multi-character operations (eg with less recursion).
752 * <p>
753 * This returns internal arrays/state which must not be passed outside the class.
754 *
755 * @param index must be within bounds for this Name; non-negative
756 * @param offsetAndLength 2-element array to return the offset (index 0) and length (index 1); never null
757 * @return non-null byte[] with in-bounds offset and positive length of usable character from that offset.
758 */
759 protected static byte[] bytesAtInner(Name name, int index, final int[] offsetAndLength)
760 {
761 int maxLen = name._length - index; // Maximum length that the returned segment can possibly be.
762 for( ; ; )
763 {
764 final short pLen = name._prefixLen;
765
766 // Drill into the shared prefix, if any.
767 if(index < pLen)
768 {
769 maxLen = Math.min(maxLen, pLen - index);
770 // if(EXTRA_ASSERTS) { assert(maxLen > 0); }
771 name = name.prev;
772 continue;
773 }
774
775 // Drill into the shared suffix, if any.
776 if(index >= pLen + name.mid.length)
777 {
778 // Recurse to fetch from further up the chain.
779 //final int length = sStart + name.getSuffixLen(); // length();
780 // if(EXTRA_ASSERTS) { assert(index < name._length); }
781 final int reverseIndex = name._length - index;
782 // if(EXTRA_ASSERTS) { assert(reverseIndex > 0); }
783 maxLen = Math.min(maxLen, reverseIndex);
784 // if(EXTRA_ASSERTS) { assert(maxLen > 0); }
785 name = name.prev;
786 index = name._length - reverseIndex;
787 continue;
788 }
789
790 // Point into the (unshared) mid text, which is nice and fast...
791 final int offset = index - pLen;
792 offsetAndLength[0] = offset;
793 final int len = name.mid.length - offset;
794 // if(EXTRA_ASSERTS) { assert(len > 0); }
795 offsetAndLength[1] = Math.min(maxLen, len);
796 // if(EXTRA_ASSERTS) { assert(maxLen > 0); }
797 return(name.mid);
798 }
799 }
800
801 /**Internal fast version of charAt()/byteAt() with args/bounds checking assumed done. */
802 protected static byte byteAtInner(Name name, int index)
803 {
804 for( ; ; )
805 {
806 final int pLen = name._prefixLen;
807
808 // Drill into the shared prefix, if any.
809 if(index < pLen)
810 {
811 name = name.prev;
812 continue;
813 }
814
815 // Drill into the shared suffix, if any.
816 if(index >= pLen + name.mid.length)
817 {
818 // if(EXTRA_ASSERTS) { assert(index < length); }
819 final int reverseIndex = name._length - index;
820 // if(EXTRA_ASSERTS) { assert(reverseIndex > 0); }
821 name = name.prev;
822 index = name._length - reverseIndex;
823 continue;
824 }
825
826 // Extract from the (unshared) mid text.
827 return(name.mid[index - pLen]);
828 }
829 }
830
831 //@Override
832 public final byte byteAt(final int index)
833 {
834 // Bounds check for public interface.
835 if((index < 0) || (index >= _length)) { throw new IllegalArgumentException(); }
836 return(byteAtInner(this, index));
837 }
838
839 public char charAt(final int index)
840 //{ return((char) (byteAt(index) & 0xff)); }
841 {
842 // Bounds check for public interface.
843 if((index < 0) || (index >= _length)) { throw new IllegalArgumentException(); }
844 return((char) (byteAtInner(this, index) & 0xff));
845 }
846
847 //@Override
848 public final int length()
849 //{ return(_prefixLen + mid.length + getSuffixLen()); }
850 { return(_length); }
851
852 /**Effectively-immutable light-weight wrapper around the underlying text.
853 * Can be intern()ed to help eliminate some expensive duplicates.
854 */
855 private static class CSWrapper<T extends Name> implements CharSequence8Bit,
856 Comparable<CharSequence>,
857 MemoryTools.Internable
858
859 {
860 /**Underlying instance; non-null.
861 * Definitely not intended to be serialised.
862 */
863 transient final T underlying;
864 /**Start offset (inclusive) on underlying impl; non-negative. */
865 final int start;
866 /**End offset (exclusive) on underlying impl; non-negative. */
867 final int end;
868
869 /**Create an instance.
870 * Only called from trusted classes so arg-checking can be by assert() to save release-build CPU cycles.
871 * @param underlying the Name instance that this provides a view of; never null
872 * @param start inclusive start index; in range [0,end]
873 * @param end exclusive end index; in range [start,underlying.length()]
874 */
875 CSWrapper(final T underlying, final int start, final int end)
876 {
877 if(EXTRA_ASSERTS)
878 {
879 assert(null != underlying);
880 assert(start >= 0);
881 assert(end <= underlying.length());
882 assert(start <= end);
883 }
884
885 this.underlying = underlying;
886 this.start = start;
887 this.end = end;
888 }
889
890 /* (non-Javadoc)
891 * @see java.lang.CharSequence#charAt(int)
892 */
893 //@Override
894 public char charAt(final int index)
895 { return((char) (byteAt(index) & 0xff)); }
896
897 //@Override
898 public byte byteAt(final int index)
899 {
900 if(index < 0) { throw new IllegalArgumentException(); }
901 if(index >= (end-start)) { throw new IllegalArgumentException(); }
902 return(underlying.byteAt(index + start));
903 }
904
905 /* (non-Javadoc)
906 * @see java.lang.CharSequence#length()
907 */
908 //@Override
909 public int length()
910 {
911 return(end-start);
912 }
913
914 /* (non-Javadoc)
915 * @see java.lang.CharSequence#subSequence(int, int)
916 */
917 //@Override
918 public CharSequence subSequence(final int start, final int end)
919 {
920 // We validate the parameters here since our constructor will not.
921 if(start < 0) { throw new IllegalArgumentException(); }
922 if(start > end) { throw new IllegalArgumentException(); }
923 if(end > length()) { throw new IllegalArgumentException(); }
924 if(EXTRA_ASSERTS) { assert(this.start + end <= underlying.length()); }
925 // Creates new wrapper around original underlying text for efficiency,
926 // ie not chained through this CSWrapper instance.
927 return(new CSWrapper<T>(underlying, this.start + start, this.start + end));
928 }
929
930 /**Equality checks for the same type and content. */
931 @Override public boolean equals(final Object obj)
932 {
933 if(this == obj) { return(true); }
934 if(!(obj instanceof CSWrapper)) { return(false); }
935 return(TextUtils.contentEquals(this, (CharSequence8Bit)obj));
936 }
937
938 /**The hash code is based on a fixed-time sampling of the content for speed. */
939 @Override public int hashCode()
940 {
941 final int l = length();
942 int h = l;
943 final int step = (l >>> 3);
944 for(int i = l; --i >= 0; i -= step) // Sample at least the last character.
945 { h = 37*h + charAt(i); }
946 h -= l;
947 return(h);
948 }
949
950 //@Override
951 public int compareTo(final CharSequence other)
952 { return(TextUtils.compare(this, other)); }
953
954 /**Convert to String containing same char sequence.
955 * We don't expensively intern() the result as it may be short-lived.
956 */
957 @Override public String toString() { return((new StringBuilder(length())).append(this).toString()); }
958
959 /**Create private byte[] copy for caller; never null. */
960 public final byte[] toByteArray()
961 {
962 final int length = length();
963 final byte[] result = new byte[length];
964 // Too hideous to fully optimise (yet) so do the simple way.
965 for(int i = length; --i >= 0; )
966 { result[i] = underlying.byteAt(i + start); }
967 return(result);
968 }
969 }
970
971 /**Maximum number of chars in a subsequence where a copy of the data in taken. */
972 private static final int MAX_TRIVIAL_COPY = 8;
973
974 /**Generate a light-weight wrapper, or for very small amounts of data, a copy; never null.
975 * Not necessarily Serializable.
976 */
977 //@Override
978 public final CharSequence8Bit subSequence(final int start, final int end)
979 {
980 if(start < 0) { throw new IndexOutOfBoundsException(); }
981 final int length = length();
982 if(end > length) { throw new IndexOutOfBoundsException(); }
983 if(start > end) { throw new IndexOutOfBoundsException(); }
984
985 final int newLength = end - start;
986
987 // If this is the whole existing value then return it.
988 if((start == 0) && (length == newLength)) { return(this); }
989
990 // In trivial empty case return fixed empty value.
991 if(newLength == 0) { return(EMPTY); }
992
993 // For trivial small subsequences,
994 // make a copy of the relevant data,
995 // eg where we wouldn't want to pin down the possibly-large original
996 // for something not even worthy of a pointer.
997 // We don't intern() so as to avoid high overheads.
998 if(newLength <= MAX_TRIVIAL_COPY)
999 { return(new CS8Bit(this, start, end)); }
1000
1001 // TODO: optimisation: if this is a prefix then return as a prefix instance.
1002
1003 // GENERAL CASE: make a light-weight wrapper.
1004 return(new CSWrapper<Name>(this, start, end));
1005 }
1006
1007
1008 /**Validate fields/state.
1009 * Called in the constructor and possibly after deserialising.
1010 * <p>
1011 * Barf if something bad is found.
1012 */
1013 public void validateObject()
1014 throws InvalidObjectException
1015 {
1016 // Test some invariants that duff external data should not be able to influence.
1017 if(EXTRA_ASSERTS) { assert(null != mid); }
1018 if(EXTRA_ASSERTS) { assert((mid.length != 0) || (mid == EMPTIES.EMPTY_MID)); }
1019 if(EXTRA_ASSERTS) { assert(_length >= mid.length); }
1020
1021 final int prefixLen = _prefixLen;
1022 if(EXTRA_ASSERTS) { assert(getPrefixLen() == _prefixLen); }
1023 final int suffixLen = getSuffixLen();
1024 if(EXTRA_ASSERTS) { assert((prefixLen >= 0) && (suffixLen >= 0)); }
1025 final int sharedLen = prefixLen + suffixLen;
1026 if(EXTRA_ASSERTS) { assert(sharedLen + mid.length == this._length); }
1027 if(prev != null)
1028 {
1029 if(sharedLen > prev._length)
1030 { throw new InvalidObjectException("bad object: prefix/suffix too long for prev: shared="+sharedLen+" vs prev.length="+prev._length); }
1031 if(EXTRA_ASSERTS) { assert(getPrevChainLength() >= 0); }
1032 if(getPrevChainLength() > MAX_PREV_CHAIN_LENGTH)
1033 { throw new InvalidObjectException("bad object: overlong prevChainLength: "+getPrevChainLength()); }
1034 // Prev must only be non-null if we have some sharing of prefix/suffix.
1035 // Note persisted values may have hash a different MIN_SHARE_CHARS at creation
1036 // so don't reject something simply because of 'not enough' sharing by current standards.
1037 if(sharedLen == 0)
1038 { throw new InvalidObjectException("bad object: invalid unused prev"); }
1039 }
1040 else
1041 {
1042 if(EXTRA_ASSERTS) { assert(getPrevChainLength() == 0); }
1043 if(sharedLen != 0)
1044 { throw new InvalidObjectException("bad object: missing prev needed for shared text"); }
1045 }
1046 if(Integer.MAX_VALUE - prefixLen - suffixLen < _length)
1047 { throw new InvalidObjectException("bad object: overlong components"); }
1048 }
1049
1050 /**Deserialise.
1051 * Expects validation to happen in readResolve().
1052 */
1053 private void readObject(final ObjectInputStream in)
1054 throws IOException, ClassNotFoundException
1055 {
1056 // Read in most of the fields in the usual way.
1057 in.defaultReadObject();
1058
1059 // Set up the fast-access shadow prefix-length value.
1060 final int pLen = (terminiLengths & PREFIX_MASK);
1061 this._prefixLen = (short) pLen;
1062
1063 // Read in the mid text with a variable-length 'length' prefix.
1064 // This mechanism also ensures that our 'mid' element is unshared.
1065 final byte l0 = in.readByte();
1066 final int midLen;
1067 if(l0 >= 0) { midLen = l0; }
1068 else
1069 {
1070 final byte l1 = in.readByte();
1071 final byte l2 = in.readByte();
1072 final byte l3 = in.readByte();
1073 midLen = -((l0 << 24) + ((l1 & 0xff) << 16) + ((l2 & 0xff) << 8) + (l3 & 0xff));
1074 }
1075 // Create and read the text field; don't create a new empty byte[] for size 0.
1076 if(midLen == 0) { mid = EMPTIES.EMPTY_MID; }
1077 else
1078 {
1079 mid = new byte[midLen];
1080 in.readFully(mid);
1081 }
1082
1083 // Set up the fast-access length value.
1084 this._length = pLen + midLen + getSuffixLen();
1085
1086 // IMPORTANT: this instance is now safe to use
1087 // but may not be fully valid (eg prev chain length may be too long).
1088 // such issues MUST be handled in read-resolve.
1089 }
1090
1091 /**Write out a minimally-redundant form of our internal information.
1092 * The more-efficient on-the-wire format also makes defensive
1093 * reading easier.
1094 */
1095 private void writeObject(final ObjectOutputStream oos)
1096 throws IOException
1097 {
1098 // Check if we're about to persist a bogus value.
1099 if(EXTRA_ASSERTS) { validateObject(); }
1100
1101 // Write out most of the fields in the usual way.
1102 oos.defaultWriteObject();
1103
1104 // Write the length in a variable-length format
1105 // (very short strings require fewer length bytes).
1106 // For very small strings use a byte-value length.
1107 if(mid.length <= Byte.MAX_VALUE) { oos.writeByte(mid.length); }
1108 // Write larger text sizes using a negative (int, 4-byte) length
1109 // so that reading the first byte will give a negative value.
1110 else { oos.writeInt(-mid.length); }
1111 // Now write the text bytes (if any) directly, unwrapped.
1112 if(0 != mid.length) { oos.write(mid); }
1113 }
1114
1115 /**Deserialise: validate this instance, coerce mildly-errant values, and eliminate duplicates.
1116 * Has to handle derived classes too.
1117 */
1118 protected Object readResolve()
1119 throws ObjectStreamException
1120 {
1121 // In the case where the prev chain length on the wire is too large for current settings,
1122 // dynamically generate a new valid replacement (same content, different/no prev) here.
1123 if(getPrevChainLength() > MAX_PREV_CHAIN_LENGTH)
1124 {
1125 if(IsDebug.isDebug) { System.err.println("WARNING: replacing deserialised Name item that has an overly-long chain: "+getPrevChainLength()+" vs "+MAX_PREV_CHAIN_LENGTH); }
1126 // Attempt to retain some of the sharing if possible.
1127 Name p;
1128 for(p = prev; (p != null) && (p.getPrevChainLength() >= MAX_PREV_CHAIN_LENGTH); p = p.prev) { }
1129
1130 // FIXME: nasty interdependency between base and derived classes here...
1131 if(ExhibitFull.class.equals(getClass()))
1132 { return(ExhibitFull.create(new CS8Bit(this), p)); }
1133 return(Name.create(new CS8Bit(this), p));
1134 }
1135
1136 // Check that this instance is valid.
1137 validateObject();
1138
1139 // Eliminate duplicates while deserialising.
1140 return(MemoryTools.intern(this));
1141 }
1142
1143 /**Unique serialisation ID. */
1144 private static final long serialVersionUID = 2798109831174584816L;
1145
1146
1147 /**ExhibitShortName immutable CharSequence light-weight wrapper around underlying ExhibitFull data.
1148 * Though slow, can be used as Map/Set/etc element,
1149 * as they have a working (though inefficient) consistent hashCode() and equals(),
1150 * but it is much preferred (more efficient/fast) to use ExhibitFull as a key.
1151 */
1152 public static final class ExhibitShort extends CSWrapper<ExhibitFull> implements CharSequence7Bit
1153 {
1154 /**Create an instance.
1155 * Only intended to be created by an ExhibitFull instance.
1156 */
1157 private ExhibitShort(final ExhibitFull underlying, final int start, final int end)
1158 {
1159 super(underlying, start, end);
1160 if(EXTRA_ASSERTS) { assert(ExhibitName.validNameFinalComponentSyntax(this)); }
1161 }
1162
1163 /**Retrieve full name for this short name; never null. */
1164 public ExhibitFull getFullName() { return(underlying); }
1165
1166 /**Extract the main words (stem) component; never null nor empty.
1167 * As ExhibitName.getMainWordsComponent() but optimised for this common case,
1168 * with the result content identical to that from getMainWordsComponent(getFullName()).
1169 * <p>
1170 * The result is intern()able to help conflate identical values if required,
1171 * ie has a working hashCode() and equals(),
1172 * which may save space and accelerate searching for example.
1173 *
1174 * @param allAttrWords a Set of all legal attribute words (String values);
1175 * may be empty but not null
1176 */
1177 public CharSequence8Bit getMainWordsComponent(final Set<String> allAttrWords)
1178 {
1179 final int endOfAttrWords = ExhibitName.getEndOfAttrWords(this);
1180 final int endOfMainWords = ExhibitName.getEndOfMainWords(this, -1, endOfAttrWords, allAttrWords);
1181 final CharSequence8Bit result = new CSWrapper<ExhibitFull>(underlying, start, start + endOfMainWords);
1182 // Should be identical result to getMainWordsComponent() on full name.
1183 // assert(TextUtils.contentEquals(result, ExhibitName.getMainWordsComponent(underlying, allAttrWords)));
1184 return(result);
1185 }
1186
1187 /**Equality checks for the same type and content.
1188 * Will return true in the case of an identical short name of a different full name.
1189 */
1190 @Override public boolean equals(final Object obj)
1191 {
1192 if(this == obj) { return(true); }
1193 if(!(obj instanceof Name.ExhibitShort)) { return(false); }
1194 return(TextUtils.contentEquals(this, (Name.ExhibitShort)obj));
1195 }
1196
1197 /**The hash code is based on a fixed-time sampling of the content for speed. */
1198 @Override public int hashCode()
1199 {
1200 final int l = length();
1201 int h = l;
1202 final int step = 1 + (l >>> 3);
1203 for(int i = l; (i -= step) > 0; )
1204 { h = 37*h + byteAt(i); }
1205 h -= l;
1206 h = (31*h) ^ byteAt(0); // Always include the first char in the hash as typically contains a reasonable spread of values.
1207 return(h);
1208 }
1209
1210 /**Convert to String containing same char sequence; never null nor empty.
1211 * Automatically intern() the String result to avoid duplication.
1212 */
1213 @Override public String toString() { return(MemoryTools.intern((new StringBuilder(length())).append(this).toString())); }
1214
1215 /**Unique leading printable-ASCII character for persistableKey(), not a valid exhibit name char. */
1216 public static final char PERSISTABLE_NAME_LEADING_CHAR = '!';
1217
1218 /**Length of persistable key in chars; strictly positive. */
1219 public static final int PERSISTABLE_NAME_LENGTH = 8;
1220
1221 /**Persistable unique printable-ASCII-7 immutable CharSequence (CharSequence8Bit) key; never null nor empty.
1222 * This generates a key:
1223 * <ul>
1224 * <li>that is of fixed (small) length</li>
1225 * <li>that should be shorter than most actual exhibit short names thus representing a reduced memory footprint</li>
1226 * <li>that is quickly and easily distinguished from (and can be unambiguously mixed with) exhibit short (and full) names, by leading PERSISTABLE_NAME_LEADING_CHAR</li>
1227 * <li>that should in practice be unique across all short exhibit names and thus can be used as a substitute for a short name</li>
1228 * <li>depends only on the short name, not any other part of the full name</li>
1229 * <li>has a small amount of verification built in (second char of key is first char of short name)</li>
1230 * <li>that is pure printable 7-bit ASCII</li>
1231 * <li>does not contain any ASCII control characters or whitespace (≤32)</li>
1232 * <li><em>not</em> intern()ed here because a key is small enough for the overhead to be potentially prohibitive</li>
1233 * </ul>
1234 * <p>
1235 * This may be slow to compute.
1236 */
1237 public CharSequence8Bit persistableKey()
1238 {
1239 final byte[] key = new byte[PERSISTABLE_NAME_LENGTH];
1240 key[0] = (byte) PERSISTABLE_NAME_LEADING_CHAR;
1241 key[1] = byteAt(0);
1242
1243 // Extract a strong SHA-1 digest/hash of the bytes of the short name.
1244 // (Excludes the first char already explicitly in the key.)
1245 final MessageDigest digestor;
1246 try { digestor = MessageDigest.getInstance(CoreConsts.HASH_SHA1); }
1247 catch(final NoSuchAlgorithmException e) { throw new Error("Unable to locate hash impl: "+CoreConsts.HASH_SHA1, e); }
1248 for(int i = length(); --i > 0; ) { digestor.update(byteAt(i)); } // TODO: possibly replace with more efficient chunk-at-a-time form.
1249 final byte digest[] = digestor.digest();
1250 if(EXTRA_ASSERTS) { assert(digest != null); }
1251 if(EXTRA_ASSERTS) { assert(digest.length >= (PERSISTABLE_NAME_LENGTH - 2)); }
1252 // Convert digest to printable ~6.5-bits-per-char form
1253 // to retain as much digest as possible and thus minimise chance of key collision.
1254 for(int i = 0, ki = 2; ki < PERSISTABLE_NAME_LENGTH; i++)
1255 {
1256 int ib = digest[i] & 0x7f; // Basic 7-bit form.
1257 // If result would be control/whitespace (or DEL),
1258 // then translate it into the printable region,
1259 // and incorporate the otherwise-discarded digest bit.
1260 if(ib <= ' ')
1261 { ib += (((digest[i] & 0x80) != 0) ? 64 : 94); }
1262 else if(ib == 127) // Avoid DEL code.
1263 { ib = ((digest[i] & 0x80) != 0) ? 63 : 93; }
1264 if(EXTRA_ASSERTS) { assert((ib > ' ') && (ib < 127)); }
1265 key[ki++] = (byte) ib; // Append digest char.
1266 }
1267
1268 return(new CS8Bit(key));
1269 }
1270 }
1271
1272 /**ExhibitFullName holds a (ASCII-7) valid syntactically-valid full exhibit name.
1273 * This will only allow construction of instances representing a valid full exhibit name.
1274 * <p>
1275 * This mainly represents a guarantee that the data is a valid full exhibit name,
1276 * plus some utility methods/types for conveniently and efficiently accessing components of that name.
1277 * <p>
1278 * The constructors are private and only accessible via factory methods
1279 * to help with instance control,
1280 * and all instances are automatically intern()ed to eliminate duplicates.
1281 */
1282 public static final class ExhibitFull extends Name implements CharSequence7Bit
1283 {
1284 private ExhibitFull(final CharSequence name, final Name prev)
1285 {
1286 super(name, prev); // Encode/store the text.
1287
1288 // Verify what we've been given.
1289 try { validateObject(); }
1290 catch(final InvalidObjectException e)
1291 { throw new IllegalArgumentException(e.getMessage()); }
1292 }
1293
1294 /**Create an ExhibitFull instance; never null.
1295 * It is often best to attempt to share a prefix with the lexically previous (or next) item.
1296 * <p>
1297 * If passed an ExhibitFull (but not including sub-classes) this returns it untouched.
1298 *
1299 * @param fullName the (7-bit) syntactically-valid full exhibit name; never null nor empty
1300 *
1301 * @throws IllegalArgumentException in the case of a null or invalid putative full name
1302 */
1303 public static ExhibitFull create(final CharSequence fullName)
1304 { return(create(fullName, null)); }
1305
1306 /**Create an ExhibitFull instance with an optional previous value to attempt to share a prefix with; never null.
1307 * It is often best to attempt to share a prefix with the lexically previous (or next) item.
1308 * <p>
1309 * If passed an ExhibitFull (but not including sub-classes) this returns it untouched.
1310 *
1311 * @param fullName the (7-bit) syntactically-valid full exhibit name; never null nor empty
1312 * @param prev previous Name with which to attempt to share a prefix; null if none
1313 *
1314 * @throws IllegalArgumentException in the case of a null or invalid putative full name
1315 */
1316 public static ExhibitFull create(final CharSequence fullName,
1317 final Name prev)
1318 {
1319 if(null == fullName) { throw new IllegalArgumentException("ExhibitFull Name cannot be null"); }
1320
1321 if(ExhibitFull.class.equals(fullName.getClass()))
1322 {
1323 // Optimisation: if this is already an ExhibitFull (and not a sub-class) then return it as-is
1324 // if no prev has been offered (that might share a longer prefix).
1325 if(prev == null) { return((ExhibitFull) fullName); }
1326 // Optimisation: if prev content equals this then return prev to save a visit to intern(), etc.
1327 else if(prev.contentEquals(fullName)) { return((ExhibitFull) prev); }
1328 }
1329
1330 final AdHocPrefixCache<Integer> adHocPrefixCache = getAdHocPrefixCache();
1331 // If no prev was specified we look for one in the ad-hoc cache.
1332 final Name prevToUse;
1333 if(prev != null) { prevToUse = prev; }
1334 else
1335 {
1336 // If we find an exact match with the right class in the ad-hoc cache then return it.
1337 prevToUse = adHocPrefixCache.findPrev(fullName);
1338 if((prevToUse != null) && (ExhibitFull.class.equals(prevToUse.getClass())) && prevToUse.contentEquals(fullName))
1339 { return((ExhibitFull) prevToUse); }
1340 }
1341 // Return intern()ed value to eliminate accidental duplicates.
1342 // Pass it through the ad-hoc prefix cache too...
1343 final ExhibitFull result = adHocPrefixCache.offerNewInstance(MemoryTools.intern(new ExhibitFull(fullName, prevToUse)));
1344 if(EXTRA_ASSERTS) { assert(ExhibitFull.class.equals(result.getClass())); }
1345 return(result);
1346 }
1347
1348 /**Create an ExhibitFull instance with an optional previous value to attempt to share a prefix with, with no intern()ing; never null.
1349 * It is often best to attempt to share a prefix with the lexically previous (or next) item.
1350 * <p>
1351 * This uses minimal cleverness, no intern()ing, no use of prefix/suffix caches, minimal overheads, etc,
1352 * and so allows generation of completely fresh instances unconnected with any previous ones.
1353 * This may well not make optimal use of memory, and so is not usually recommended.
1354 *
1355 * @param fullName the (7-bit) syntactically-valid full exhibit name; never null nor empty
1356 * @param prev previous Name with which to attempt to share a prefix; null if none
1357 *
1358 * @throws IllegalArgumentException in the case of a null or invalid putative full name
1359 */
1360 public static ExhibitFull createNoIntern(final CharSequence fullName,
1361 final Name prev)
1362 {
1363 if(null == fullName) { throw new IllegalArgumentException("ExhibitFull Name cannot be null"); }
1364 final ExhibitFull result = new ExhibitFull(fullName, prev);
1365 return(result);
1366 }
1367
1368 /**Cache of link to parent, lazily computed on demand.
1369 * Only accessed under instance lock so as to eliminate access races
1370 * and thus guarantee avoidance of creation of duplicates.
1371 * <p>
1372 * Not serialisable (because recomputable).
1373 */
1374 private transient ExhibitShort shortName;
1375
1376 /**Extract strongly-typed ExhibitShortName, ie the filename component; never null.
1377 * This is a light-weight, non-intern()able CharSequence wrapper around the existing data.
1378 * <p>
1379 * This guarantees that only one ShortName instance can be generated for each FullName.
1380 * Thus intern()ing is neither needed nor supported.
1381 */
1382 public synchronized ExhibitShort getShortName()
1383 {
1384 ExhibitShort result = shortName;
1385 if(null != result) { return(result); }
1386 final int l = length();
1387 shortName = result = new ExhibitShort(this, TextUtils.lastIndexOf(this, '/', l - ExhibitName.MIN_FILENAME_LENGTH) + 1, l);
1388 return(result);
1389 }
1390
1391 /**Create a "virtual" full name consisting of the category directory and then the file component; never null.
1392 * The results in a syntactically-valid full exhibit name,
1393 * but omits the internal directory "noise".
1394 * <p>
1395 * Returns this instance as-is if already 'virtual'.
1396 */
1397 public Name.ExhibitFull getVirtualExhibitName()
1398 {
1399 // Optimisation:
1400 // if there is only one slash, the name is already virtual,
1401 // so the name can be returned untouched.
1402 final int l = length();
1403 final int firstSlash = TextUtils.indexOf(this, '/', 1);
1404 final int lastSlash = TextUtils.lastIndexOf(this, '/', l - ExhibitName.MIN_FILENAME_LENGTH);
1405 if(firstSlash == lastSlash) { return(this); }
1406
1407 // FIXME: avoid inefficient conversion via StringBuilder/char[].
1408 return(create(new StringBuilder(l - (lastSlash-firstSlash))
1409 .append(subSequence(0, firstSlash+1))
1410 .append(subSequence(lastSlash+1, l)),
1411 this)); // Prefix and suffix should be entirely shareable with this...
1412 }
1413
1414 /**Extract char at specified index.
1415 * This specialisation can take advantage of the fact that exhibit names are entirely 7-bit,
1416 * so there is no danger of sign extension in conversion from byte to int for comparison.
1417 */
1418 public final char charAt(final int index)
1419 //{ return((char) byteAt(index)); }
1420 {
1421 // Bounds check for public interface.
1422 if((index < 0) || (index >= length())) { throw new IllegalArgumentException(); }
1423 return((char) byteAtInner(this, index));
1424 }
1425
1426 /**Provide natural sort order for this class.
1427 * This specialisation can take advantage of the fact that exhibit names are entirely 7-bit,
1428 * so there is no danger of sign extension in conversion from byte to int for comparison.
1429 */
1430 public final int compareTo(final Name.ExhibitFull o)
1431 {
1432 // Same instance compares equal...
1433 if(this == o) { return(0); }
1434 //return(TextUtils.compare(this, o));
1435
1436 // Inlined and specialised for better performance...
1437 final int l1 = length();
1438 final int l2 = o.length();
1439 final int l = Math.min(l1, l2);
1440
1441 // if(l > 0)
1442 {
1443 final int[] args = new int[2]; // Allocate just once to minimise heap churn.
1444
1445 for(int i = 0; i < l; )
1446 {
1447 // Do this in chunks for efficiency...
1448 final byte[] raw1 = bytesAtInner(this, i, args);
1449 final int offset1 = args[0];
1450 final int length1 = args[1];
1451 final byte[] raw2 = bytesAtInner(o, i, args);
1452 final int offset2 = args[0];
1453 final int length2 = args[1];
1454
1455 final int ll = Math.min(l-i, Math.min(length1, length2));
1456 //if(EXTRA_ASSERTS) { assert(ll > 0); }
1457
1458 for(int j = 0; j < ll; ++j)
1459 {
1460 final int c1 = raw1[offset1 + j];
1461 final int c2 = raw2[offset2 + j];
1462 //if(EXTRA_ASSERTS) { assert((c1 >= 0) && (c2 >= 0)) : "exhibit name chars can only be 7-bit so no danger of sign-extension"; }
1463 if(c1 != c2)
1464 {
1465 final int cResult = c1 - c2;
1466 //if(EXTRA_ASSERTS) { assert(cResult == TextUtils.compare(this, o)); } // Very expensive but precise check.
1467 return(cResult);
1468 }
1469 }
1470
1471 i += ll;
1472 }
1473 }
1474
1475 final int lResult = l1 - l2;
1476 //if(EXTRA_ASSERTS) { assert(lResult == TextUtils.compare(this, o)); } // Very expensive but precise check.
1477 return(lResult);
1478 }
1479
1480 /**Convert to String containing same char sequence; never null.
1481 * Automatically intern()s the full-exhibit-name result to avoid duplication.
1482 */
1483 // @Override public String toString() { return(MemoryTools.intern((new StringBuilder(length())).append(this).toString())); }
1484 @Override public String toString() { return(MemoryTools.intern((new StringBuilder(length())).append((CharSequence8Bit) (this)).toString())); }
1485
1486 /**Validate fields/state.
1487 * Called in the constructor and possibly after de-serialising.
1488 * <p>
1489 * Barf if something bad is found.
1490 * (Maybe allow some extra info in debug version.)
1491 */
1492 @Override
1493 public void validateObject()
1494 throws InvalidObjectException
1495 {
1496 // Check Name invariants.
1497 super.validateObject();
1498
1499 // Verify that this is valid full Exhibit name content!
1500 if(!ExhibitName.validNameSyntaxForce(this))
1501 { throw new InvalidObjectException("bad object: not a valid full exhibit name: "+this); }
1502 }
1503
1504 /**Unique Serialisation class ID generated by http://random.hd.org/. */
1505 private static final long serialVersionUID = 8993934283490189577L;
1506 }
1507
1508 /**Provide natural sort order for this and derived classes. */
1509 public final int compareTo(final Name o)
1510 {
1511 // Same instance compares equal...
1512 if(this == o) { return(0); }
1513 //return(TextUtils.compare(this, o));
1514 // Inlined and specialised for better performance...
1515 final int l1 = _length;
1516 final int l2 = o._length;
1517 final int l = Math.min(l1, l2);
1518 for(int i = 0; i < l; ++i)
1519 {
1520 final int c1 = (byteAt(i) & 0xff);
1521 final int c2 = (o.byteAt(i) & 0xff);
1522 if(c1 != c2) { return(c1 - c2); }
1523 }
1524 return(l1 - l2);
1525 }
1526
1527 /**Compute the Integer prefix key for the supplied CharSequence; never null.
1528 * This assumes that the top 8 bits of each char are unimportant
1529 * since if the sequence is to be turned into a Name they must be zero.
1530 * <p>
1531 * We construct the prefix key from the first four characters of the putative Name
1532 * (or fewer if the name is shorter).
1533 */
1534 @SuppressWarnings("unused") private static Integer computeIntegerPrefixKey(final CharSequence cs)
1535 {
1536 if(EXTRA_ASSERTS) { assert(null != cs); }
1537 final int l = cs.length();
1538 if((MIN_TERMINUS_MATCH_LENGTH <= 0) && (l == 0)) { return(0); }
1539 int result = cs.charAt(0);
1540 if((MIN_TERMINUS_MATCH_LENGTH <= 1) && (l == 1)) { return(result); }
1541 result |= (((int) cs.charAt(1)) << 8);
1542 if((MIN_TERMINUS_MATCH_LENGTH <= 2) && (l == 2)) { return(result); }
1543 result |= (((int) cs.charAt(2)) << 16);
1544 if((MIN_TERMINUS_MATCH_LENGTH <= 3) && (l == 3)) { return(result); }
1545 result |= (((int) cs.charAt(3)) << 24);
1546 return(result);
1547 }
1548
1549 /**Compute the Integer suffix key for the supplied CharSequence; never null.
1550 * This assumes that the top 8 bits of each char are unimportant
1551 * since if the sequence is to be turned into a Name they must be zero.
1552 * <p>
1553 * We construct the suffix key from the last four characters of the putative Name
1554 * (or fewer if the name is shorter).
1555 */
1556 @SuppressWarnings("unused") private static Integer computeIntegerSuffixKey(final CharSequence cs)
1557 {
1558 if(EXTRA_ASSERTS) { assert(null != cs); }
1559 final int l = cs.length();
1560 if((MIN_TERMINUS_MATCH_LENGTH <= 0) && (l == 0)) { return(0); }
1561 int result = cs.charAt(l - 1);
1562 if((MIN_TERMINUS_MATCH_LENGTH <= 1) && (l == 1)) { return(result); }
1563 result |= (((int) cs.charAt(l - 2)) << 8);
1564 if((MIN_TERMINUS_MATCH_LENGTH <= 2) && (l == 2)) { return(result); }
1565 result |= (((int) cs.charAt(l - 3)) << 16);
1566 if((MIN_TERMINUS_MATCH_LENGTH <= 3) && (l == 3)) { return(result); }
1567 result |= (((int) cs.charAt(l - 4)) << 24);
1568 return(result);
1569 }
1570
1571 /**Contains lookup cache by prefix for all Name instances.
1572 * This is used to try to find a prefix for a newly-constructed Name
1573 * where no prefix has been supplied at all.
1574 * <p>
1575 * This looks up in a short list of recent/useful Name values with the same fixed-length prefix
1576 * for something of the same concrete type and the longest available prefix match,
1577 * returning the candidate 'prev' item (or null if none)
1578 * which then can be used in the usual way.
1579 * <p>
1580 * The Name instances are held via WeakReference wrappers
1581 * which should ensure that the cache does not keep them from being garbage collected.
1582 * <p>
1583 * This would usually be invoked by the factory method or constructor
1584 * where no 'prev' has been supplied.
1585 * <p>
1586 * This cache should grow as needed to get a decent hit rate,
1587 * and shrink to release memory when the hit rate is high or the memory subsystem is stressed.
1588 * <p>
1589 * Thread-safe but does not support concurrency (fully synchronized).
1590 * <p>
1591 * A lock can be held on instances of this object to make compound operations atomic.
1592 *
1593 * @param <TerminusKey> type of key for prefix of Name instances
1594 * must be immutable and support consistent hashCode() and equals()
1595 */
1596 private static final class AdHocPrefixCache<TerminusKey extends Number>
1597 {
1598 /**The lookup from prefix key to list of candidate same-prefix instances; never null.
1599 * On each list may be items all of exactly one prefix
1600 * or whose prefix hashes to the same value,
1601 * depending on how the prefix key value is constructed.
1602 * <p>
1603 * Empty lists (or those effectively empty because all the WeakReferences have been dropped)
1604 * will be purged (ie empty lists are not retained).
1605 * <p>
1606 * The lists values are of type LinkedList, with 'best' items first.
1607 * <p>
1608 * (In future, where a list is of length 1 and where the maximum length is currently 1,
1609 * a list may be stored as a singleton for efficiency.)
1610 * <p>
1611 * We hope for a reasonable minimum size to have a sensible chance of any match at all
1612 * across a large text base.
1613 * <p>
1614 * We size the maximum based on total heap capacity,
1615 * and allow everything to be discarded in case of extreme memory stress.
1616 */
1617 private final MemoryTools.CacheMiniMap<TerminusKey, LinkedList<WeakReference<Name>>> prefixStore =
1618 LRUMapAutoSizeForHitRate.<TerminusKey, LinkedList<WeakReference<Name>>>create(
1619 0, computeCacheMaxSize(), "prefixStore");
1620
1621 /**The lookup from suffix key to list of candidate same-suffix instances; never null.
1622 * Lighter-weight than the prefixStore, and smaller, since suffix matching is a secondary activity.
1623 */
1624 private final MemoryTools.CacheMiniMap<TerminusKey, LinkedList<WeakReference<Name>>> suffixStore =
1625 SimpleProbabilisticCache.<TerminusKey, LinkedList<WeakReference<Name>>>create(
1626 computeCacheMaxSize() >> 2, // Smaller heap for suffixes.
1627 "suffixStore");
1628
1629 /**Compute maximum (prefix) cache size, partly based on heap size; strictly positive.
1630 * Aim to allow ~10000 entries per 1GB of heap, with a minimum of several hundred.
1631 */
1632 private static int computeCacheMaxSize()
1633 { return(Math.max(1<<9, (int) Math.min(1<<16, Runtime.getRuntime().totalMemory() >> 17))); }
1634
1635
1636 /**Returns a suitable prev (with a shared prefix/suffix) for the supplied non-null Name of any Name subclass, else null if none found.
1637 * May trim state, especially dead references.
1638 */
1639 synchronized Name findPrev(final CharSequence putativeName)
1640 {
1641 if(EXTRA_ASSERTS) { assert(null != putativeName); }
1642
1643 final int len = putativeName.length();
1644
1645 // Don't try for very short matches.
1646 if(len < MIN_TERMINUS_MATCH_LENGTH) { return(null); }
1647
1648 // Find and return the best match, if any.
1649 // If we find any prefix match then we don't try for a suffix match.
1650 for(final boolean doPrefix : new boolean[]{true, false})
1651 {
1652 // Choose which store to operate on.
1653 final MemoryTools.CacheMiniMap<TerminusKey, LinkedList<WeakReference<Name>>> store =
1654 doPrefix ? prefixStore : suffixStore;
1655 // Compute the correct key for the selected store.
1656 final TerminusKey key = (TerminusKey)
1657 (doPrefix ? computeIntegerPrefixKey(putativeName) : computeIntegerSuffixKey(putativeName));
1658
1659 // Get the existing list for this prefix if any...
1660 final LinkedList<WeakReference<Name>> list = store.get(key);
1661 // If no list then there is no available match.
1662 if(null == list) { return(null); } // Nothing found.
1663
1664 Name result = null; // Nothing found yet.
1665 int savedCharsBest = 0; // Best shared-characters length (prefix and suffix) found so far.
1666 WeakReference<Name> eBest = null; // Container for the best item.
1667 boolean first = true;
1668 for(final Iterator<WeakReference<Name>> it = list.iterator(); it.hasNext(); first = false)
1669 {
1670 final WeakReference<Name> e = it.next();
1671 final Name cachedName = e.get();
1672 // If we find a dead entry then zap it and continue...
1673 if(null == cachedName)
1674 {
1675 it.remove();
1676 continue;
1677 }
1678 // Find common prefix/suffix length: skip if zero.
1679 // Can be > len if sharable prefix and suffix overlap.
1680 final int savedChars = Name.computeCommonPrefixLength(putativeName, cachedName) + Name.computeCommonSuffixLength(putativeName, cachedName);
1681 if(savedChars == 0)
1682 { continue; }
1683 // If this is best so far than capture it.
1684 if(savedChars > savedCharsBest)
1685 {
1686 if(DEBUG /*|| !doPrefix*/) { System.out.println("***Name ad-hoc "+(doPrefix?"prefix":"suffix")+" match found "+((result==null) ? "" : "better ")+"match with shared chars "+savedChars); }
1687 result = cachedName;
1688 savedCharsBest = savedChars;
1689 if(!first) { eBest = e; }
1690 // If we already have a maximal match (no unshared characters) then stop looking.
1691 if(savedChars >= len) { break; }
1692 }
1693 }
1694 // Move best item to the front to mark it as 'best' (if not already there).
1695 if(null != eBest)
1696 {
1697 list.remove(eBest);
1698 list.addFirst(eBest);
1699 }
1700
1701 // Trim the list iff now too long (eg because we are now memory-stressed).
1702 trimListLength(list, doPrefix);
1703 // If the list is now empty then remove it entirely.
1704 if(list.isEmpty()) { store.remove(key); }
1705
1706 // If we found something then don't move on to the next store...
1707 if(result != null) { return(result); }
1708 }
1709
1710 return(null); // Nothing found...
1711 }
1712
1713 /**Offer a newly constructed Name instance to the cache; returns the value passed in.
1714 * Rejected if already at maximum chain length.
1715 * @return the input name unchanged
1716 */
1717 synchronized <N extends Name> N offerNewInstance(final N name)
1718 {
1719 if(null == name) { return(null); }
1720
1721 // Don't clutter our tables with very short values.
1722 if(name.length() < MIN_TERMINUS_MATCH_LENGTH) { return(name); }
1723
1724 // If too long to share termini with, espeically ad-hoc, then reject.
1725 if(name.getPrevChainLength() >= MAX_PREV_CHAIN_LENGTH) { return(name); }
1726
1727 for(final boolean doPrefix : new boolean[]{true, false})
1728 {
1729 // Choose which store to operate on.
1730 final MemoryTools.CacheMiniMap<TerminusKey, LinkedList<WeakReference<Name>>> store =
1731 doPrefix ? prefixStore : suffixStore;
1732 // Compute the correct key for the selected store.
1733 final TerminusKey key = (TerminusKey)
1734 (doPrefix ? computeIntegerPrefixKey(name) : computeIntegerSuffixKey(name));
1735
1736 // Get the existing list for this prefix if any...
1737 LinkedList<WeakReference<Name>> list = store.get(key);
1738 // Create (and store) a new list if need be.
1739 if(null == list)
1740 {
1741 list = new LinkedList<WeakReference<Name>>();
1742 store.put(key, list);
1743 }
1744 // Trim out any dead items (ie weak references gone away).
1745 trimDeadRefs(list);
1746 // If something already equivalent to the supplied Name is on the list
1747 // (ie same content and concrete class)
1748 // then move it to the front (unless already at the front!)
1749 // rather than creating a new WeakReference instance
1750 // and/or filling up the list with duplicates for example.
1751 boolean foundNameAlreadyOnList = false;
1752 boolean first = true;
1753 for(final Iterator<WeakReference<Name>> it = list.iterator(); it.hasNext(); first = false)
1754 {
1755 final WeakReference<Name> e = it.next();
1756 final Name cachedName = e.get();
1757 if(name.equals(cachedName))
1758 {
1759 foundNameAlreadyOnList = true;
1760 if(!first)
1761 {
1762 if(DEBUG) { System.out.println("Name.AHPC: moved up to first position: "+name); }
1763 it.remove(); // Remove from this position...
1764 list.addFirst(e); // Re-insert at start (without constructing a new WeakRef).
1765 }
1766 else { if(DEBUG) { System.out.println("Name.AHPC: found already in first position: "+name); } }
1767 break;
1768 }
1769 }
1770 // Else stuff us on the front of the list as 'newest' and possibly best.
1771 if(!foundNameAlreadyOnList)
1772 {
1773 list.addFirst(new WeakReference<Name>(name));
1774 if(DEBUG) { System.out.println("Name.AHPC: inserting new at first position: "+name); }
1775 }
1776
1777 // Trim the list iff now too long.
1778 trimListLength(list, doPrefix);
1779 }
1780
1781 return(name); // Return the input name.
1782 }
1783
1784 /**Trim dead references from the supplied non-null list. */
1785 private void trimDeadRefs(final List<WeakReference<Name>> l)
1786 {
1787 for(final Iterator<WeakReference<Name>> it = l.iterator(); it.hasNext(); )
1788 {
1789 final WeakReference<Name> e = it.next();
1790 // Test if ref is alive and zap entry if not.
1791 // Note that get() doesn't keep ref artificially alive unlike for SoftReference...
1792 if(null == e.get())
1793 { it.remove(); }
1794 }
1795 }
1796
1797 /**Trim the supplied non-null list to size if too long. */
1798 private void trimListLength(final LinkedList<WeakReference<Name>> l, final boolean isPrefix)
1799 {
1800 // Cannot be overlong unless longer than smallest permitted max length...
1801 if(l.size() > STRESSED_LIST_LEN)
1802 {
1803 final int targetLen = getMaxListLen(isPrefix);
1804 while(l.size() > targetLen)
1805 { l.removeLast(); }
1806 }
1807 }
1808
1809 /**Maximum list length under normal circumstances; strictly positive.
1810 * Borrowing from GZIP '-6'/default experience a good value might be ~6.
1811 */
1812 private static final int MAX_LIST_LEN = 6;
1813
1814 /**Maximum secondary list length under normal circumstances; strictly positive.
1815 * Since suffix matching is secondary goal this is shorter than the general limit
1816 * but should still probably be greater than 1.
1817 */
1818 private static final int MAX_SECONDARY_LIST_LEN = ((MAX_LIST_LEN >> 1) | 1);
1819
1820 /**Maximum list length under memory stress; strictly positive, less than MAX_LIST_LEN.
1821 * This will do a less good job of finding matches (ie will probably do less compaction)
1822 * but will consume less memory in the ad-hoc cache.
1823 * <p>
1824 * Under stress, any entries we touch are capped to this length,
1825 * thus allowing us to gracefully and incrementally free space.
1826 * <p>
1827 * Borrowing from GZIP '-1/'-fast' experience a good value might be 1.
1828 */
1829 private static final int STRESSED_LIST_LEN = 1;
1830
1831 /**Maximum same-prefix list length; strictly positive.
1832 * This will drop to a smaller measure if memory is under stress.
1833 */
1834 /* private */ int getMaxListLen(final boolean isPrefix)
1835 { return(MemoryTools.isMemoryStressed() ? STRESSED_LIST_LEN : (isPrefix ? MAX_LIST_LEN : MAX_SECONDARY_LIST_LEN)); }
1836 }
1837
1838 /**Get instance of AdHocPrefixCache; never null. */
1839 private synchronized static final AdHocPrefixCache<Integer> getAdHocPrefixCache()
1840 {
1841 // If cache has not been discarded then return it.
1842 final SoftReference<AdHocPrefixCache<Integer>> cache = _adHocPrefixCache;
1843 AdHocPrefixCache<Integer> result = null;
1844 if((null != cache) && (null != (result = cache.get())))
1845 { return(result); }
1846 // Construct and cache and return a new instance.
1847 result = new AdHocPrefixCache<Integer>();
1848 _adHocPrefixCache = new SoftReference<AdHocPrefixCache<Integer>>(result);
1849 return(result);
1850 }
1851
1852 /**Memory-sensitive cache of Name instances indexed by fixed-length prefix and suffix; initially null, not thereafter.
1853 * Borrows a few techniques from GZIP-like algorithms.
1854 * <p>
1855 * This uses Integer as a key for up to 4 bytes of prefix/suffix lookup.
1856 * <p>
1857 * Only accessed under the class lock, by getAdHocPrefixCache().
1858 * <p>
1859 * Held via a StaticReference to allow the entire cache to be discarded automatically
1860 * under extreme memory stress.
1861 * <p>
1862 * Accessed under the class lock.
1863 */
1864 private static SoftReference<AdHocPrefixCache<Integer>> _adHocPrefixCache;
1865
1866 /**Case-insensitive Comparator for Name equivalent to TextUtils.CASE_INSENSITIVE_ORDER; not null.
1867 * May be able to operate more efficiently than generic TextUtils.CASE_INSENSITIVE_ORDER.
1868 */
1869 public static final Comparator<Name> CASE_INSENSITIVE_ORDER = new Comparator<Name>(){
1870 public int compare(final Name n1, final Name n2)
1871 { return(TextUtils.CASE_INSENSITIVE_ORDER.compare(n1, n2)); }
1872 };
1873
1874 /**Set true for extra assert()s for internal invariants, usually only in non-release code. */
1875 private static final boolean EXTRA_ASSERTS = IsDebug.isDebug;
1876
1877 /**Set true for extra debug/development logging output. */
1878 private static final boolean DEBUG = false;
1879 }