001    /*
002    Copyright (c) 1996-2012, Damon Hart-Davis
003    All rights reserved.
004    
005    Redistribution and use in source and binary forms, with or without
006    modification, are permitted provided that the following conditions are
007    met:
008    
009      * Redistributions of source code must retain the above copyright
010        notice, this list of conditions and the following disclaimer.
011    
012      * Redistributions in binary form must reproduce the above copyright
013        notice, this list of conditions and the following disclaimer in the
014        documentation and/or other materials provided with the
015        distribution.
016    
017    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
018    IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
019    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
020    PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
021    OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
022    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
023    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
024    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
025    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
026    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
027    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
028    */
029    package org.hd.d.pg2k.svrCore;
030    
031    import java.io.IOException;
032    import java.io.InvalidObjectException;
033    import java.io.ObjectInputStream;
034    import java.io.ObjectInputValidation;
035    import java.io.ObjectOutputStream;
036    import java.io.ObjectStreamException;
037    import java.io.Serializable;
038    import java.lang.ref.SoftReference;
039    import java.lang.ref.WeakReference;
040    import java.security.MessageDigest;
041    import java.security.NoSuchAlgorithmException;
042    import java.util.Comparator;
043    import java.util.Iterator;
044    import java.util.LinkedList;
045    import java.util.List;
046    import java.util.Set;
047    
048    import org.hd.d.pg2k.svrCore.TextUtils.CharSequence7Bit;
049    import org.hd.d.pg2k.svrCore.TextUtils.CharSequence8Bit;
050    import org.hd.d.pg2k.svrCore.collections.LRUMapAutoSizeForHitRate;
051    import org.hd.d.pg2k.svrCore.collections.SimpleProbabilisticCache;
052    
053    import ORG.hd.d.IsDebug;
054    
055    // TODO: Allow sharing of the underlying byte[] between any two non-prev equal-content Name types.
056    
057    /**Compact immutable in-memory representation of a (usually-short, sharable-prefix/suffix) 8-bit ASCII String.
058     * This can share a common prefix and suffix with another instance,
059     * avoiding having to store the shared elements itself.
060     * <p>
061     * This is particularly suitable for densely-packed sorted lists of values,
062     * such as sets of exhibit names which with tend to have by necessity long shared prefixes.
063     * <p>
064     * This is suitable for names but also other usually-short 8-bit text values,
065     * in particular those that don't have the sort of internal structure and repetition
066     * that Compact7BitString can take advantage of.
067     * <p>
068     * This supports random access reasonably efficiently
069     * (eg through the CharSequence interface)
070     * but some elements will be slower to access than others.
071     * (chars at either end will tend to be the slowest to access.)
072     * <p>
073     * Converting to and from this form should be relatively resource-light,
074     * eg with low CPU and memory requirements,
075     * though all instances are automatically intern()ed which has some cost.
076     * <p>
077     * Where there is a conflict between memory consumption and CPU,
078     * this implementation is biased to minimise memory use at the possible expense of CPU.
079     * <p>
080     * Internally this uses a byte[] to store the data
081     * (a basic halving of in-memory size in most JVMs),
082     * and use of a common prefix with a similar
083     * (nominally lexically previous close value)
084     * should help further trim memory use.
085     * <p>
086     * (This implementation internally attempts to minimise the number of objects kept live
087     * and that need to be traversed to complete construction.)
088     * <p>
089     * These classes are not publicly constructible in order to help control instance count,
090     * and indeed may use MemoryTools.intern() to eliminate duplicates.
091     * <p>
092     * This uses an internal cache to attempt to find ad-hoc prefix matches with existing instances
093     * if no 'prev' value is offered during creation of a new instance.
094     * This cache is memory-sensitive and will be silently sacrificed
095     * if the system is short of memory.
096     * <p>
097     * Note that MemoryTools.intern() may not release a 'duplicate'
098     * if there remain other instances hanging from it.
099     * <p>
100     * This is not final though all its constructors are private
101     * to allow only inner-class derived types in order to preserve immutability.
102     * <p>
103     * As this is performance-critical code, and may have to run on slow/non-JIT JVMs,
104     * most assert()s that check purely-internally-imposed invariants
105     * are entirely eliminated from release code;
106     * those that might still be triggered by runtime issues external to this class,
107     * such as run-time presence of hashing algorithms,
108     * remain present to enabled at runtime if required.
109     */
110    public class Name implements CharSequence8Bit, Comparable<Name>,
111                                 Serializable, ObjectInputValidation,
112                                 MemoryTools.Internable // Not currently supporting InternableRank
113        {
114        /**Create a Name instance; never null unless input is null.
115         * It is often best to attempt to share a prefix with the lexically previous (or next) item.
116         * <p>
117         * If passed a null this returns a null.
118         * <p>
119         * If passed a Name (but not including sub-classes) this returns it untouched.
120         * @param text  the 8-bit text to store; if null then null is returned
121         */
122        public static Name create(final CharSequence text)
123            { return(create(text, null)); }
124    
125        /**Create a Name instance with an optional previous value to attempt to share a prefix with; never null unless input is null.
126         * It is often best to attempt to share a prefix with the lexically previous (or next) item.
127         * <p>
128         * If passed a null this returns a null.
129         * <p>
130         * If passed a Name (but not including sub-classes) this returns it untouched.
131         *
132         * @param text  the 8-bit text to store; if null then null is returned
133         * @param prev  previous value with which to attempt to share a prefix; null if none
134         */
135        public static Name create(final CharSequence text,
136                                  final Name prev)
137            {
138            if(null == text) { return(null); }
139    
140            if(Name.class.equals(text.getClass()))
141                {
142                // Optimisation: if this is already a Name (and not a sub-class) then return it as-is
143                // if no prev has been offered (that might share a longer prefix).
144                if(prev == null) { return((Name) text); }
145                // Optimisation: if prev content equals this then return prev to save a visit to intern(), etc.
146                else if(prev.contentEquals(text)) { return(prev); }
147                }
148    
149            final AdHocPrefixCache<Integer> adHocPrefixCache = getAdHocPrefixCache();
150            // If no prev was specified we look for one in the ad-hoc cache.
151            final Name prevToUse;
152            if(prev != null) { prevToUse = prev; }
153            else
154                {
155                // If we find an exact match with the right class in the ad-hoc cache then return it.
156                prevToUse = adHocPrefixCache.findPrev(text);
157                if((prevToUse != null) && (Name.class.equals(prevToUse.getClass())) && prevToUse.contentEquals(text))
158                    { return(prevToUse); }
159                }
160            // Return intern()ed value to eliminate accidental duplicates.
161            // Pass it through the ad-hoc prefix cache too...
162            final Name result = adHocPrefixCache.offerNewInstance(MemoryTools.intern(new Name(text, prevToUse)));
163            if(EXTRA_ASSERTS) { assert(Name.class.equals(result.getClass())); }
164            return(result);
165            }
166    
167        /**Attempts to create a Name instance, but falls back to returning an intern()ed String instance otherwise.
168         * This copes with possible non-8-bit values by returning a String rather than a Name.
169         * <p>
170         * Essentially this returns the most memory-efficient representation reasonably possible.
171         *
172         * @return  null if text is null, else Name if 8-bit text, else intern()ed String instance
173         */
174        public static CharSequence createOrStringFallback(final CharSequence text, final Name prev)
175            {
176            try { return(create(text, prev)); }
177            catch(final IllegalArgumentException e) { return(MemoryTools.intern(text.toString())); }
178            }
179    
180        /**Shared empty instance; not null. */
181        public static final Name EMPTY = create("");
182    
183        /**Maximum 'prev' chain length that we will support; non-negative.
184         * Limits the amount of recursion and CPU involved in processing 'nested'/'chained' prefixes,
185         * at the possible expense of some lost compression (ie memory savings).
186         * <p>
187         * This limit also helps avoid blocking GC through overly tangled and straggly data structures.
188         * <p>
189         * Usually positive and in the range (say) 1 to 32 to allow compression,
190         * though higher values also hurt things such as sorting
191         * which forces lots of chain-chasing while comparing prefixes.
192         * <p>
193         * As at 2009/08/02 the maximum actual value when creating fresh ExhibitFull values is 18,
194         * ie 18 is sufficient to yield maximum compression/sharing.
195         * <p>
196         * We may need to raise or lower this limit even though serialised data already exists,
197         * so Name instances are gently coerced to this limit where necessary when deserialised.
198         * <p>
199         * With the 20100621 AEP/AEID name set (29531 names) run against code as at 20120312,
200         * adjusting MAX_PREV_CHAIN_LENGTH results in the following testNameOnAEP() output:
201         * <ul>
202         * <li>0: serialised in-order compact representation array bytes raw | compressed: 4086042 | 286207</li>
203         * <li>4: serialised in-order compact representation array bytes raw | compressed: 2936877 | 285002</li>
204         * <li>8: serialised in-order compact representation array bytes raw | compressed: 1477484 | 271888</li>
205         * <li>9: serialised in-order compact representation array bytes raw | compressed: 1296150 | 267751</li>
206         * <li>11: serialised in-order compact representation array bytes raw | compressed: 1155096 | 262664</li>
207         * <li>12: serialised in-order compact representation array bytes raw | compressed: 1058893 | 260514</li>
208         * <li>13: serialised in-order compact representation array bytes raw | compressed: 1080953 | 259716</li>
209         * <li>14: serialised in-order compact representation array bytes raw | compressed: 1043707 | 259408</li>
210         * <li>17+: serialised in-order compact representation array bytes raw | compressed: 1042888 | 259163</li>
211         * </ul>
212         * Using 8 the serialised form fails to be smaller than using native Strings, when compressed:
213         * "Serialised in-order String array bytes raw | compressed: 3752966 | 271266",
214         * and note the strange behaviour at 13 which results in a larger non-compressed output than at 12.
215         */
216        private static final int MAX_PREV_CHAIN_LENGTH = 14;
217    
218        /**If true, always try to search back through prev values for an as-good or better match.
219         * This in principle allows less redundant intermediate values to be retained in memory
220         * and postpones any forced truncation of the chain.
221         */
222        private static final boolean ALWAYS_LOOK_FOR_EARLIER_PREV = true;
223    
224        /**Number of bits for prefix length (unsigned) value.
225         * Should usually be greater than the number of suffix bits,
226         * since the primary compaction mechanism is prefix sharing.
227         * <p>
228         * 9 is needed for maximal compression on 20090707 test set,
229         * but compression at 8 is very close.
230         * <p>
231         * NOTE: cannot be changed without preventing deserialisation of old data.
232         */
233        private static final int PREFIX_BITS = 9;
234    
235        /**Mask for prefix bits (at bottom of word).
236         * Broken out to help pre-JIT execution performance.
237         */
238        private static final int PREFIX_MASK = ((1<<PREFIX_BITS)-1);
239    
240        /**Number of bits for suffix length (unsigned) value.
241         * This uses whatever bits are left after prefix bits have been taken
242         * eg from 16 bits in a shared short.
243         */
244        private static final int SUFFIX_BITS = 16 - PREFIX_BITS;
245    
246        /**Mask for suffix bits (at top of word).
247         * Broken out to help pre-JIT execution performance.
248         */
249        private static final int SUFFIX_MASK = ((1<<SUFFIX_BITS)-1);
250    
251        /**Max shared prefix length in characters; strictly positive.
252         * An actual match greater than this will be capped to this.
253         */
254        private static final int MAX_PREFIX_LENGTH = PREFIX_MASK;
255    
256        /**Max shared prefix length in characters; strictly positive.
257         * An actual match greater than this will be capped to this.
258         */
259        private static final int MAX_SUFFIX_LENGTH = SUFFIX_MASK;
260    
261        /**Minimum terminus match length; strictly positive.
262         * This helps eliminate completely spurious linkages that may hurt GC
263         * and trail rubbish in the serialised form of such spuriously-linked values.
264         * <p>
265         * We won't share very short prefixes/suffixes.
266         * <p>
267         * We avoid matches of "" (that should be dealt with by intern()) because this is &gt;0.
268         * We should probably avoid picking up single letters such as trailing full-stops, ie with this is &gt;1.
269         * We should probably allow everything of at least length 3 or 4
270         * (such as ".au" or ".jpg" for example)
271         * which is probably roughly the difference in bytes in a serialised stream
272         * of adaptively encoding a null vs a non-null prev pointer.
273         */
274        private static final int MIN_TERMINUS_MATCH_LENGTH = 3;
275    
276        /**Minimum prefix/suffix total share length to use a prev; strictly positive.
277         * If 1 then we always try to use a 'prev' when there is even one character matching,
278         * but this may entrain lots of 'dead wood' that might otherwise be garbage.
279         * <p>
280         * A higher value may ultimately result in lower memory consumption
281         * (and no loss of compactness with or without compression of the serialised form)
282         * though with some loss in the headline immediate 'characters saved' figure.
283         * <p>
284         * This does not need to be enforced on (old) deserialised data,
285         * since for example intern() may often resolve such issues silently,
286         * and in any case it is not a correctness issue.
287         * <p>
288         * For consistency we line this up with the minimum prefix/suffix match length.
289         */
290        private static final int MIN_SHARE_CHARS = MIN_TERMINUS_MATCH_LENGTH;
291    
292        /**If non-zero then the minimum number of characters in any initial prefix share block; non-negative.
293         * This is to try to avoid initial sequences of only one or two chars/bytes
294         * causing especially deep and inefficient (logical) recursion.
295         */
296        private static final int MIN_FIRST_BLOCK_CHARS = MIN_TERMINUS_MATCH_LENGTH;
297    
298        /**Create a new instance with a previous value to attempt to share a prefix with.
299         * Note that the 'prev' value may be any sub-type of Name.
300         * <p>
301         * Private in order to better manage instance control.
302         *
303         * @param text  the 8-bit text to store and must not equal the prev; never null but can be empty
304         * @param putativePrev  non-empty previous value with which to attempt to share a prefix; null if none
305         */
306        private Name(final CharSequence text, final Name putativePrev)
307            {
308            if(EXTRA_ASSERTS) { assert(null != text) : "factory method should already have screened out null text value"; }
309            // Verify that all text is 8-bit if not already declared to be (and if fibbing no harm should actually happen).
310            final int l = text.length();
311            if(!(text instanceof CharSequence8Bit))
312                {
313                for(int i = l; --i >= 0; )
314                    { if(text.charAt(i) > 255) { throw new IllegalArgumentException(); } }
315                }
316    
317            this._length = l;
318    
319            // Condition use of the prev value.
320            Name p = putativePrev;
321            // If the prev chain is already maximum length
322            // then try going back at least one step
323            // which may still be of some help with compression.
324            // We may in any case always look back for the oldest good-enough prev
325            // to help trim memory use and preserve elbow room.
326            //
327            // This treats suffix matching as a secondary opportunistic task
328            // and we'll optimise any prefix by preference
329            // unless the initial prefix is shorter than the initial suffix.
330            if(null != p)
331                {
332                final int initialPrefixMatchLength = computeCommonPrefixLengthPossibleMinInitialBlock(text, p);
333                final int initialSuffixMatchLength = computeCommonSuffixLength(text, p);
334    
335                // Abandon it now now if the offered prev has no shared termini with us!
336                if((initialPrefixMatchLength == 0) && (initialSuffixMatchLength == 0))
337                    { p = null; }
338                else
339                    {
340                    if(ALWAYS_LOOK_FOR_EARLIER_PREV)
341                        {
342                        // Prefer prefix matching if better or same as suffix length.
343                        if(initialPrefixMatchLength >= initialSuffixMatchLength)
344                            {
345                            // Attempt to quickly find at-least-as-good recent prev
346                            // when there is a prefix match available.
347                            while((p != null) && (p.prev != null) &&
348                                    (computeCommonPrefixLengthPossibleMinInitialBlock(text, p.prev) >= initialPrefixMatchLength))
349                                { p = p.prev; }
350                            }
351                        else
352                            {
353                            // If no prefix match was available
354                            // then try to find the shortest chain while retaining the current prefix.
355                            while((p != null) && (p.prev != null) &&
356                                    (computeCommonSuffixLength(text, p.prev) >= initialSuffixMatchLength))
357                                { p = p.prev; }
358                            }
359                        }
360    
361                    final int ppCL = p.getPrevChainLength();
362                    if(EXTRA_ASSERTS) { assert(ppCL >= 0); }
363                    if(EXTRA_ASSERTS) { assert(ppCL <= MAX_PREV_CHAIN_LENGTH); }
364                    if(ppCL >= MAX_PREV_CHAIN_LENGTH)
365                        {
366    if(DEBUG) { System.out.println("limiting chain length"); }
367                        // Go back at least one step; further if it doesn't hurt prefix match length much.
368                        // This should give headroom to build a new useful near-maximal-length prefix.
369                        final int targetMatchLength = Math.max(initialPrefixMatchLength / 2, MIN_SHARE_CHARS);
370                        do { p = p.prev; }
371                        while((p != null) && (p.prev != null) &&
372                                ((p.getPrevChainLength() >= MAX_PREV_CHAIN_LENGTH) ||
373                                        (computeCommonPrefixLength(text, p.prev) >= targetMatchLength)));
374                        }
375                    // Ensure that any prev that we now have has a short enough chain length to tag on to.
376                    if(EXTRA_ASSERTS) { assert((p == null) || (p.getPrevChainLength() < MAX_PREV_CHAIN_LENGTH)); }
377                    }
378                }
379            // Find how much of prev is actually in common with the new text.
380            int sharedPrefixLen = (null == p) ? 0 : computeCommonPrefixLength(text, p);
381            // Only store/use any putative prev if there is in fact some common prefix/suffix string.
382            if(null != p)
383                { p = ((sharedPrefixLen > 0) || (computeCommonSuffixLength(text, p) > 0)) ? p : null; }
384            if(EXTRA_ASSERTS) { assert((sharedPrefixLen == 0) || (p != null)); } // No shared prefix possible without a non-null prev.
385            if(EXTRA_ASSERTS) { assert(getPrevChainLength() <= MAX_PREV_CHAIN_LENGTH); }
386    
387            // Now extract any matching suffix from the tail residue after extracting a maximal prefix.
388            int tailLen = l - sharedPrefixLen;
389            int residualSharedSuffixLen = (null == p) ? 0 : computeCommonSuffixLength(text.subSequence(sharedPrefixLen, l), p.subSequence(sharedPrefixLen, p._length));
390            final int putativeSharedLength = sharedPrefixLen + residualSharedSuffixLen;
391    
392            // If not enough shared length to justify a link to prev
393            // then avoid any such sharing...
394            if(putativeSharedLength < MIN_SHARE_CHARS)
395                {
396                p = null; // Reset to no sharing.
397                sharedPrefixLen = 0; // Reset to no sharing.
398                residualSharedSuffixLen = 0; // Reset to no sharing.
399                tailLen = l; // Reset to no sharing.
400                }
401    
402            // No record the sharing that we are going to use.
403            prev = p;
404            // Now pack the termini lengths in together.
405            terminiLengths = (short) (sharedPrefixLen | (residualSharedSuffixLen << PREFIX_BITS));
406            // Set up the fast-access shadow prefix-length value.
407            _prefixLen = (short) sharedPrefixLen;
408    
409            if(EXTRA_ASSERTS) { assert(getPrefixLen() == sharedPrefixLen) /* : "getPrefixLen()="+getPrefixLen()+", sharedPrefixLen="+sharedPrefixLen */ ; }
410            if(EXTRA_ASSERTS) { assert(getSuffixLen() == residualSharedSuffixLen); }
411            // We can't claim more shared chars than exist in the 'prev' value!
412            if(EXTRA_ASSERTS) { assert(sharedPrefixLen + residualSharedSuffixLen <= ((p == null) ? 0 : p._length)) : ("text="+text+", prev="+p); }
413    
414            // Compute size of middle (unshared) portion.
415            final int midLen = tailLen - residualSharedSuffixLen;
416    
417            if(EXTRA_ASSERTS) { assert(midLen >= 0); }
418            if(EXTRA_ASSERTS) { assert(sharedPrefixLen + midLen + residualSharedSuffixLen == text.length()); }
419    
420            if(midLen == 0)
421                {
422                // Avoid any allocation for the tail if no middle bytes.
423                mid = EMPTIES.EMPTY_MID;
424                }
425            // Usual case: copy the non-shared middle bytes.
426            else
427                {
428                final byte t[] = new byte[midLen];
429                for(int i = midLen; --i >= 0; )
430                    { t[i] = (byte) text.charAt(i + sharedPrefixLen); }
431                mid = t;
432                }
433    
434    if(DEBUG) { System.out.println("prefixLen|midLen|suffixLen = " + sharedPrefixLen+"|"+midLen+"|"+residualSharedSuffixLen + " from "+text.subSequence(0, sharedPrefixLen)+"|"+text.subSequence(sharedPrefixLen, sharedPrefixLen+midLen)+"|"+text.subSequence(sharedPrefixLen+midLen, text.length())+", prevChainLength = " + getPrevChainLength()); }
435    //if(DEBUG && (sharedPrefixLen == 0) && (residualSharedSuffixLen != 0)) { System.out.println("***Shared-suffix-only (suffix len "+residualSharedSuffixLen+") Name instance created: "+text); }
436    
437    if(EXTRA_ASSERTS) { assert((prev == null) || (sharedPrefixLen + residualSharedSuffixLen >= MIN_SHARE_CHARS)); }
438    
439            // Verify what we've been given.
440            try { validateObject(); }
441            catch(final InvalidObjectException e)
442                { throw new IllegalArgumentException(e); }
443            }
444    
445        /**Compute the shared/common prefix length forced to zero if initial contiguous block not large enough.
446         * A minimum initial contiguous prefix may be insisted on for efficiency,
447         * by heuristically reducing (logical) recursion and increasing the chunk size that can be worked on,
448         * at least at the very start of a Name, eg for when doing equals()/compareTo() and the like,
449         * possibly at a slight cost in compression effectiveness.
450         */
451        private static int computeCommonPrefixLengthPossibleMinInitialBlock(final CharSequence text, final Name p)
452            {
453            final int sharedPrefixLen = computeCommonPrefixLength(text, p);
454            if(MIN_FIRST_BLOCK_CHARS > 0)
455                {
456                if(sharedPrefixLen < MIN_FIRST_BLOCK_CHARS) { return(0); }
457                if(byteAtInner(p, 0) < MIN_FIRST_BLOCK_CHARS) { return(0); }
458                }
459            return(sharedPrefixLen);
460            }
461    
462        /**Compute the shared/common prefix length. */
463        private static int computeCommonPrefixLength(final CharSequence text, final Name p)
464            {
465            int sharedPrefixLen = 0;
466            final int l = Math.min(Math.min(text.length(), p._length), MAX_PREFIX_LENGTH);
467            for(int i = 0; i < l; ++i, ++sharedPrefixLen)
468                { if(p.charAt(i) != text.charAt(i)) { break; } }
469            if(EXTRA_ASSERTS) { assert(sharedPrefixLen >= 0); }
470            return(sharedPrefixLen);
471            }
472    
473        /**Compute the shared/common suffix length.
474         * Note that we may have to match against some tail of the prev text
475         * (to avoid overlapping prefix/suffix parts)
476         * and thus the p parameter is the more general CharSequence.
477         */
478        private static int computeCommonSuffixLength(final CharSequence text, final CharSequence p)
479            {
480            int sharedSuffixLen = 0;
481            final int tLen = text.length();
482            final int pLen = p.length();
483            final int l = Math.min(Math.min(tLen, pLen), MAX_SUFFIX_LENGTH);
484            for(int i = 0; i < l; ++i, ++sharedSuffixLen)
485                { if(p.charAt(pLen - i - 1) != text.charAt(tLen - i - 1)) { break; } }
486            if(EXTRA_ASSERTS) { assert(sharedSuffixLen >= 0); }
487            if(EXTRA_ASSERTS) { assert((sharedSuffixLen == l) || (p.charAt(pLen - sharedSuffixLen - 1) != text.charAt(tLen - sharedSuffixLen - 1))) : "returned match must be maximal length"; }
488            return(sharedSuffixLen);
489            }
490    
491        /**Cache of the computed hash code.
492         * Not part of the permanent/serialised state of the object since easy to recompute,
493         * but this is called so often by intern(), etc, that it is worth storing
494         * given that typical texts will be long.
495         */
496        private transient int hash; // Default to 0
497    
498        /**The hash is computed over the entire text as for String.
499         * This does not depend on how the data is internally stored, eg using a prefix or not.
500         */
501        @Override
502        public final int hashCode()
503            {
504            int h = hash;
505            if(h == 0) // Probably not yet computed, so do so now.
506                {
507                // Hash needs to be (re)computed.
508                final int len = _length;
509                for(int i = 0; i < len; i++)
510                    { h = 31*h + charAt(i); }
511                hash = h; // Cache the result.
512                }
513            return(h);
514            }
515    
516        /**Equality depends on the items being the same concrete (derived) type and lexically identical.
517         * This does not depend on how the data is internally stored, eg using a prefix or not.
518         * Specifically designed to work for derived classes too by default.
519         */
520        @Override
521        public final boolean equals(final Object obj)
522            {
523            if(this == obj) { return(true); }
524            if(null == obj) { return(false); }
525            if(getClass() != obj.getClass()) { return(false); }
526            //return(TextUtils.contentEquals(this, (Name) obj));
527            final Name o = (Name) obj;
528            final int l = length();
529            if(o._length != l) { return(false); }
530    
531            final int[] args = new int[2]; // Allocate just once to minimise heap churn.
532            for(int i = 0; i < l; )
533                {
534                // Do this in chunks for efficiency...
535                final byte[] raw1 = bytesAtInner(this, i, args);
536                final int offset1 = args[0];
537                final int length1 = args[1];
538                final byte[] raw2 = bytesAtInner(o, i, args);
539                final int offset2 = args[0];
540                final int length2 = args[1];
541    
542                final int ll = Math.min(length1, length2);
543                for(int j = 0; j < ll; ++j)
544                    { if(raw1[offset1 + j] != raw2[offset2 + j]) { return(false); } }
545    
546                i += ll;
547                }
548    
549            return(true); // Identical content.
550            }
551    
552        /**True iff this represents the same sequence of char values as the specified sequence.
553         * @param  other  the sequence to compare this against; never null.
554         *
555         * @return  {@code true} if this represents the same
556         *          sequence of char values as the specified sequence,
557         *          {@code false} otherwise
558         */
559        public final boolean contentEquals(final CharSequence8Bit o)
560            { return(TextUtils.contentEquals(this, o)); }
561    
562        /**True iff this represents the same sequence of char values as the specified sequence.
563         * @param  other  the sequence to compare this against; never null.
564         *
565         * @return  {@code true} if this represents the same
566         *          sequence of char values as the specified sequence,
567         *          {@code false} otherwise
568         */
569        public final boolean contentEquals(final CharSequence o)
570            { return(TextUtils.contentEquals(this, o)); }
571    
572    
573        /**Non-empty item with which we share a prefix and/or suffix, or null if entire value is empty (equivalent to "") or if there is no shared part(s).
574         * May be non-empty if this new value is a prefix/suffix of the prev value.
575         * <p>
576         * Is protected since potentially to derived instances.
577         */
578        protected final Name prev;
579    
580        /**Termini lengths (unsigned) packed into a single value for efficiency.
581         * The prefix bits are the low-order bits.
582         * <p>
583         * This is part of the compact serialised state.
584         */
585        private final short terminiLengths;
586    
587        /**This is a logically-final shadow of the most-frequently accessed prefix length value; non-negative.
588         * This contains a copy of the prefix length.
589         * <p>
590         * Should not expand the memory footprint of an instance
591         * because it should slot in the gap left by the terminiLengths short value.
592         * <p>
593         * This is not part of the serialised state.
594         */
595        private transient /* final */ short _prefixLen;
596    
597        /**This is a logically-final shadow computed value for the very-frequently accessed length; non-negative.
598         * This is not part of the serialised state.
599         */
600        private transient /* final */ int _length;
601    
602        /**Middle portion of value, or a constant empty value iff there are no middle bytes; never null.
603         * We don't use default serialisation for this:
604         * it is written on the wire as raw bytes (if non-empty, else not at all)
605         * following a one-byte (positive) length if <= 127 chars,
606         * else a 4-byte (negated) length to be able to represent maximal-size CharSequences.
607         */
608        private transient /* final */ byte[] mid;
609    
610        /**Used to hold values that may be needed before Name's own static initialisation can have completed.
611         * This can happen with circular initialisation dependencies.
612         */
613        private static final class EMPTIES
614            {
615            /**Constant/immutable/shareable empty mid value for any empty mid-section; never null. */
616            static final byte[] EMPTY_MID = new byte[0];
617            }
618    
619        /**Chain depth, ie how deep our chain of prev values is, zero if this instance has no prev; non-negative.
620         * Can be used to help avoid getting silly tangles of non-GC-able prev chains,
621         * and indeed might help avoid situations that might attempt to create circular references.
622         * <p>
623         * Public for white-box testing only.
624         */
625        public final int getPrevChainLength()
626            {
627            int len = 0;
628            for(Name p = this; null != (p = p.prev); ++len) { }
629            return(len);
630            }
631    
632        /**Returns number of internal contiguous blocks to cover all the text, fewer being better; non-negative.
633         * Intended for performance tuning/testing only; may not be a permanent nor supported part of the API.
634         */
635        public int getBlockCount()
636            {
637            int count = 0;
638            final int l = _length;
639            final int[] args = new int[2]; // Allocate just once to minimise heap churn.
640            for(int i = 0; i < l; )
641                {
642                ++count;
643                // Do this in chunks for efficiency...
644                final byte[] raw1 = bytesAtInner(this, i, args);
645                final int offset1 = args[0];
646                final int length1 = args[1];
647                assert(length1 > 0);
648                i += length1;
649                }
650            return(count);
651            }
652    
653        /**Human-readable diagnostic of internal format; never null.
654         * Intended for performance tuning only; not a permanent nor supported part of the API.
655         */
656        public String showInternalStructure()
657            {
658            final StringBuilder sb = new StringBuilder(128);
659            final int l = _length;
660            sb.append("length=").append(l).append(", p=").append(_prefixLen).append(", s=").append(getSuffixLen()).
661               append(", depth=").append(getPrevChainLength()).
662               append(", prev=").append(System.identityHashCode(prev)).append("; ");
663            final int[] args = new int[2]; // Allocate just once to minimise heap churn.
664            for(int i = 0; i < l; )
665                {
666                // Do this in chunks for efficiency...
667                final byte[] raw1 = bytesAtInner(this, i, args);
668                final int offset1 = args[0];
669                final int length1 = args[1];
670                assert(length1 > 0);
671                sb.append("segment @").append(i).append(", length=").append(length1).append(", offset=").append(offset1).append(", ");
672                sb.append("text=>>>").append(new CS8Bit(raw1, offset1, offset1+length1)).append("<<<, ");
673                i += length1;
674                }
675            return(sb.toString());
676            }
677    
678        /**Length of common prefix with prev item if any; non-negative.
679         * Only of minimal interest outside the class (for white-box testing) and may be withdrawn from the API.
680         */
681        public final int getPrefixLen()
682            {
683            return(_prefixLen); // return(terminiLengths & PREFIX_MASK); // Bottom bits.
684            }
685    
686        /**Length of common suffix with prev item if any; non-negative.
687         * Only of minimal interest outside the class (for white-box testing) and may be withdrawn from the API.
688         */
689        public final int getSuffixLen()
690            {
691            return((terminiLengths >>> PREFIX_BITS) & SUFFIX_MASK); // Uses all remaining most-significant bits.
692            }
693    
694        /**Convert to String containing same char sequence.
695         * We do not expensively intern() the result as it may be short-lived.
696         */
697        //@Override public String toString() { return((new StringBuilder(length())).append(this).toString()); }
698        @Override public String toString()
699            {
700            final int l = _length;
701            final char[] text = new char[l];
702            final int[] args = new int[2]; // Allocate just once to minimise heap churn.
703            for(int i = 0; i < l; )
704                {
705                // Do this in chunks for efficiency...
706                final byte[] raw1 = bytesAtInner(this, i, args);
707                final int offset1 = args[0];
708                final int length1 = args[1];
709                for(int j = 0; j < length1; ++j, ++i)
710                    { text[i] = (char) (raw1[offset1 + j] & 0xff); }
711                }
712            return(new String(text));
713            }
714    
715        /**Writes the Name as ASCII bytes to the given (non-null) byte array starting at the given offset.
716         * The array must be long enough (from the given offset) else an exception will be thrown.
717         * <p>
718         * Aims to be efficient (in CPU and memory footprint).
719         */
720        public final void writeToByteArray(final byte[] buf, final int offset)
721            {
722            if(null == buf) { throw new IllegalArgumentException(); }
723            if(offset < 0) { throw new IllegalArgumentException(); }
724            final int l = _length;
725            if(offset + l > buf.length) { throw new IllegalArgumentException(); }
726    
727            final int[] args = new int[2]; // Allocate just once to minimise heap churn.
728            for(int i = 0; i < l; )
729                {
730                // Do this in chunks for efficiency...
731                final byte[] raw1 = bytesAtInner(this, i, args);
732                final int offset1 = args[0];
733                final int length1 = args[1];
734                System.arraycopy(raw1, offset1, buf, offset+i, length1);
735                i += length1;
736                }
737            }
738    
739        /**Create private byte[] copy for caller; never null.
740         * Aims to be efficient (in CPU and memory footprint).
741         */
742        public final byte[] toByteArray()
743            {
744            final byte[] result = new byte[_length];
745            writeToByteArray(result, 0);
746            return(result);
747            }
748    
749        /**At the given index returns the raw byte array and index and usable length for contiguous bytes from that index; never null.
750         * This gives direct access to underlying byte/character data in the largest lumps available,
751         * to facilitate more efficient multi-character operations (eg with less recursion).
752         * <p>
753         * This returns internal arrays/state which must not be passed outside the class.
754         *
755         * @param index  must be within bounds for this Name; non-negative
756         * @param offsetAndLength  2-element array to return the offset (index 0) and length (index 1); never null
757         * @return non-null byte[] with in-bounds offset and positive length of usable character from that offset.
758         */
759        protected static byte[] bytesAtInner(Name name, int index, final int[] offsetAndLength)
760            {
761            int maxLen = name._length - index; // Maximum length that the returned segment can possibly be.
762            for( ; ; )
763                {
764                final short pLen = name._prefixLen;
765    
766                // Drill into the shared prefix, if any.
767                if(index < pLen)
768                    {
769                    maxLen = Math.min(maxLen, pLen - index);
770    //                if(EXTRA_ASSERTS) { assert(maxLen > 0); }
771                    name = name.prev;
772                    continue;
773                    }
774    
775                // Drill into the shared suffix, if any.
776                if(index >= pLen + name.mid.length)
777                    {
778                    // Recurse to fetch from further up the chain.
779                    //final int length = sStart + name.getSuffixLen(); // length();
780    //                if(EXTRA_ASSERTS) { assert(index < name._length); }
781                    final int reverseIndex = name._length - index;
782    //                if(EXTRA_ASSERTS) { assert(reverseIndex > 0); }
783                    maxLen = Math.min(maxLen, reverseIndex);
784    //                if(EXTRA_ASSERTS) { assert(maxLen > 0); }
785                    name = name.prev;
786                    index = name._length - reverseIndex;
787                    continue;
788                    }
789    
790                // Point into the (unshared) mid text, which is nice and fast...
791                final int offset = index - pLen;
792                offsetAndLength[0] = offset;
793                final int len = name.mid.length - offset;
794    //            if(EXTRA_ASSERTS) { assert(len > 0); }
795                offsetAndLength[1] = Math.min(maxLen, len);
796    //            if(EXTRA_ASSERTS) { assert(maxLen > 0); }
797                return(name.mid);
798                }
799            }
800    
801        /**Internal fast version of charAt()/byteAt() with args/bounds checking assumed done. */
802        protected static byte byteAtInner(Name name, int index)
803            {
804            for( ; ; )
805                {
806                final int pLen = name._prefixLen;
807    
808                // Drill into the shared prefix, if any.
809                if(index < pLen)
810                    {
811                    name = name.prev;
812                    continue;
813                    }
814    
815                // Drill into the shared suffix, if any.
816                if(index >= pLen + name.mid.length)
817                    {
818    //                if(EXTRA_ASSERTS) { assert(index < length); }
819                    final int reverseIndex = name._length - index;
820    //                if(EXTRA_ASSERTS) { assert(reverseIndex > 0); }
821                    name = name.prev;
822                    index = name._length - reverseIndex;
823                    continue;
824                    }
825    
826                // Extract from the (unshared) mid text.
827                return(name.mid[index - pLen]);
828                }
829            }
830    
831        //@Override
832        public final byte byteAt(final int index)
833            {
834            // Bounds check for public interface.
835            if((index < 0) || (index >= _length)) { throw new IllegalArgumentException(); }
836            return(byteAtInner(this, index));
837            }
838    
839        public char charAt(final int index)
840            //{ return((char) (byteAt(index) & 0xff)); }
841            {
842            // Bounds check for public interface.
843            if((index < 0) || (index >= _length)) { throw new IllegalArgumentException(); }
844            return((char) (byteAtInner(this, index) & 0xff));
845            }
846    
847        //@Override
848        public final int length()
849            //{ return(_prefixLen + mid.length + getSuffixLen()); }
850            { return(_length); }
851    
852        /**Effectively-immutable light-weight wrapper around the underlying text.
853         * Can be intern()ed to help eliminate some expensive duplicates.
854         */
855        private static class CSWrapper<T extends Name> implements CharSequence8Bit,
856                                                                  Comparable<CharSequence>,
857                                                                  MemoryTools.Internable
858    
859            {
860            /**Underlying instance; non-null.
861             * Definitely not intended to be serialised.
862             */
863            transient final T underlying;
864            /**Start offset (inclusive) on underlying impl; non-negative. */
865            final int start;
866            /**End offset (exclusive) on underlying impl; non-negative. */
867            final int end;
868    
869            /**Create an instance.
870             * Only called from trusted classes so arg-checking can be by assert() to save release-build CPU cycles.
871             * @param underlying  the Name instance that this provides a view of; never null
872             * @param start  inclusive start index; in range [0,end]
873             * @param end  exclusive end index; in range [start,underlying.length()]
874             */
875            CSWrapper(final T underlying, final int start, final int end)
876                {
877                if(EXTRA_ASSERTS)
878                    {
879                    assert(null != underlying);
880                    assert(start >= 0);
881                    assert(end <= underlying.length());
882                    assert(start <= end);
883                    }
884    
885                this.underlying = underlying;
886                this.start = start;
887                this.end = end;
888                }
889    
890            /* (non-Javadoc)
891             * @see java.lang.CharSequence#charAt(int)
892             */
893            //@Override
894            public char charAt(final int index)
895                { return((char) (byteAt(index) & 0xff)); }
896    
897            //@Override
898            public byte byteAt(final int index)
899                {
900                if(index < 0) { throw new IllegalArgumentException(); }
901                if(index >= (end-start)) { throw new IllegalArgumentException(); }
902                return(underlying.byteAt(index + start));
903                }
904    
905            /* (non-Javadoc)
906             * @see java.lang.CharSequence#length()
907             */
908            //@Override
909            public int length()
910                {
911                return(end-start);
912                }
913    
914            /* (non-Javadoc)
915             * @see java.lang.CharSequence#subSequence(int, int)
916             */
917            //@Override
918            public CharSequence subSequence(final int start, final int end)
919                {
920                // We validate the parameters here since our constructor will not.
921                if(start < 0) { throw new IllegalArgumentException(); }
922                if(start > end) { throw new IllegalArgumentException(); }
923                if(end > length()) { throw new IllegalArgumentException(); }
924                if(EXTRA_ASSERTS) { assert(this.start + end <= underlying.length()); }
925                // Creates new wrapper around original underlying text for efficiency,
926                // ie not chained through this CSWrapper instance.
927                return(new CSWrapper<T>(underlying, this.start + start, this.start + end));
928                }
929    
930            /**Equality checks for the same type and content. */
931            @Override public boolean equals(final Object obj)
932                {
933                if(this == obj) { return(true); }
934                if(!(obj instanceof CSWrapper)) { return(false); }
935                return(TextUtils.contentEquals(this, (CharSequence8Bit)obj));
936                }
937    
938            /**The hash code is based on a fixed-time sampling of the content for speed. */
939            @Override public int hashCode()
940                {
941                final int l = length();
942                int h = l;
943                final int step = (l >>> 3);
944                for(int i = l; --i >= 0; i -= step) // Sample at least the last character.
945                    { h = 37*h + charAt(i); }
946                h -= l;
947                return(h);
948                }
949    
950            //@Override
951            public int compareTo(final CharSequence other)
952                { return(TextUtils.compare(this, other)); }
953    
954            /**Convert to String containing same char sequence.
955             * We don't expensively intern() the result as it may be short-lived.
956             */
957            @Override public String toString() { return((new StringBuilder(length())).append(this).toString()); }
958    
959            /**Create private byte[] copy for caller; never null. */
960            public final byte[] toByteArray()
961                {
962                final int length = length();
963                final byte[] result = new byte[length];
964                // Too hideous to fully optimise (yet) so do the simple way.
965                for(int i = length; --i >= 0; )
966                    { result[i] = underlying.byteAt(i + start); }
967                return(result);
968                }
969            }
970    
971        /**Maximum number of chars in a subsequence where a copy of the data in taken. */
972        private static final int MAX_TRIVIAL_COPY = 8;
973    
974        /**Generate a light-weight wrapper, or for very small amounts of data, a copy; never null.
975         * Not necessarily Serializable.
976         */
977        //@Override
978        public final CharSequence8Bit subSequence(final int start, final int end)
979            {
980            if(start < 0) { throw new IndexOutOfBoundsException(); }
981            final int length = length();
982            if(end > length) { throw new IndexOutOfBoundsException(); }
983            if(start > end) { throw new IndexOutOfBoundsException(); }
984    
985            final int newLength = end - start;
986    
987            // If this is the whole existing value then return it.
988            if((start == 0) && (length == newLength)) { return(this); }
989    
990            // In trivial empty case return fixed empty value.
991            if(newLength == 0) { return(EMPTY); }
992    
993            // For trivial small subsequences,
994            // make a copy of the relevant data,
995            // eg where we wouldn't want to pin down the possibly-large original
996            // for something not even worthy of a pointer.
997            // We don't intern() so as to avoid high overheads.
998            if(newLength <= MAX_TRIVIAL_COPY)
999                { return(new CS8Bit(this, start, end)); }
1000    
1001            // TODO: optimisation: if this is a prefix then return as a prefix instance.
1002    
1003            // GENERAL CASE: make a light-weight wrapper.
1004            return(new CSWrapper<Name>(this, start, end));
1005            }
1006    
1007    
1008        /**Validate fields/state.
1009         * Called in the constructor and possibly after deserialising.
1010         * <p>
1011         * Barf if something bad is found.
1012         */
1013        public void validateObject()
1014            throws InvalidObjectException
1015            {
1016            // Test some invariants that duff external data should not be able to influence.
1017            if(EXTRA_ASSERTS) { assert(null != mid); }
1018            if(EXTRA_ASSERTS) { assert((mid.length != 0) || (mid == EMPTIES.EMPTY_MID)); }
1019            if(EXTRA_ASSERTS) { assert(_length >= mid.length); }
1020    
1021            final int prefixLen = _prefixLen;
1022            if(EXTRA_ASSERTS) { assert(getPrefixLen() == _prefixLen); }
1023            final int suffixLen = getSuffixLen();
1024            if(EXTRA_ASSERTS) { assert((prefixLen >= 0) && (suffixLen >= 0)); }
1025            final int sharedLen = prefixLen + suffixLen;
1026            if(EXTRA_ASSERTS) { assert(sharedLen + mid.length == this._length); }
1027            if(prev != null)
1028                {
1029                if(sharedLen > prev._length)
1030                    { throw new InvalidObjectException("bad object: prefix/suffix too long for prev: shared="+sharedLen+" vs prev.length="+prev._length); }
1031                if(EXTRA_ASSERTS) { assert(getPrevChainLength() >= 0); }
1032                if(getPrevChainLength() > MAX_PREV_CHAIN_LENGTH)
1033                    { throw new InvalidObjectException("bad object: overlong prevChainLength: "+getPrevChainLength()); }
1034                // Prev must only be non-null if we have some sharing of prefix/suffix.
1035                // Note persisted values may have hash a different MIN_SHARE_CHARS at creation
1036                // so don't reject something simply because of 'not enough' sharing by current standards.
1037                if(sharedLen == 0)
1038                    { throw new InvalidObjectException("bad object: invalid unused prev"); }
1039                }
1040            else
1041                {
1042                if(EXTRA_ASSERTS) { assert(getPrevChainLength() == 0); }
1043                if(sharedLen != 0)
1044                    { throw new InvalidObjectException("bad object: missing prev needed for shared text"); }
1045                }
1046            if(Integer.MAX_VALUE - prefixLen - suffixLen < _length)
1047                { throw new InvalidObjectException("bad object: overlong components"); }
1048            }
1049    
1050        /**Deserialise.
1051         * Expects validation to happen in readResolve().
1052         */
1053        private void readObject(final ObjectInputStream in)
1054            throws IOException, ClassNotFoundException
1055            {
1056            // Read in most of the fields in the usual way.
1057            in.defaultReadObject();
1058    
1059            // Set up the fast-access shadow prefix-length value.
1060            final int pLen = (terminiLengths & PREFIX_MASK);
1061            this._prefixLen = (short) pLen;
1062    
1063            // Read in the mid text with a variable-length 'length' prefix.
1064            // This mechanism also ensures that our 'mid' element is unshared.
1065            final byte l0 = in.readByte();
1066            final int midLen;
1067            if(l0 >= 0) { midLen = l0; }
1068            else
1069                {
1070                final byte l1 = in.readByte();
1071                final byte l2 = in.readByte();
1072                final byte l3 = in.readByte();
1073                midLen = -((l0 << 24) + ((l1 & 0xff) << 16) + ((l2 & 0xff) << 8) + (l3 & 0xff));
1074                }
1075            // Create and read the text field; don't create a new empty byte[] for size 0.
1076            if(midLen == 0) { mid = EMPTIES.EMPTY_MID; }
1077            else
1078                {
1079                mid = new byte[midLen];
1080                in.readFully(mid);
1081                }
1082    
1083            // Set up the fast-access length value.
1084            this._length = pLen + midLen + getSuffixLen();
1085    
1086            // IMPORTANT: this instance is now safe to use
1087            // but may not be fully valid (eg prev chain length may be too long).
1088            // such issues MUST be handled in read-resolve.
1089            }
1090    
1091        /**Write out a minimally-redundant form of our internal information.
1092         * The more-efficient on-the-wire format also makes defensive
1093         * reading easier.
1094         */
1095        private void writeObject(final ObjectOutputStream oos)
1096            throws IOException
1097            {
1098            // Check if we're about to persist a bogus value.
1099            if(EXTRA_ASSERTS) { validateObject(); }
1100    
1101            // Write out most of the fields in the usual way.
1102            oos.defaultWriteObject();
1103    
1104            // Write the length in a variable-length format
1105            // (very short strings require fewer length bytes).
1106            // For very small strings use a byte-value length.
1107            if(mid.length <= Byte.MAX_VALUE) { oos.writeByte(mid.length); }
1108            // Write larger text sizes using a negative (int, 4-byte) length
1109            // so that reading the first byte will give a negative value.
1110            else { oos.writeInt(-mid.length); }
1111            // Now write the text bytes (if any) directly, unwrapped.
1112            if(0 != mid.length) { oos.write(mid); }
1113            }
1114    
1115        /**Deserialise: validate this instance, coerce mildly-errant values, and eliminate duplicates.
1116         * Has to handle derived classes too.
1117         */
1118        protected Object readResolve()
1119            throws ObjectStreamException
1120            {
1121            // In the case where the prev chain length on the wire is too large for current settings,
1122            // dynamically generate a new valid replacement (same content, different/no prev) here.
1123            if(getPrevChainLength() > MAX_PREV_CHAIN_LENGTH)
1124                {
1125    if(IsDebug.isDebug) { System.err.println("WARNING: replacing deserialised Name item that has an overly-long chain: "+getPrevChainLength()+" vs "+MAX_PREV_CHAIN_LENGTH); }
1126                // Attempt to retain some of the sharing if possible.
1127                Name p;
1128                for(p = prev; (p != null) && (p.getPrevChainLength() >= MAX_PREV_CHAIN_LENGTH); p = p.prev) { }
1129    
1130                // FIXME: nasty interdependency between base and derived classes here...
1131                if(ExhibitFull.class.equals(getClass()))
1132                    { return(ExhibitFull.create(new CS8Bit(this), p)); }
1133                return(Name.create(new CS8Bit(this), p));
1134                }
1135    
1136            // Check that this instance is valid.
1137            validateObject();
1138    
1139            // Eliminate duplicates while deserialising.
1140            return(MemoryTools.intern(this));
1141            }
1142    
1143        /**Unique serialisation ID. */
1144        private static final long serialVersionUID = 2798109831174584816L;
1145    
1146    
1147        /**ExhibitShortName immutable CharSequence light-weight wrapper around underlying ExhibitFull data.
1148         * Though slow, can be used as Map/Set/etc element,
1149         * as they have a working (though inefficient) consistent hashCode() and equals(),
1150         * but it is much preferred (more efficient/fast) to use ExhibitFull as a key.
1151         */
1152        public static final class ExhibitShort extends CSWrapper<ExhibitFull> implements CharSequence7Bit
1153            {
1154            /**Create an instance.
1155             * Only intended to be created by an ExhibitFull instance.
1156             */
1157            private ExhibitShort(final ExhibitFull underlying, final int start, final int end)
1158                {
1159                super(underlying, start, end);
1160                if(EXTRA_ASSERTS) { assert(ExhibitName.validNameFinalComponentSyntax(this)); }
1161                }
1162    
1163            /**Retrieve full name for this short name; never null. */
1164            public ExhibitFull getFullName() { return(underlying); }
1165    
1166            /**Extract the main words (stem) component; never null nor empty.
1167             * As ExhibitName.getMainWordsComponent() but optimised for this common case,
1168             * with the result content identical to that from getMainWordsComponent(getFullName()).
1169             * <p>
1170             * The result is intern()able to help conflate identical values if required,
1171             * ie has a working hashCode() and equals(),
1172             * which may save space and accelerate searching for example.
1173             *
1174             * @param allAttrWords  a Set of all legal attribute words (String values);
1175             *     may be empty but not null
1176             */
1177            public CharSequence8Bit getMainWordsComponent(final Set<String> allAttrWords)
1178                {
1179                final int endOfAttrWords = ExhibitName.getEndOfAttrWords(this);
1180                final int endOfMainWords = ExhibitName.getEndOfMainWords(this, -1, endOfAttrWords, allAttrWords);
1181                final CharSequence8Bit result = new CSWrapper<ExhibitFull>(underlying, start, start + endOfMainWords);
1182                // Should be identical result to getMainWordsComponent() on full name.
1183    //            assert(TextUtils.contentEquals(result, ExhibitName.getMainWordsComponent(underlying, allAttrWords)));
1184                return(result);
1185                }
1186    
1187            /**Equality checks for the same type and content.
1188             * Will return true in the case of an identical short name of a different full name.
1189             */
1190            @Override public boolean equals(final Object obj)
1191                {
1192                if(this == obj) { return(true); }
1193                if(!(obj instanceof Name.ExhibitShort)) { return(false); }
1194                return(TextUtils.contentEquals(this, (Name.ExhibitShort)obj));
1195                }
1196    
1197            /**The hash code is based on a fixed-time sampling of the content for speed. */
1198            @Override public int hashCode()
1199                {
1200                final int l = length();
1201                int h = l;
1202                final int step = 1 + (l >>> 3);
1203                for(int i = l; (i -= step) > 0; )
1204                    { h = 37*h + byteAt(i); }
1205                h -= l;
1206                h = (31*h) ^ byteAt(0); // Always include the first char in the hash as typically contains a reasonable spread of values.
1207                return(h);
1208                }
1209    
1210            /**Convert to String containing same char sequence; never null nor empty.
1211             * Automatically intern() the String result to avoid duplication.
1212             */
1213            @Override public String toString() { return(MemoryTools.intern((new StringBuilder(length())).append(this).toString())); }
1214    
1215            /**Unique leading printable-ASCII character for persistableKey(), not a valid exhibit name char. */
1216            public static final char PERSISTABLE_NAME_LEADING_CHAR = '!';
1217    
1218            /**Length of persistable key in chars; strictly positive. */
1219            public static final int PERSISTABLE_NAME_LENGTH = 8;
1220    
1221            /**Persistable unique printable-ASCII-7 immutable CharSequence (CharSequence8Bit) key; never null nor empty.
1222             * This generates a key:
1223             * <ul>
1224             * <li>that is of fixed (small) length</li>
1225             * <li>that should be shorter than most actual exhibit short names thus representing a reduced memory footprint</li>
1226             * <li>that is quickly and easily distinguished from (and can be unambiguously mixed with) exhibit short (and full) names, by leading PERSISTABLE_NAME_LEADING_CHAR</li>
1227             * <li>that should in practice be unique across all short exhibit names and thus can be used as a substitute for a short name</li>
1228             * <li>depends only on the short name, not any other part of the full name</li>
1229             * <li>has a small amount of verification built in (second char of key is first char of short name)</li>
1230             * <li>that is pure printable 7-bit ASCII</li>
1231             * <li>does not contain any ASCII control characters or whitespace (&le;32)</li>
1232             * <li><em>not</em> intern()ed here because a key is small enough for the overhead to be potentially prohibitive</li>
1233             * </ul>
1234             * <p>
1235             * This may be slow to compute.
1236             */
1237            public CharSequence8Bit persistableKey()
1238                {
1239                final byte[] key = new byte[PERSISTABLE_NAME_LENGTH];
1240                key[0] = (byte) PERSISTABLE_NAME_LEADING_CHAR;
1241                key[1] = byteAt(0);
1242    
1243                // Extract a strong SHA-1 digest/hash of the bytes of the short name.
1244                // (Excludes the first char already explicitly in the key.)
1245                final MessageDigest digestor;
1246                try { digestor = MessageDigest.getInstance(CoreConsts.HASH_SHA1); }
1247                catch(final NoSuchAlgorithmException e) { throw new Error("Unable to locate hash impl: "+CoreConsts.HASH_SHA1, e); }
1248                for(int i = length(); --i > 0; ) { digestor.update(byteAt(i)); } // TODO: possibly replace with more efficient chunk-at-a-time form.
1249                final byte digest[] = digestor.digest();
1250                if(EXTRA_ASSERTS) { assert(digest != null); }
1251                if(EXTRA_ASSERTS) { assert(digest.length >= (PERSISTABLE_NAME_LENGTH - 2)); }
1252                // Convert digest to printable ~6.5-bits-per-char form
1253                // to retain as much digest as possible and thus minimise chance of key collision.
1254                for(int i = 0, ki = 2; ki < PERSISTABLE_NAME_LENGTH; i++)
1255                    {
1256                    int ib = digest[i] & 0x7f; // Basic 7-bit form.
1257                    // If result would be control/whitespace (or DEL),
1258                    // then translate it into the printable region,
1259                    // and incorporate the otherwise-discarded digest bit.
1260                    if(ib <= ' ')
1261                        { ib += (((digest[i] & 0x80) != 0) ? 64 : 94); }
1262                    else if(ib == 127) // Avoid DEL code.
1263                        { ib = ((digest[i] & 0x80) != 0) ? 63 : 93; }
1264                    if(EXTRA_ASSERTS) { assert((ib > ' ') && (ib < 127)); }
1265                    key[ki++] = (byte) ib; // Append digest char.
1266                    }
1267    
1268                return(new CS8Bit(key));
1269                }
1270            }
1271    
1272        /**ExhibitFullName holds a (ASCII-7) valid syntactically-valid full exhibit name.
1273         * This will only allow construction of instances representing a valid full exhibit name.
1274         * <p>
1275         * This mainly represents a guarantee that the data is a valid full exhibit name,
1276         * plus some utility methods/types for conveniently and efficiently accessing components of that name.
1277         * <p>
1278         * The constructors are private and only accessible via factory methods
1279         * to help with instance control,
1280         * and all instances are automatically intern()ed to eliminate duplicates.
1281         */
1282        public static final class ExhibitFull extends Name implements CharSequence7Bit
1283            {
1284            private ExhibitFull(final CharSequence name, final Name prev)
1285                {
1286                super(name, prev); // Encode/store the text.
1287    
1288                // Verify what we've been given.
1289                try { validateObject(); }
1290                catch(final InvalidObjectException e)
1291                    { throw new IllegalArgumentException(e.getMessage()); }
1292                }
1293    
1294            /**Create an ExhibitFull instance; never null.
1295             * It is often best to attempt to share a prefix with the lexically previous (or next) item.
1296             * <p>
1297             * If passed an ExhibitFull (but not including sub-classes) this returns it untouched.
1298             *
1299             * @param fullName  the (7-bit) syntactically-valid full exhibit name; never null nor empty
1300             *
1301             * @throws IllegalArgumentException  in the case of a null or invalid putative full name
1302             */
1303            public static ExhibitFull create(final CharSequence fullName)
1304                { return(create(fullName, null)); }
1305    
1306            /**Create an ExhibitFull instance with an optional previous value to attempt to share a prefix with; never null.
1307             * It is often best to attempt to share a prefix with the lexically previous (or next) item.
1308             * <p>
1309             * If passed an ExhibitFull (but not including sub-classes) this returns it untouched.
1310             *
1311             * @param fullName  the (7-bit) syntactically-valid full exhibit name; never null nor empty
1312             * @param prev  previous Name with which to attempt to share a prefix; null if none
1313             *
1314             * @throws IllegalArgumentException  in the case of a null or invalid putative full name
1315             */
1316            public static ExhibitFull create(final CharSequence fullName,
1317                                             final Name prev)
1318                {
1319                if(null == fullName) { throw new IllegalArgumentException("ExhibitFull Name cannot be null"); }
1320    
1321                if(ExhibitFull.class.equals(fullName.getClass()))
1322                    {
1323                    // Optimisation: if this is already an ExhibitFull (and not a sub-class) then return it as-is
1324                    // if no prev has been offered (that might share a longer prefix).
1325                    if(prev == null) { return((ExhibitFull) fullName); }
1326                    // Optimisation: if prev content equals this then return prev to save a visit to intern(), etc.
1327                    else if(prev.contentEquals(fullName)) { return((ExhibitFull) prev); }
1328                    }
1329    
1330                final AdHocPrefixCache<Integer> adHocPrefixCache = getAdHocPrefixCache();
1331                // If no prev was specified we look for one in the ad-hoc cache.
1332                final Name prevToUse;
1333                if(prev != null) { prevToUse = prev; }
1334                else
1335                    {
1336                    // If we find an exact match with the right class in the ad-hoc cache then return it.
1337                    prevToUse = adHocPrefixCache.findPrev(fullName);
1338                    if((prevToUse != null) && (ExhibitFull.class.equals(prevToUse.getClass())) && prevToUse.contentEquals(fullName))
1339                        { return((ExhibitFull) prevToUse); }
1340                    }
1341                // Return intern()ed value to eliminate accidental duplicates.
1342                // Pass it through the ad-hoc prefix cache too...
1343                final ExhibitFull result = adHocPrefixCache.offerNewInstance(MemoryTools.intern(new ExhibitFull(fullName, prevToUse)));
1344                if(EXTRA_ASSERTS) { assert(ExhibitFull.class.equals(result.getClass())); }
1345                return(result);
1346                }
1347    
1348            /**Create an ExhibitFull instance with an optional previous value to attempt to share a prefix with, with no intern()ing; never null.
1349             * It is often best to attempt to share a prefix with the lexically previous (or next) item.
1350             * <p>
1351             * This uses minimal cleverness, no intern()ing, no use of prefix/suffix caches, minimal overheads, etc,
1352             * and so allows generation of completely fresh instances unconnected with any previous ones.
1353             * This may well not make optimal use of memory, and so is not usually recommended.
1354             *
1355             * @param fullName  the (7-bit) syntactically-valid full exhibit name; never null nor empty
1356             * @param prev  previous Name with which to attempt to share a prefix; null if none
1357             *
1358             * @throws IllegalArgumentException  in the case of a null or invalid putative full name
1359             */
1360            public static ExhibitFull createNoIntern(final CharSequence fullName,
1361                                                     final Name prev)
1362                {
1363                if(null == fullName) { throw new IllegalArgumentException("ExhibitFull Name cannot be null"); }
1364                final ExhibitFull result = new ExhibitFull(fullName, prev);
1365                return(result);
1366                }
1367    
1368            /**Cache of link to parent, lazily computed on demand.
1369             * Only accessed under instance lock so as to eliminate access races
1370             * and thus guarantee avoidance of creation of duplicates.
1371             * <p>
1372             * Not serialisable (because recomputable).
1373             */
1374            private transient ExhibitShort shortName;
1375    
1376            /**Extract strongly-typed ExhibitShortName, ie the filename component; never null.
1377             * This is a light-weight, non-intern()able CharSequence wrapper around the existing data.
1378             * <p>
1379             * This guarantees that only one ShortName instance can be generated for each FullName.
1380             * Thus intern()ing is neither needed nor supported.
1381             */
1382            public synchronized ExhibitShort getShortName()
1383                {
1384                ExhibitShort result = shortName;
1385                if(null != result) { return(result); }
1386                final int l = length();
1387                shortName = result = new ExhibitShort(this, TextUtils.lastIndexOf(this, '/', l - ExhibitName.MIN_FILENAME_LENGTH) + 1, l);
1388                return(result);
1389                }
1390    
1391            /**Create a "virtual" full name consisting of the category directory and then the file component; never null.
1392             * The results in a syntactically-valid full exhibit name,
1393             * but omits the internal directory "noise".
1394             * <p>
1395             * Returns this instance as-is if already 'virtual'.
1396             */
1397            public Name.ExhibitFull getVirtualExhibitName()
1398                {
1399                // Optimisation:
1400                // if there is only one slash, the name is already virtual,
1401                // so the name can be returned untouched.
1402                final int l = length();
1403                final int firstSlash = TextUtils.indexOf(this, '/', 1);
1404                final int lastSlash = TextUtils.lastIndexOf(this, '/', l - ExhibitName.MIN_FILENAME_LENGTH);
1405                if(firstSlash == lastSlash) { return(this); }
1406    
1407                // FIXME: avoid inefficient conversion via StringBuilder/char[].
1408                return(create(new StringBuilder(l - (lastSlash-firstSlash))
1409                    .append(subSequence(0, firstSlash+1))
1410                    .append(subSequence(lastSlash+1, l)),
1411                              this)); // Prefix and suffix should be entirely shareable with this...
1412                }
1413    
1414            /**Extract char at specified index.
1415             * This specialisation can take advantage of the fact that exhibit names are entirely 7-bit,
1416             * so there is no danger of sign extension in conversion from byte to int for comparison.
1417             */
1418            public final char charAt(final int index)
1419                //{ return((char) byteAt(index)); }
1420                {
1421                // Bounds check for public interface.
1422                if((index < 0) || (index >= length())) { throw new IllegalArgumentException(); }
1423                return((char) byteAtInner(this, index));
1424                }
1425    
1426            /**Provide natural sort order for this class.
1427             * This specialisation can take advantage of the fact that exhibit names are entirely 7-bit,
1428             * so there is no danger of sign extension in conversion from byte to int for comparison.
1429             */
1430            public final int compareTo(final Name.ExhibitFull o)
1431                {
1432                // Same instance compares equal...
1433                if(this == o) { return(0); }
1434                //return(TextUtils.compare(this, o));
1435    
1436                // Inlined and specialised for better performance...
1437                final int l1 = length();
1438                final int l2 = o.length();
1439                final int l = Math.min(l1, l2);
1440    
1441    //            if(l > 0)
1442                    {
1443                    final int[] args = new int[2]; // Allocate just once to minimise heap churn.
1444    
1445                    for(int i = 0; i < l; )
1446                        {
1447                        // Do this in chunks for efficiency...
1448                        final byte[] raw1 = bytesAtInner(this, i, args);
1449                        final int offset1 = args[0];
1450                        final int length1 = args[1];
1451                        final byte[] raw2 = bytesAtInner(o, i, args);
1452                        final int offset2 = args[0];
1453                        final int length2 = args[1];
1454    
1455                        final int ll = Math.min(l-i, Math.min(length1, length2));
1456    //if(EXTRA_ASSERTS) { assert(ll > 0); }
1457    
1458                        for(int j = 0; j < ll; ++j)
1459                            {
1460                            final int c1 = raw1[offset1 + j];
1461                            final int c2 = raw2[offset2 + j];
1462    //if(EXTRA_ASSERTS) { assert((c1 >= 0) && (c2 >= 0)) : "exhibit name chars can only be 7-bit so no danger of sign-extension"; }
1463                            if(c1 != c2)
1464                                {
1465                                final int cResult = c1 - c2;
1466    //if(EXTRA_ASSERTS) { assert(cResult == TextUtils.compare(this, o)); } // Very expensive but precise check.
1467                                return(cResult);
1468                                }
1469                            }
1470    
1471                        i += ll;
1472                        }
1473                    }
1474    
1475                final int lResult = l1 - l2;
1476    //if(EXTRA_ASSERTS) { assert(lResult == TextUtils.compare(this, o)); } // Very expensive but precise check.
1477                return(lResult);
1478                }
1479    
1480            /**Convert to String containing same char sequence; never null.
1481             * Automatically intern()s the full-exhibit-name result to avoid duplication.
1482             */
1483    //        @Override public String toString() { return(MemoryTools.intern((new StringBuilder(length())).append(this).toString())); }
1484            @Override public String toString() { return(MemoryTools.intern((new StringBuilder(length())).append((CharSequence8Bit) (this)).toString())); }
1485    
1486            /**Validate fields/state.
1487             * Called in the constructor and possibly after de-serialising.
1488             * <p>
1489             * Barf if something bad is found.
1490             * (Maybe allow some extra info in debug version.)
1491             */
1492            @Override
1493            public void validateObject()
1494                throws InvalidObjectException
1495                {
1496                // Check Name invariants.
1497                super.validateObject();
1498    
1499                // Verify that this is valid full Exhibit name content!
1500                if(!ExhibitName.validNameSyntaxForce(this))
1501                    { throw new InvalidObjectException("bad object: not a valid full exhibit name: "+this); }
1502                }
1503    
1504            /**Unique Serialisation class ID generated by http://random&#46;hd&#46;org/. */
1505            private static final long serialVersionUID = 8993934283490189577L;
1506            }
1507    
1508        /**Provide natural sort order for this and derived classes. */
1509        public final int compareTo(final Name o)
1510            {
1511            // Same instance compares equal...
1512            if(this == o) { return(0); }
1513            //return(TextUtils.compare(this, o));
1514            // Inlined and specialised for better performance...
1515            final int l1 = _length;
1516            final int l2 = o._length;
1517            final int l = Math.min(l1, l2);
1518            for(int i = 0; i < l; ++i)
1519                {
1520                final int c1 = (byteAt(i) & 0xff);
1521                final int c2 = (o.byteAt(i) & 0xff);
1522                if(c1 != c2) { return(c1 - c2); }
1523                }
1524            return(l1 - l2);
1525            }
1526    
1527        /**Compute the Integer prefix key for the supplied CharSequence; never null.
1528         * This assumes that the top 8 bits of each char are unimportant
1529         * since if the sequence is to be turned into a Name they must be zero.
1530         * <p>
1531         * We construct the prefix key from the first four characters of the putative Name
1532         * (or fewer if the name is shorter).
1533         */
1534        @SuppressWarnings("unused") private static Integer computeIntegerPrefixKey(final CharSequence cs)
1535            {
1536            if(EXTRA_ASSERTS) { assert(null != cs); }
1537            final int l = cs.length();
1538            if((MIN_TERMINUS_MATCH_LENGTH <= 0) && (l == 0)) { return(0); }
1539            int result = cs.charAt(0);
1540            if((MIN_TERMINUS_MATCH_LENGTH <= 1) && (l == 1)) { return(result); }
1541            result |= (((int) cs.charAt(1)) << 8);
1542            if((MIN_TERMINUS_MATCH_LENGTH <= 2) && (l == 2)) { return(result); }
1543            result |= (((int) cs.charAt(2)) << 16);
1544            if((MIN_TERMINUS_MATCH_LENGTH <= 3) && (l == 3)) { return(result); }
1545            result |= (((int) cs.charAt(3)) << 24);
1546            return(result);
1547            }
1548    
1549        /**Compute the Integer suffix key for the supplied CharSequence; never null.
1550         * This assumes that the top 8 bits of each char are unimportant
1551         * since if the sequence is to be turned into a Name they must be zero.
1552         * <p>
1553         * We construct the suffix key from the last four characters of the putative Name
1554         * (or fewer if the name is shorter).
1555         */
1556        @SuppressWarnings("unused") private static Integer computeIntegerSuffixKey(final CharSequence cs)
1557            {
1558            if(EXTRA_ASSERTS) { assert(null != cs); }
1559            final int l = cs.length();
1560            if((MIN_TERMINUS_MATCH_LENGTH <= 0) && (l == 0)) { return(0); }
1561            int result = cs.charAt(l - 1);
1562            if((MIN_TERMINUS_MATCH_LENGTH <= 1) && (l == 1)) { return(result); }
1563            result |= (((int) cs.charAt(l - 2)) << 8);
1564            if((MIN_TERMINUS_MATCH_LENGTH <= 2) && (l == 2)) { return(result); }
1565            result |= (((int) cs.charAt(l - 3)) << 16);
1566            if((MIN_TERMINUS_MATCH_LENGTH <= 3) && (l == 3)) { return(result); }
1567            result |= (((int) cs.charAt(l - 4)) << 24);
1568            return(result);
1569            }
1570    
1571        /**Contains lookup cache by prefix for all Name instances.
1572         * This is used to try to find a prefix for a newly-constructed Name
1573         * where no prefix has been supplied at all.
1574         * <p>
1575         * This looks up in a short list of recent/useful Name values with the same fixed-length prefix
1576         * for something of the same concrete type and the longest available prefix match,
1577         * returning the candidate 'prev' item (or null if none)
1578         * which then can be used in the usual way.
1579         * <p>
1580         * The Name instances are held via WeakReference wrappers
1581         * which should ensure that the cache does not keep them from being garbage collected.
1582         * <p>
1583         * This would usually be invoked by the factory method or constructor
1584         * where no 'prev' has been supplied.
1585         * <p>
1586         * This cache should grow as needed to get a decent hit rate,
1587         * and shrink to release memory when the hit rate is high or the memory subsystem is stressed.
1588         * <p>
1589         * Thread-safe but does not support concurrency (fully synchronized).
1590         * <p>
1591         * A lock can be held on instances of this object to make compound operations atomic.
1592         *
1593         * @param <TerminusKey>  type of key for prefix of Name instances
1594         *     must be immutable and support consistent hashCode() and equals()
1595         */
1596        private static final class AdHocPrefixCache<TerminusKey extends Number>
1597            {
1598            /**The lookup from prefix key to list of candidate same-prefix instances; never null.
1599             * On each list may be items all of exactly one prefix
1600             * or whose prefix hashes to the same value,
1601             * depending on how the prefix key value is constructed.
1602             * <p>
1603             * Empty lists (or those effectively empty because all the WeakReferences have been dropped)
1604             * will be purged (ie empty lists are not retained).
1605             * <p>
1606             * The lists values are of type LinkedList, with 'best' items first.
1607             * <p>
1608             * (In future, where a list is of length 1 and where the maximum length is currently 1,
1609             * a list may be stored as a singleton for efficiency.)
1610             * <p>
1611             * We hope for a reasonable minimum size to have a sensible chance of any match at all
1612             * across a large text base.
1613             * <p>
1614             * We size the maximum based on total heap capacity,
1615             * and allow everything to be discarded in case of extreme memory stress.
1616             */
1617            private final MemoryTools.CacheMiniMap<TerminusKey, LinkedList<WeakReference<Name>>> prefixStore =
1618                LRUMapAutoSizeForHitRate.<TerminusKey, LinkedList<WeakReference<Name>>>create(
1619                        0, computeCacheMaxSize(), "prefixStore");
1620    
1621            /**The lookup from suffix key to list of candidate same-suffix instances; never null.
1622             * Lighter-weight than the prefixStore, and smaller, since suffix matching is a secondary activity.
1623             */
1624            private final MemoryTools.CacheMiniMap<TerminusKey, LinkedList<WeakReference<Name>>> suffixStore =
1625                SimpleProbabilisticCache.<TerminusKey, LinkedList<WeakReference<Name>>>create(
1626                        computeCacheMaxSize() >> 2, // Smaller heap for suffixes.
1627                        "suffixStore");
1628    
1629            /**Compute maximum (prefix) cache size, partly based on heap size; strictly positive.
1630             * Aim to allow ~10000 entries per 1GB of heap, with a minimum of several hundred.
1631             */
1632            private static int computeCacheMaxSize()
1633                { return(Math.max(1<<9, (int) Math.min(1<<16, Runtime.getRuntime().totalMemory() >> 17))); }
1634    
1635    
1636            /**Returns a suitable prev (with a shared prefix/suffix) for the supplied non-null Name of any Name subclass, else null if none found.
1637             * May trim state, especially dead references.
1638             */
1639            synchronized Name findPrev(final CharSequence putativeName)
1640                {
1641                if(EXTRA_ASSERTS) { assert(null != putativeName); }
1642    
1643                final int len = putativeName.length();
1644    
1645                // Don't try for very short matches.
1646                if(len < MIN_TERMINUS_MATCH_LENGTH) { return(null); }
1647    
1648                // Find and return the best match, if any.
1649                // If we find any prefix match then we don't try for a suffix match.
1650                for(final boolean doPrefix : new boolean[]{true, false})
1651                    {
1652                    // Choose which store to operate on.
1653                    final MemoryTools.CacheMiniMap<TerminusKey, LinkedList<WeakReference<Name>>> store =
1654                        doPrefix ? prefixStore : suffixStore;
1655                    // Compute the correct key for the selected store.
1656                    final TerminusKey key = (TerminusKey)
1657                        (doPrefix ? computeIntegerPrefixKey(putativeName) : computeIntegerSuffixKey(putativeName));
1658    
1659                    // Get the existing list for this prefix if any...
1660                    final LinkedList<WeakReference<Name>> list = store.get(key);
1661                    // If no list then there is no available match.
1662                    if(null == list) { return(null); } // Nothing found.
1663    
1664                    Name result = null; // Nothing found yet.
1665                    int savedCharsBest = 0; // Best shared-characters length (prefix and suffix) found so far.
1666                    WeakReference<Name> eBest = null; // Container for the best item.
1667                    boolean first = true;
1668                    for(final Iterator<WeakReference<Name>> it = list.iterator(); it.hasNext(); first = false)
1669                        {
1670                        final WeakReference<Name> e = it.next();
1671                        final Name cachedName = e.get();
1672                        // If we find a dead entry then zap it and continue...
1673                        if(null == cachedName)
1674                            {
1675                            it.remove();
1676                            continue;
1677                            }
1678                        // Find common prefix/suffix length: skip if zero.
1679                        // Can be > len if sharable prefix and suffix overlap.
1680                        final int savedChars = Name.computeCommonPrefixLength(putativeName, cachedName) + Name.computeCommonSuffixLength(putativeName, cachedName);
1681                        if(savedChars == 0)
1682                            { continue; }
1683                        // If this is best so far than capture it.
1684                        if(savedChars > savedCharsBest)
1685                            {
1686    if(DEBUG /*|| !doPrefix*/) { System.out.println("***Name ad-hoc "+(doPrefix?"prefix":"suffix")+" match found "+((result==null) ? "" : "better ")+"match with shared chars "+savedChars); }
1687                            result = cachedName;
1688                            savedCharsBest = savedChars;
1689                            if(!first) { eBest = e; }
1690                            // If we already have a maximal match (no unshared characters) then stop looking.
1691                            if(savedChars >= len) { break; }
1692                            }
1693                        }
1694                    // Move best item to the front to mark it as 'best' (if not already there).
1695                    if(null != eBest)
1696                        {
1697                        list.remove(eBest);
1698                        list.addFirst(eBest);
1699                        }
1700    
1701                    // Trim the list iff now too long (eg because we are now memory-stressed).
1702                    trimListLength(list, doPrefix);
1703                    // If the list is now empty then remove it entirely.
1704                    if(list.isEmpty()) { store.remove(key); }
1705    
1706                    // If we found something then don't move on to the next store...
1707                    if(result != null) { return(result); }
1708                    }
1709    
1710                return(null); // Nothing found...
1711                }
1712    
1713            /**Offer a newly constructed Name instance to the cache; returns the value passed in.
1714             * Rejected if already at maximum chain length.
1715             * @return the input name unchanged
1716             */
1717            synchronized <N extends Name> N offerNewInstance(final N name)
1718                {
1719                if(null == name) { return(null); }
1720    
1721                // Don't clutter our tables with very short values.
1722                if(name.length() < MIN_TERMINUS_MATCH_LENGTH) { return(name); }
1723    
1724                // If too long to share termini with, espeically ad-hoc, then reject.
1725                if(name.getPrevChainLength() >= MAX_PREV_CHAIN_LENGTH) { return(name); }
1726    
1727                for(final boolean doPrefix : new boolean[]{true, false})
1728                    {
1729                    // Choose which store to operate on.
1730                    final MemoryTools.CacheMiniMap<TerminusKey, LinkedList<WeakReference<Name>>> store =
1731                        doPrefix ? prefixStore : suffixStore;
1732                    // Compute the correct key for the selected store.
1733                    final TerminusKey key = (TerminusKey)
1734                        (doPrefix ? computeIntegerPrefixKey(name) : computeIntegerSuffixKey(name));
1735    
1736                    // Get the existing list for this prefix if any...
1737                    LinkedList<WeakReference<Name>> list = store.get(key);
1738                    // Create (and store) a new list if need be.
1739                    if(null == list)
1740                        {
1741                        list = new LinkedList<WeakReference<Name>>();
1742                        store.put(key, list);
1743                        }
1744                    // Trim out any dead items (ie weak references gone away).
1745                    trimDeadRefs(list);
1746                    // If something already equivalent to the supplied Name is on the list
1747                    // (ie same content and concrete class)
1748                    // then move it to the front (unless already at the front!)
1749                    // rather than creating a new WeakReference instance
1750                    // and/or filling up the list with duplicates for example.
1751                    boolean foundNameAlreadyOnList = false;
1752                    boolean first = true;
1753                    for(final Iterator<WeakReference<Name>> it = list.iterator(); it.hasNext(); first = false)
1754                        {
1755                        final WeakReference<Name> e = it.next();
1756                        final Name cachedName = e.get();
1757                        if(name.equals(cachedName))
1758                            {
1759                            foundNameAlreadyOnList = true;
1760                            if(!first)
1761                                {
1762    if(DEBUG) { System.out.println("Name.AHPC: moved up to first position: "+name); }
1763                                it.remove(); // Remove from this position...
1764                                list.addFirst(e); // Re-insert at start (without constructing a new WeakRef).
1765                                }
1766    else { if(DEBUG) { System.out.println("Name.AHPC: found already in first position: "+name); } }
1767                            break;
1768                            }
1769                        }
1770                    // Else stuff us on the front of the list as 'newest' and possibly best.
1771                    if(!foundNameAlreadyOnList)
1772                        {
1773                        list.addFirst(new WeakReference<Name>(name));
1774    if(DEBUG) { System.out.println("Name.AHPC: inserting new at first position: "+name); }
1775                        }
1776    
1777                    // Trim the list iff now too long.
1778                    trimListLength(list, doPrefix);
1779                    }
1780    
1781                return(name); // Return the input name.
1782                }
1783    
1784            /**Trim dead references from the supplied non-null list. */
1785            private void trimDeadRefs(final List<WeakReference<Name>> l)
1786                {
1787                for(final Iterator<WeakReference<Name>> it = l.iterator(); it.hasNext(); )
1788                    {
1789                    final WeakReference<Name> e = it.next();
1790                    // Test if ref is alive and zap entry if not.
1791                    // Note that get() doesn't keep ref artificially alive unlike for SoftReference...
1792                    if(null == e.get())
1793                        { it.remove(); }
1794                    }
1795                }
1796    
1797            /**Trim the supplied non-null list to size if too long. */
1798            private void trimListLength(final LinkedList<WeakReference<Name>> l, final boolean isPrefix)
1799                {
1800                // Cannot be overlong unless longer than smallest permitted max length...
1801                if(l.size() > STRESSED_LIST_LEN)
1802                    {
1803                    final int targetLen = getMaxListLen(isPrefix);
1804                    while(l.size() > targetLen)
1805                        { l.removeLast(); }
1806                    }
1807                }
1808    
1809            /**Maximum list length under normal circumstances; strictly positive.
1810             * Borrowing from GZIP '-6'/default experience a good value might be ~6.
1811             */
1812            private static final int MAX_LIST_LEN = 6;
1813    
1814            /**Maximum secondary list length under normal circumstances; strictly positive.
1815             * Since suffix matching is secondary goal this is shorter than the general limit
1816             * but should still probably be greater than 1.
1817             */
1818            private static final int MAX_SECONDARY_LIST_LEN = ((MAX_LIST_LEN >> 1) | 1);
1819    
1820            /**Maximum list length under memory stress; strictly positive, less than MAX_LIST_LEN.
1821             * This will do a less good job of finding matches (ie will probably do less compaction)
1822             * but will consume less memory in the ad-hoc cache.
1823             * <p>
1824             * Under stress, any entries we touch are capped to this length,
1825             * thus allowing us to gracefully and incrementally free space.
1826             * <p>
1827             * Borrowing from GZIP '-1/'-fast' experience a good value might be 1.
1828             */
1829            private static final int STRESSED_LIST_LEN = 1;
1830    
1831            /**Maximum same-prefix list length; strictly positive.
1832             * This will drop to a smaller measure if memory is under stress.
1833             */
1834            /* private */ int getMaxListLen(final boolean isPrefix)
1835                { return(MemoryTools.isMemoryStressed() ? STRESSED_LIST_LEN : (isPrefix ? MAX_LIST_LEN : MAX_SECONDARY_LIST_LEN)); }
1836            }
1837    
1838        /**Get instance of AdHocPrefixCache; never null. */
1839        private synchronized static final AdHocPrefixCache<Integer> getAdHocPrefixCache()
1840            {
1841            // If cache has not been discarded then return it.
1842            final SoftReference<AdHocPrefixCache<Integer>> cache = _adHocPrefixCache;
1843            AdHocPrefixCache<Integer> result = null;
1844            if((null != cache) && (null != (result = cache.get())))
1845                { return(result); }
1846            // Construct and cache and return a new instance.
1847            result = new AdHocPrefixCache<Integer>();
1848            _adHocPrefixCache = new SoftReference<AdHocPrefixCache<Integer>>(result);
1849            return(result);
1850            }
1851    
1852        /**Memory-sensitive cache of Name instances indexed by fixed-length prefix and suffix; initially null, not thereafter.
1853         * Borrows a few techniques from GZIP-like algorithms.
1854         * <p>
1855         * This uses Integer as a key for up to 4 bytes of prefix/suffix lookup.
1856         * <p>
1857         * Only accessed under the class lock, by getAdHocPrefixCache().
1858         * <p>
1859         * Held via a StaticReference to allow the entire cache to be discarded automatically
1860         * under extreme memory stress.
1861         * <p>
1862         * Accessed under the class lock.
1863         */
1864        private static SoftReference<AdHocPrefixCache<Integer>> _adHocPrefixCache;
1865    
1866        /**Case-insensitive Comparator for Name equivalent to TextUtils.CASE_INSENSITIVE_ORDER; not null.
1867         * May be able to operate more efficiently than generic TextUtils.CASE_INSENSITIVE_ORDER.
1868         */
1869        public static final Comparator<Name> CASE_INSENSITIVE_ORDER = new Comparator<Name>(){
1870            public int compare(final Name n1, final Name n2)
1871                { return(TextUtils.CASE_INSENSITIVE_ORDER.compare(n1, n2)); }
1872            };
1873    
1874        /**Set true for extra assert()s for internal invariants, usually only in non-release code. */
1875        private static final boolean EXTRA_ASSERTS = IsDebug.isDebug;
1876    
1877        /**Set true for extra debug/development logging output. */
1878        private static final boolean DEBUG = false;
1879        }