001    /*
002    Copyright (c) 1996-2011, Damon Hart-Davis
003    All rights reserved.
004    
005    Redistribution and use in source and binary forms, with or without
006    modification, are permitted provided that the following conditions are
007    met:
008    
009      * Redistributions of source code must retain the above copyright
010        notice, this list of conditions and the following disclaimer.
011    
012      * Redistributions in binary form must reproduce the above copyright
013        notice, this list of conditions and the following disclaimer in the
014        documentation and/or other materials provided with the
015        distribution.
016    
017    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
018    IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
019    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
020    PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
021    OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
022    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
023    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
024    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
025    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
026    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
027    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
028    */
029    package org.hd.d.pg2k.svrCore;
030    
031    import java.io.IOException;
032    import java.io.InvalidObjectException;
033    import java.io.ObjectInputStream;
034    import java.io.ObjectInputValidation;
035    import java.io.ObjectOutputStream;
036    import java.io.ObjectStreamException;
037    import java.io.Serializable;
038    import java.lang.ref.SoftReference;
039    import java.lang.ref.WeakReference;
040    import java.security.MessageDigest;
041    import java.security.NoSuchAlgorithmException;
042    import java.util.Comparator;
043    import java.util.Iterator;
044    import java.util.LinkedList;
045    import java.util.List;
046    import java.util.Set;
047    
048    import org.hd.d.pg2k.svrCore.TextUtils.CharSequence8Bit;
049    
050    import ORG.hd.d.IsDebug;
051    
052    // TODO: Allow sharing of the underlying byte[] between any two non-prev equal-content Name types.
053    
054    /**Compact immutable in-memory representation of a (usually-short, sharable-prefix/suffix) 8-bit ASCII String.
055     * This can share a common prefix and suffix with another instance,
056     * avoiding having to store the shared elements itself.
057     * <p>
058     * This is particularly suitable for densely-packed sorted lists of values,
059     * such as sets of exhibit names which with tend to have by necessity long shared prefixes.
060     * <p>
061     * This is suitable for names but also other usually-short 8-bit text values,
062     * in particular those that don't have the sort of internal structure and repetition
063     * that Compact7BitString can take advantage of.
064     * <p>
065     * This supports random access reasonably efficiently
066     * (eg through the CharSequence interface)
067     * but some elements will be slower to access than others.
068     * (chars at either end will tend to be the slowest to access.)
069     * <p>
070     * Converting to and from this form should be relatively resource-light,
071     * eg with low CPU and memory requirements,
072     * though all instances are automatically intern()ed which has some cost.
073     * <p>
074     * Where there is a conflict between memory consumption and CPU,
075     * this implementation is biased to minimise memory use at the possible expense of CPU.
076     * <p>
077     * Internally this uses a byte[] to store the data
078     * (a basic halving of in-memory size in most JVMs),
079     * and use of a common prefix with a similar
080     * (nominally lexically previous close value)
081     * should help further trim memory use.
082     * <p>
083     * (This implementation internally attempts to minimise the number of objects kept live
084     * and that need to be traversed to complete construction.)
085     * <p>
086     * These classes are not publicly constructible in order to help control instance count,
087     * and indeed may use MemoryTools.intern() to eliminate duplicates.
088     * <p>
089     * This uses an internal cache to attempt to find ad-hoc prefix matches with existing instances
090     * if no 'prev' value is offered during creation of a new instance.
091     * This cache is memory-sensitive and will be silently sacrificed
092     * if the system is short of memory.
093     * <p>
094     * Note that MemoryTools.intern() may not release a 'duplicate'
095     * if there remain other instances hanging from it.
096     * <p>
097     * This is not final though all its constructors are private
098     * to allow only inner-class derived types in order to preserve immutability.
099     * <p>
100     * As this is performance-critical code, and may have to run on slow/non-JIT JVMs,
101     * most assert()s that check purely-internally-imposed invariants
102     * are entirely eliminated from release code;
103     * those that might still be triggered by runtime issues external to this class,
104     * such as run-time presence of hashing algorithms,
105     * remain present to enabled at runtime if required.
106     */
107    public class Name implements CharSequence8Bit, Comparable<Name>,
108                                 Serializable, ObjectInputValidation,
109                                 MemoryTools.Internable // Not currently supporting InternableRank
110        {
111        /**Create a Name instance; never null unless input is null.
112         * It is often best to attempt to share a prefix with the lexically previous (or next) item.
113         * <p>
114         * If passed a null this returns a null.
115         * <p>
116         * If passed a Name (but not including sub-classes) this returns it untouched.
117         * @param text  the 8-bit text to store; if null then null is returned
118         */
119        public static Name create(final CharSequence text)
120            { return(create(text, null)); }
121    
122        /**Create a Name instance with a previous value to attempt to share a prefix with; never null unless input is null.
123         * It is often best to attempt to share a prefix with the lexically previous (or next) item.
124         * <p>
125         * If passed a null this returns a null.
126         * <p>
127         * If passed a Name (but not including sub-classes) this returns it untouched.
128         *
129         * @param text  the 8-bit text to store; if null then null is returned
130         * @param prev  previous value with which to attempt to share a prefix; null if none
131         */
132        public static Name create(final CharSequence text,
133                                  final Name prev)
134            {
135            if(null == text) { return(null); }
136    
137            if(Name.class.equals(text.getClass()))
138                {
139                // Optimisation: if this is already a Name (and not a sub-class) then return it as-is
140                // if no prev has been offered (that might share a longer prefix).
141                if(prev == null) { return((Name) text); }
142                // Optimisation: if prev content equals this then return prev to save a visit to intern(), etc.
143                else if(prev.contentEquals(text)) { return(prev); }
144                }
145    
146            final AdHocPrefixCache<Integer> adHocPrefixCache = getAdHocPrefixCache();
147            // If no prev was specified we look for one in the ad-hoc cache.
148            final Name prevToUse;
149            if(prev != null) { prevToUse = prev; }
150            else
151                {
152                // If we find an exact match with the right class in the ad-hoc cache then return it.
153                prevToUse = adHocPrefixCache.findPrev(text);
154                if((prevToUse != null) && (Name.class.equals(prevToUse.getClass())) && prevToUse.contentEquals(text))
155                    { return(prevToUse); }
156                }
157            // Return intern()ed value to eliminate accidental duplicates.
158            // Pass it through the ad-hoc prefix cache too...
159            final Name result = adHocPrefixCache.offerNewInstance(MemoryTools.intern(new Name(text, prevToUse)));
160            if(EXTRA_ASSERTS) { assert(Name.class.equals(result.getClass())); }
161            return(result);
162            }
163    
164        /**Attempts to create a Name instance, but falls back to returning an intern()ed String instance otherwise.
165         * This copes will possible non-8-bit values by returning a String rather than a Name.
166         * <p>
167         * Essentially this returns the most memory-efficient representation reasonably possible.
168         *
169         * @return  null if text is null, else Name if 8-bit text, else intern()ed String instance
170         */
171        public static CharSequence createOrStringFallback(final CharSequence text, final Name prev)
172            {
173            try { return(create(text, prev)); }
174            catch(final IllegalArgumentException e) { return(MemoryTools.intern(text.toString())); }
175            }
176    
177        /**Shared empty instance; not null. */
178        public static final Name EMPTY = create("");
179    
180        /**Maximum 'prev' chain length that we will support; non-negative.
181         * Limits the amount of recursion and CPU involved in processing 'nested'/'chained' prefixes,
182         * at the possible expense of some lost compression (ie memory savings).
183         * <p>
184         * This limit also helps avoid blocking GC through overly tangled and straggly data structures.
185         * <p>
186         * Usually positive and in the range (say) 1 to 32 to allow compression,
187         * though higher values also hurt things such as sorting
188         * which forces lots of chain-chasing while comparing prefixes.
189         * <p>
190         * As at 2009/08/02 the maximum actual value when creating fresh ExhibitFull values is 18,
191         * ie 18 is sufficient to yield maximum compression/sharing.
192         * <p>
193         * We may need to raise or lower this limit even though serialised data already exists,
194         * so we gently coerce deserialised Name instances to cap this value where necessary.
195         */
196        private static final int MAX_PREV_CHAIN_LENGTH = 18;
197    
198        /**If true, always try to search back through prev values for an as-good or better match.
199         * This in principle allows less redundant intermediate values to be retained in memory
200         * and postpones any forced truncation of the chain.
201         */
202        private static final boolean ALWAYS_LOOK_FOR_EARLIER_PREV = true;
203    
204        /**Number of bits for prefix length (unsigned) value.
205         * Should usually be greater than the number of suffix bits,
206         * since the primary compaction mechanism is prefix sharing.
207         * <p>
208         * 9 is needed for maximal compression on 20090707 test set,
209         * but compression at 8 is very close.
210         */
211        private static final int PREFIX_BITS = 9;
212    
213        /**Mask for prefix bits (at bottom of word).
214         * Broken out to help pre-JIT execution performance.
215         */
216        private static final int PREFIX_MASK = ((1<<PREFIX_BITS)-1);
217    
218        /**Number of bits for suffix length (unsigned) value.
219         * This uses whatever bits are left after prefix bits have been taken
220         * eg from 16 bits in a shared short.
221         */
222        private static final int SUFFIX_BITS = 16 - PREFIX_BITS;
223    
224        /**Mask for suffix bits (at top of word).
225         * Broken out to help pre-JIT execution performance.
226         */
227        private static final int SUFFIX_MASK = ((1<<SUFFIX_BITS)-1);
228    
229        /**Max shared prefix length in characters; strictly positive.
230         * An actual match greater than this will be capped to this.
231         */
232        private static final int MAX_PREFIX_LENGTH = PREFIX_MASK;
233    
234        /**Max shared prefix length in characters; strictly positive.
235         * An actual match greater than this will be capped to this.
236         */
237        private static final int MAX_SUFFIX_LENGTH = SUFFIX_MASK;
238    
239        /**Minimum terminus match length; strictly positive.
240         * This helps eliminate completely spurious linkages that may hurt GC
241         * and trail rubbish in the serialised form of such spuriously-linked values.
242         * <p>
243         * We won't share very short prefixes/suffixes.
244         * <p>
245         * We avoid matches of "" (that should be dealt with by intern()) when this is &gt;0.
246         * We should probably avoid picking up single letters such as trailing full-stops when this is &gt;1.
247         * We should probably allow everything of at least length 3 or 4
248         * (such as ".jpg" or ".au" for example)
249         * which is probably roughly the difference in bytes in a serialised stream
250         * of adaptively encoding a null vs a non-null prev pointer.
251         */
252        private static final int MIN_TERMINUS_MATCH_LENGTH = 3;
253    
254        /**Minimum prefix/suffix total share length to use a prev; strictly positive.
255         * If 1 then we always try to use a 'prev' when there is even one character matching,
256         * but this may entrain lots of 'dead wood' that might otherwise be garbage.
257         * <p>
258         * A higher value may ultimately result in lower memory consumption
259         * (and no loss of compactness with or without compression of the serialised form)
260         * though with some loss in the headline immediate 'characters saved' figure.
261         * <p>
262         * This does not need to be enforced on (old) deserialised data,
263         * since for example intern() may often resolve such issues silently,
264         * and in any case it is not a correctness issue.
265         * <p>
266         * For consistency we line this up with the minimum prefix/suffix match length.
267         */
268        private static final int MIN_SHARE_CHARS = MIN_TERMINUS_MATCH_LENGTH;
269    
270        /**Create a new instance with a previous value to attempt to share a prefix with.
271         * Note that the 'prev' value may be any sub-type of Name.
272         * <p>
273         * Private in order to better manage instance control.
274         *
275         * @param text  the 8-bit text to store and must not equal the prev; never null but can be empty
276         * @param putativePrev  non-empty previous value with which to attempt to share a prefix; null if none
277         */
278        private Name(final CharSequence text, final Name putativePrev)
279            {
280            if(EXTRA_ASSERTS) { assert(null != text) : "factory method should already have screened out null text value"; }
281            // Verify all text is 7-bit.
282            final int length = text.length();
283            for(int i = length; --i >= 0; )
284                { if(text.charAt(i) > 255) { throw new IllegalArgumentException(); } }
285    
286            // Condition use of the prev value.
287            Name p = putativePrev;
288            // If the prev chain is already maximum length
289            // then try going back at least one step
290            // which may still be of some help with compression.
291            // We may in any case always look back for the oldest good-enough prev
292            // to help trim memory use and preserve elbow room.
293            //
294            // This treats suffix matching as a secondary opportunistic task
295            // and we'll optimise any prefix by preference
296            // unless the initial prefix is shorter than the initial suffix.
297            if(null != p)
298                {
299                final int initialPrefixMatchLength = computeCommonPrefixLength(text, p);
300                final int initialSuffixMatchLength = computeCommonSuffixLength(text, p);
301    
302                // Abandon it now now if the offered prev has no shared termini with us!
303                if((initialPrefixMatchLength == 0) && (initialSuffixMatchLength == 0))
304                    { p = null; }
305                else
306                    {
307                    if(ALWAYS_LOOK_FOR_EARLIER_PREV)
308                        {
309                        // Prefer prefix matching if better or same as suffix length.
310                        if(initialPrefixMatchLength >= initialSuffixMatchLength)
311                            {
312                            // Attempt to quickly find at-least-as-good recent prev
313                            // when there is a prefix match available.
314                            while((p != null) && (p.prev != null) &&
315                                    (computeCommonPrefixLength(text, p.prev) >= initialPrefixMatchLength))
316                                { p = p.prev; }
317                            }
318                        else
319                            {
320                            // If no prefix match was available
321                            // then try to find the shortest chain while retaining the current prefix.
322                            while((p != null) && (p.prev != null) &&
323                                    (computeCommonSuffixLength(text, p.prev) >= initialSuffixMatchLength))
324                                { p = p.prev; }
325                            }
326                        }
327    
328                    final int ppCL = p.getPrevChainLength();
329                    if(EXTRA_ASSERTS) { assert(ppCL >= 0); }
330                    if(EXTRA_ASSERTS) { assert(ppCL <= MAX_PREV_CHAIN_LENGTH); }
331                    if(ppCL >= MAX_PREV_CHAIN_LENGTH)
332                        {
333    if(DEBUG) { System.out.println("SharedPrefix7BitString: limiting chain length"); }
334                        // Go back at least one step; further if it doesn't hurt prefix match length much.
335                        // This should give headroom to build a new useful near-maximal-length prefix.
336                        final int targetMatchLength = Math.max(initialPrefixMatchLength / 2, MIN_SHARE_CHARS);
337                        do { p = p.prev; }
338                        while((p != null) && (p.prev != null) &&
339                                ((p.getPrevChainLength() >= MAX_PREV_CHAIN_LENGTH) ||
340                                        (computeCommonPrefixLength(text, p.prev) >= targetMatchLength)));
341                        }
342                    // Ensure that any prev that we now have has a short enough chain length to tag on to.
343                    if(EXTRA_ASSERTS) { assert((p == null) || (p.getPrevChainLength() < MAX_PREV_CHAIN_LENGTH)); }
344                    }
345                }
346            // Find how much of prev is actually in common with the new text.
347            int sharedPrefixLen = (null == p) ? 0 : computeCommonPrefixLength(text, p);
348            // Only store/use any putative prev if there is in fact some common prefix/suffix string.
349            if(null != p)
350                { p = ((sharedPrefixLen > 0) || (computeCommonSuffixLength(text, p) > 0)) ? p : null; }
351            if(EXTRA_ASSERTS) { assert((sharedPrefixLen == 0) || (p != null)); } // No shared prefix possible without a non-null prev.
352            if(EXTRA_ASSERTS) { assert(getPrevChainLength() <= MAX_PREV_CHAIN_LENGTH); }
353    
354            // Now extract any matching suffix from the tail residue after extracting a maximal prefix.
355            int tailLen = length - sharedPrefixLen;
356            int residualSharedSuffixLen = (null == p) ? 0 : computeCommonSuffixLength(text.subSequence(sharedPrefixLen, length), p.subSequence(sharedPrefixLen, p.length()));
357            final int putativeSharedLength = sharedPrefixLen + residualSharedSuffixLen;
358    
359            // If not enough shared length to justify a link to prev
360            // then avoid any such sharing...
361            if(putativeSharedLength < MIN_SHARE_CHARS)
362                {
363                p = null; // Reset to no sharing.
364                sharedPrefixLen = 0; // Reset to no sharing.
365                residualSharedSuffixLen = 0; // Reset to no sharing.
366                tailLen = length; // Reset to no sharing.
367                }
368    
369            // No record the sharing that we are going to use.
370            prev = p;
371            // Now pack the termini lengths in together.
372            terminiLengths = (short) (sharedPrefixLen | (residualSharedSuffixLen << PREFIX_BITS));
373            // Set up the fast-access shadow prefix-length value.
374            _prefixLen = (short) sharedPrefixLen;
375    
376            if(EXTRA_ASSERTS) { assert(getPrefixLen() == sharedPrefixLen) /* : "getPrefixLen()="+getPrefixLen()+", sharedPrefixLen="+sharedPrefixLen */ ; }
377            if(EXTRA_ASSERTS) { assert(getSuffixLen() == residualSharedSuffixLen); }
378            // We can't claim more shared chars than exist in the 'prev' value!
379            if(EXTRA_ASSERTS) { assert(sharedPrefixLen + residualSharedSuffixLen <= ((p == null) ? 0 : p.length())) : ("text="+text+", prev="+p); }
380    
381            // Compute size of middle (unshared) portion.
382            final int midLen = tailLen - residualSharedSuffixLen;
383    
384            if(EXTRA_ASSERTS) { assert(midLen >= 0); }
385            if(EXTRA_ASSERTS) { assert(sharedPrefixLen + midLen + residualSharedSuffixLen == text.length()); }
386    
387            if(midLen == 0)
388                {
389                // Avoid any allocation for the tail if no middle bytes.
390                mid = EMPTIES.EMPTY_MID;
391                }
392            // Usual case: copy the non-shared middle bytes.
393            else
394                {
395                final byte t[] = new byte[midLen];
396                for(int i = midLen; --i >= 0; )
397                    { t[i] = (byte) text.charAt(i + sharedPrefixLen); }
398                mid = t;
399                }
400    
401    if(DEBUG) { System.out.println("prefixLen|midLen|suffixLen = " + sharedPrefixLen+"|"+midLen+"|"+residualSharedSuffixLen + " from "+text.subSequence(0, sharedPrefixLen)+"|"+text.subSequence(sharedPrefixLen, sharedPrefixLen+midLen)+"|"+text.subSequence(sharedPrefixLen+midLen, text.length())+", prevChainLength = " + getPrevChainLength()); }
402    //if(DEBUG && (sharedPrefixLen == 0) && (residualSharedSuffixLen != 0)) { System.out.println("***Shared-suffix-only (suffix len "+residualSharedSuffixLen+") Name instance created: "+text); }
403    
404    if(EXTRA_ASSERTS) { assert((prev == null) || (sharedPrefixLen + residualSharedSuffixLen >= MIN_SHARE_CHARS)); }
405    
406            // Verify what we've been given.
407            try { validateObject(); }
408            catch(final InvalidObjectException e)
409                { throw new IllegalArgumentException(e); }
410            }
411    
412        /**Compute the shared/common prefix length. */
413        private static int computeCommonPrefixLength(final CharSequence text, final Name p)
414            {
415            int sharedPrefixLen = 0;
416            final int l = Math.min(Math.min(text.length(), p.length()), MAX_PREFIX_LENGTH);
417            for(int i = 0; i < l; ++i, ++sharedPrefixLen)
418                { if(p.charAt(i) != text.charAt(i)) { break; } }
419            if(EXTRA_ASSERTS) { assert(sharedPrefixLen >= 0); }
420            return(sharedPrefixLen);
421            }
422    
423        /**Compute the shared/common suffix length.
424         * Note that we may have to match against some tail of the prev text
425         * (to avoid overlapping prefix/suffix parts)
426         * and thus the p parameter is the more general CharSequence.
427         */
428        private static int computeCommonSuffixLength(final CharSequence text, final CharSequence p)
429            {
430            int sharedSuffixLen = 0;
431            final int tLen = text.length();
432            final int pLen = p.length();
433            final int l = Math.min(Math.min(tLen, pLen), MAX_SUFFIX_LENGTH);
434            for(int i = 0; i < l; ++i, ++sharedSuffixLen)
435                { if(p.charAt(pLen - i - 1) != text.charAt(tLen - i - 1)) { break; } }
436            if(EXTRA_ASSERTS) { assert(sharedSuffixLen >= 0); }
437            if(EXTRA_ASSERTS) { assert((sharedSuffixLen == l) || (p.charAt(pLen - sharedSuffixLen - 1) != text.charAt(tLen - sharedSuffixLen - 1))) : "returned match must be maximal length"; }
438            return(sharedSuffixLen);
439            }
440    
441        /**Cache of the computed hash code.
442         * Not part of the permanent/serialised state of the object since easy to recompute,
443         * but this is called so often by intern(), etc, that it is worth storing
444         * given that typical texts will be long.
445         */
446        private transient int hash; // Default to 0
447    
448        /**The hash is computed over the entire text as for String.
449         * This does not depend on how the data is internally stored, eg using a prefix or not.
450         */
451        @Override
452        public final int hashCode()
453            {
454            int h = hash;
455            if(h == 0) // Probably not yet computed, so do so now.
456                {
457                // Hash needs to be (re)computed.
458                final int len = length();
459                for(int i = 0; i < len; i++)
460                    { h = 31*h + charAt(i); }
461                hash = h; // Cache the result.
462                }
463            return(h);
464            }
465    
466        /**Equality depends on the items being the same concrete (derived) type and lexically identical.
467         * This does not depend on how the data is internally stored, eg using a prefix or not.
468         * Specifically designed to work for derived classes too by default.
469         */
470        @Override
471        public final boolean equals(final Object obj)
472            {
473            if(this == obj) { return(true); }
474            if(null == obj) { return(false); }
475            if(!getClass().equals(obj.getClass())) { return(false); }
476            // Can't use 'fast' copies in order to avoid potential infinite recursion...
477            return(TextUtils.contentEquals(this, (Name) obj));
478            }
479    
480        /**True iff this represents the same sequence of char values as the specified sequence.
481         * @param  other  the sequence to compare this {@code String} against
482         *
483         * @return  {@code true} if this represents the same
484         *          sequence of char values as the specified sequence,
485         *          {@code false} otherwise
486         */
487        public final boolean contentEquals(final CharSequence o)
488            {
489            // Very quick length-equality check before any possible conversion, etc...
490            if(length() != o.length()) { return(false); }
491            // Use the 'fast' version if possible since much better for sorting/searching...
492            return(TextUtils.contentEquals((this),
493                    (o instanceof Name) ? (((Name) o)) : o));
494            }
495    
496    //    /**The InternableRanking is lower/better the more characters we are already sharing (ie not having to hold locally).
497    //     * We then aim to minimise the prev chain length.
498    //     * <p>
499    //     * The aim is to be left with the best (most compact) version intern()ed.
500    //     */
501    //    public final int internableRank(final InternableRank obj)
502    //        {
503    //        // Must only be called on items already equals().
504    ////        assert(equals(obj));
505    //
506    //        final Name other = (Name) obj;
507    //
508    //        // See how much prefix/suffix we are sharing.
509    //        final int shared = _prefixLen + getSuffixLen();
510    //        final int sharedOther = other._prefixLen + other.getSuffixLen();
511    //        // More sharing is better so lower rank value.
512    //        if(shared > sharedOther) { return(-1); }
513    //        if(shared < sharedOther) { return(+1); }
514    //
515    //        // Break ties on the length of the prev chain: lower/shorter is better.
516    //        return(getPrevChainLength() - other.getPrevChainLength());
517    //        }
518    
519    
520        /**Non-empty item with which we share a prefix and/or suffix, or null if entire value is empty (equivalent to "") or if there is no shared part(s).
521         * May be non-empty if this new value is a prefix/suffix of the prev value.
522         * <p>
523         * Is protected since potentially to derived instances.
524         */
525        protected final Name prev;
526    
527        /**Termini lengths (unsigned) packed into a single value for efficiency.
528         * The prefix bits are the low-order bits.
529         * <p>
530         * This is part of the compact serialised state.
531         */
532        private final short terminiLengths;
533    
534        /**This is a shadow of the most-frequently accessed prefix length value; non-negative.
535         * This contains a copy of the prefix length.
536         * <p>
537         * Should not expand the memory footprint of an instance
538         * because it should slot in the gap left by the terminiLengths short value.
539         * <p>
540         * This is not part of the serialised state.
541         */
542        private transient short _prefixLen;
543    
544        /**Middle portion of value, or a constant empty value iff there are no middle bytes; never null.
545         * We don't use default serialisation for this:
546         * it is written on the wire as raw bytes (if non-empty, else not at all)
547         * following a one-byte (positive) length if <= 127 chars,
548         * else a 4-byte (negated) length to be able to represent maximal-size CharSequences.
549         */
550        private transient /* final */ byte[] mid;
551    
552        /**Used to hold values that may be needed before Name's own static initialisation can have completed.
553         * This can happen with circular initialisation dependencies.
554         */
555        private static final class EMPTIES
556            {
557            /**Constant/immutable/shareable empty mid value for any empty mid-section; never null. */
558            static final byte[] EMPTY_MID = new byte[0];
559    
560            /**Constant/immutable/shareable empty cached termini; never null. */
561            static final SoftReference<byte[]> EMPTY_TERMINI = new SoftReference<byte[]>(null);
562            }
563    
564        /**Cached termini, catenated to minimise memory overhead; never null.
565         * Held via a SoftReference to allow automatic release if memory is stressed.
566         * <p>
567         * Marked transient since redundant and not part of the persisted state.
568         * <p>
569         * Marked volatile for lock-free thread-safe access.
570         * <p>
571         * When empty we can set it to the constant/empty shared value
572         * which takes up no extra space but saves a null test for this value.
573         */
574        private transient volatile SoftReference<byte[]> _cached_termini = EMPTIES.EMPTY_TERMINI;
575    
576        /**Minimum length of name that we'll attempt to cache termini for; non-negative.
577         * Avoids silly memory overheads for very small instances.
578         */
579        private static final int MIN_TERMINI_CACHE_CHARS = 32;
580    
581    //    /**If true then we can cache the termini of this instance if we wish.
582    //     * Generally this will if the shared termini are long enough
583    //     * to amortise the memory overheads of a SoftReference and another array
584    //     * (which implies that we're not the root of a chain for example)
585    //     * and if we're not close to a root or another cached value
586    //     * that would have a low-enough recursion cost to reach.
587    //     * <p>
588    //     * This reduces the maximum memory burden even when everything possible is cached,
589    //     * but without hurting speed too much.
590    //     * <p>
591    //     * Should be quick to compute as potentially accessed on each charAt().
592    //     */
593    //    private boolean canCacheTermini()
594    //        {
595    //        // Can't/shouldn't cache if at a root.
596    //        if(prev == null) { return(false); }
597    //
598    //        // Can't cache if shared chars are too few to be worth the memory overheads.
599    //        final int sharedChars = _prefixLen + getSuffixLen();
600    //        if(sharedChars < MIN_TERMINI_CACHE_CHARS) { return(false); }
601    //
602    //        // Looks OK to cache...
603    //        return(true);
604    //        }
605    
606        /**Make cached termini and return the cached value; never null.
607         * Computes the cached form of the termini and stores it wrapped in a SoftReference,
608         * discarding anything else already present.
609         * <p>
610         * This has the form of all the chars of the shared prefix
611         * immediately followed by all the chars of the shared suffix,
612         * stored as byte values with no padding.
613         */
614        private byte[] cacheTermini()
615            {
616            final int pL = _prefixLen;
617            final int sL = getSuffixLen();
618            final byte t[] = new byte[pL + sL];
619            if(EXTRA_ASSERTS) { assert(t.length > 0); } // Shouldn't be doing this without some shared state to be cached...
620            for(int i = pL; --i >= 0; )
621                { t[i] = (byte) prev.charAtInternal(i); }
622            final int l = prev.length();
623            for(int i = sL; --i >= 0; )
624                { t[i+pL] = (byte) prev.charAtInternal(l-sL + i); }
625            _cached_termini = new SoftReference<byte[]>(t);
626            return(t);
627            }
628    
629        /**Chain depth, ie how deep our chain of prev values is, zero if this instance has no prev; non-negative.
630         * Can be used to help avoid getting silly tangles of non-GCable prev chains,
631         * and indeed might help avoid situations that might attempt to create circular references.
632         * <p>
633         * Not for normal use of this object!
634         */
635        public final int getPrevChainLength()
636            {
637    //        if(null == prev) { return(0); }
638    //        return(1 + prev.getPrevChainLength());
639            // Convert to iteration...
640            int len = 0;
641            for(Name p = this; null != (p = p.prev); ++len) { }
642            return(len);
643            }
644    
645        /**Length of common prefix with prev item if any; non-negative.
646         * Only of minimal interest outside the class and may be withdrawn from the API.
647         */
648        public final int getPrefixLen()
649            {
650            return(_prefixLen); // return(terminiLengths & PREFIX_MASK); // Bottom bits.
651            }
652    
653        /**Length of common suffix with prev item if any; non-negative.
654         * No one really needs to see this outside the class...
655         * <p>
656         * Being private may encourage a static optimiser such as ProGuard to inline it.
657         */
658        private final int getSuffixLen()
659            {
660            return((terminiLengths >>> PREFIX_BITS) & SUFFIX_MASK); // Uses all remaining most-significant bits.
661            }
662    
663        /**Convert to String containing same char sequence.
664         * We do not expensively intern() the result as it may be short-lived..
665         */
666        @Override public String toString() { return((new StringBuilder(length())).append(this).toString()); }
667    
668        /**Writes the name as ASCII bytes to the given (non-null) byte array starting at the given offset.
669         * The array must be long enough (from the given offset) else an exception will be thrown.
670         * <p>
671         * Aims to be efficient (in CPU and memory footprint).
672         */
673        public final void writeToByteArray(final byte[] buf, final int offset)
674            {
675            if(null == buf) { throw new IllegalArgumentException(); }
676            if(offset < 0) { throw new IllegalArgumentException(); }
677            final int length = length();
678            if(offset + length > buf.length) { throw new IllegalArgumentException(); }
679    
680            // Copy in the shared prefix, if any.
681            final short pLen = _prefixLen;
682            if(pLen != 0)
683                {
684                // Use the cache if already present (but don't force creation of one).
685                final SoftReference<byte[]> cacheSR = _cached_termini;
686                final byte[] cache = cacheSR.get();
687                if(null != cache)
688                    { System.arraycopy(cache, 0, buf, offset, pLen); }
689                else // No cache: fetch directly from prev Name.
690                    {
691                    for(int i = pLen; --i >= 0; )
692                        { buf[i + offset] = prev.charAtInternal(i); }
693                    }
694                }
695            // Copy in the unshared mid, if any.
696            final int mLen = mid.length;
697            if(mLen != 0)
698                { System.arraycopy(mid, 0, buf, pLen + offset, mLen); }
699            // Copy in the shared suffix, if any.
700            final int sStart = pLen + mLen;
701            if(sStart < length)
702                {
703                // Use the cache if already present (but don't force creation of one).
704                final SoftReference<byte[]> cacheSR = _cached_termini;
705                final byte[] cache = cacheSR.get();
706                if(null != cache)
707                    { System.arraycopy(cache, pLen, buf, sStart + offset, length-sStart); }
708                else // No cache: fetch directly from prev Name.
709                    {
710                    for(int i = length; --i >= sStart; )
711                        {
712                        final int reverseIndex = length - i;
713                        buf[i + offset] = prev.charAtInternal(prev.length() - reverseIndex);
714                        }
715                    }
716                }
717            }
718    
719        /**Create private byte[] copy for caller; never null.
720         * Aims to be efficient (in CPU and memory footprint).
721         */
722        public final byte[] toByteArray()
723            {
724            final int length = length();
725            final byte[] result = new byte[length];
726            writeToByteArray(result, 0);
727            return(result);
728            }
729    
730        /**Internal version of charAt() with bounds checking assumed done, etc.
731         * This has no further cacheing...
732         */
733        private byte charAtInternal(final int index)
734            {
735            // Extract from the shared prefix.
736            final int pLen = _prefixLen;
737            if(index < pLen)
738                {
739                // Recurse to fetch from further up the chain.
740                return(prev.charAtInternal(index));
741                }
742    
743            // Extract from the shared suffix.
744            final int mLen = mid.length;
745            final int sStart = pLen + mLen;
746            if(index >= sStart)
747                {
748                // Recurse to fetch from further up the chain.
749                final int length = sStart + getSuffixLen(); // length();
750                if(EXTRA_ASSERTS) { assert(index < length); }
751                final int reverseIndex = length - index;
752                if(EXTRA_ASSERTS) { assert(reverseIndex > 0); }
753                return(prev.charAtInternal(prev.length() - reverseIndex));
754                }
755    
756            // Extract from the (unshared) mid text, which is nice and fast...
757            return(mid[index - pLen]);
758            }
759    
760        //@Override
761        public final char charAt(final int index)
762            {
763            // Lower bounds check on public interface.
764            if(index < 0) { throw new IllegalArgumentException(); }
765    
766            // Use/create the cache here if possible
767            // so that the inner loop implemented by charAtInternal() is as tight as possible.
768    
769            // If the requested char is in the unshared mid portion
770            // then we need not touch the cache at all.
771            final int pLen = _prefixLen;
772            final boolean notInSharedPrefix = (index >= pLen);
773            final int mLen = (mid.length);
774            final int mOffset;
775            if(notInSharedPrefix && (mLen > (mOffset = (index - pLen))))
776                { return((char) (mid[mOffset] & 0xff)); }
777    
778            // Now do the upper bound check in the input...
779            final int sLen = getSuffixLen();
780            final int length = pLen + mLen + sLen; // Quicker than having length() recompute it...
781            if(index >= length) { throw new IllegalArgumentException(); }
782    
783            // If we get here then this instance is not a root.
784            // (Else it would have been satisfied from the mid section which is the whole value.)
785            if(EXTRA_ASSERTS) { assert(prev != null); }
786    
787            // Get value from cache if permitted and available.
788            // This must have enough unshared chars to be worth the SoftReference overheads,
789            // and not so near to a root as to not save much time.
790            if(((pLen + sLen) >= MIN_TERMINI_CACHE_CHARS) && (prev.prev != null) && (prev.prev.prev != null))
791                {
792                final SoftReference<byte[]> cacheSR = _cached_termini;
793                final byte[] cache = cacheSR.get();
794                if(null != cache)
795                    {
796                    // If not in prefix then must be in suffix...
797                    final int offset = notInSharedPrefix ? (index - mLen) : index;
798                    return((char) (cache[offset] & 0xff));
799                    }
800                // If no cache available and we may create one then do so, and use it immediately.
801                else // if(canCacheTermini())
802                    {
803                    final int offset = notInSharedPrefix ? (index - mLen) : index;
804                    return((char) ((cacheTermini())[offset] & 0xff));
805                    }
806                }
807    
808            // Not from the mid section, and no cache available...
809            return((char) (charAtInternal(index) & 0xff));
810            }
811    
812        //@Override
813        public final int length()
814            {
815            return(_prefixLen + mid.length + getSuffixLen());
816            }
817    
818        /**Effectively-immutable light-weight wrapper around the underlying text.
819         * Can be intern()ed to help eliminate some expensive duplicates.
820         */
821        private static class CSWrapper<T extends Name> implements CharSequence8Bit,
822                                                                  Comparable<CharSequence>,
823                                                                  MemoryTools.Internable
824    
825            {
826            /**Underlying instance; non-null.
827             * Definitely not intended to be serialised.
828             */
829            transient final T underlying;
830            /**Start offset (inclusive) on underlying impl; non-negative. */
831            final int start;
832            /**End offset (exclusive) on underlying impl; non-negative. */
833            final int end;
834    
835            /**Create an instance.
836             * Only called from trusted classes so arg-checking can be by assert() to save release-build CPU cycles.
837             * @param underlying  the Name instance that this provides a view of; never null
838             * @param start  inclusive start index; in range [0,end]
839             * @param end  exclusive end index; in range [start,underlying.length()]
840             */
841            CSWrapper(final T underlying, final int start, final int end)
842                {
843                if(EXTRA_ASSERTS)
844                    {
845                    assert(null != underlying);
846                    assert(start >= 0);
847                    assert(end <= underlying.length());
848                    assert(start <= end);
849                    }
850    
851                this.underlying = underlying;
852                this.start = start;
853                this.end = end;
854                }
855    
856            /* (non-Javadoc)
857             * @see java.lang.CharSequence#charAt(int)
858             */
859            //@Override
860            public char charAt(final int index)
861                {
862                if(index < 0) { throw new IllegalArgumentException(); }
863                if(index >= (end-start)) { throw new IllegalArgumentException(); }
864                return(underlying.charAt(index + start));
865                }
866    
867            /* (non-Javadoc)
868             * @see java.lang.CharSequence#length()
869             */
870            //@Override
871            public int length()
872                {
873                return(end-start);
874                }
875    
876            /* (non-Javadoc)
877             * @see java.lang.CharSequence#subSequence(int, int)
878             */
879            //@Override
880            public CharSequence subSequence(final int start, final int end)
881                {
882                // We validate the parameters here since our constructor will not.
883                if(start < 0) { throw new IllegalArgumentException(); }
884                if(start > end) { throw new IllegalArgumentException(); }
885                if(end > length()) { throw new IllegalArgumentException(); }
886                if(EXTRA_ASSERTS) { assert(this.start + end <= underlying.length()); }
887                // Creates new wrapper around original underlying text for efficiency,
888                // ie not chained through this CSWrapper instance.
889                return(new CSWrapper<T>(underlying, this.start + start, this.start + end));
890                }
891    
892            /**Equality checks for the same type and content. */
893            @Override public boolean equals(final Object obj)
894                {
895                if(this == obj) { return(true); }
896                if(!(obj instanceof CSWrapper)) { return(false); }
897                return(TextUtils.contentEquals(this, (CharSequence)obj));
898                }
899    
900            /**The hash code is based on a fixed-time sampling of the content for speed. */
901            @Override public int hashCode()
902                {
903                final int l = length();
904                int h = l;
905                final int step = (l >>> 3);
906                for(int i = l; --i >= 0; i -= step)
907                    { h = 37*h + charAt(i); }
908                h -= l;
909                return(h);
910                }
911    
912            //@Override
913            public int compareTo(final CharSequence other)
914                { return(TextUtils.compare(this, other)); }
915    
916            /**Convert to String containing same char sequence.
917             * We don't expensively intern() the result as it may be short-lived.
918             */
919            @Override public String toString() { return((new StringBuilder(length())).append(this).toString()); }
920    
921            /**Create private byte[] copy for caller; never null. */
922            public final byte[] toByteArray()
923                {
924                final int length = length();
925                final byte[] result = new byte[length];
926                // Too hideous to fully optimise (yet) so do the simple way.
927                for(int i = length; --i >= 0; )
928                    { result[i] = (byte) underlying.charAt(i + start); } // May make use of cache...
929                return(result);
930                }
931            }
932    
933        /**Maximum number of chars to take a copy of the data. */
934        private static final int MAX_TRIVIAL_COPY = 4;
935    
936        /**Generate a light-weight wrapper, or for very small amounts of data, a copy; never null.
937         * Not necessarily Serializable.
938         */
939        //@Override
940        public final CharSequence subSequence(final int start, final int end)
941            {
942            if(start < 0) { throw new IndexOutOfBoundsException(); }
943            final int length = length();
944            if(end > length) { throw new IndexOutOfBoundsException(); }
945            if(start > end) { throw new IndexOutOfBoundsException(); }
946    
947            final int newLength = end - start;
948    
949            // If this is the whole existing value then return it.
950            if((start == 0) && (length == newLength)) { return(this); }
951    
952            // In trivial empty case return fixed empty value.
953            if(newLength == 0) { return(EMPTY); }
954    
955            // For trivial small subsequences,
956            // make a copy of the relevant data,
957            // eg where we wouldn't want to pin down the possibly-large original
958            // for something not even worthy of a pointer.
959            // We don't intern() so as to avoid high overheads.
960            if(newLength <= MAX_TRIVIAL_COPY)
961                { return(new CS8Bit(this, start, end)); }
962    
963            // TODO: optimisation: if this is a prefix then return as a prefix instance.
964    
965            // GENERAL CASE: make a light-weight wrapper.
966            return(new CSWrapper<Name>(this, start, end));
967            }
968    
969    
970        /**Validate fields/state.
971         * Called in the constructor and possibly after deserialising.
972         * <p>
973         * Barf if something bad is found.
974         */
975        public void validateObject()
976            throws InvalidObjectException
977            {
978            // Test some invariants that duff external data should not be able to influence.
979            if(EXTRA_ASSERTS) { assert(null != _cached_termini); }
980            if(EXTRA_ASSERTS) { assert(null != mid); }
981            if(EXTRA_ASSERTS) { assert((mid.length != 0) || (mid == EMPTIES.EMPTY_MID)); }
982    
983            // We should use a fixed single-instance array rather than an empty one.
984            final int prefixLen = _prefixLen;
985            if(EXTRA_ASSERTS) { assert(getPrefixLen() == _prefixLen); }
986            final int suffixLen = getSuffixLen();
987            if(EXTRA_ASSERTS) { assert((prefixLen >= 0) && (suffixLen >= 0)); }
988            final int sharedLen = prefixLen + suffixLen;
989            if(prev != null)
990                {
991                if(sharedLen > prev.length())
992                    { throw new InvalidObjectException("bad object: prefix/suffix too long for prev: shared="+sharedLen+" vs prev.length()="+prev.length()); }
993                if(EXTRA_ASSERTS) { assert(getPrevChainLength() >= 0); }
994                if(getPrevChainLength() > MAX_PREV_CHAIN_LENGTH)
995                    { throw new InvalidObjectException("bad object: overlong prevChainLength: "+getPrevChainLength()); }
996                // Prev must only be non-null if we have some sharing of prefix/suffix.
997                // Note persisted values may have hash a different MIN_SHARE_CHARS at creation
998                // so don't reject something simply because of 'not enough' sharing by current standards.
999                if(sharedLen == 0)
1000                    { throw new InvalidObjectException("bad object: invalid unused prev"); }
1001                }
1002            else
1003                {
1004                if(EXTRA_ASSERTS) { assert(getPrevChainLength() == 0); }
1005                if(sharedLen != 0)
1006                    { throw new InvalidObjectException("bad object: missing prev needed for shared text"); }
1007                }
1008            if(Integer.MAX_VALUE - prefixLen - suffixLen < length())
1009                { throw new InvalidObjectException("bad object: overlong components"); }
1010            }
1011    
1012        /**Deserialise.
1013         * Expects validation to happen in readResolve().
1014         */
1015        private void readObject(final ObjectInputStream in)
1016            throws IOException, ClassNotFoundException
1017            {
1018            // Read in most of the fields in the usual way.
1019            in.defaultReadObject();
1020    
1021            // Set up the fast-access shadow prefix-length value.
1022            _prefixLen = (short) (terminiLengths & PREFIX_MASK);
1023    
1024            // Fix up cache to be non-null (even if not with any cached data for now)...
1025            _cached_termini = EMPTIES.EMPTY_TERMINI;
1026    
1027            // Read in the mid text with a variable-length 'length' prefix.
1028            // This mechanism also ensures that our 'mid' element is unshared.
1029            final byte l0 = in.readByte();
1030            final int length;
1031            if(l0 >= 0) { length = l0; }
1032            else
1033                {
1034                final byte l1 = in.readByte();
1035                final byte l2 = in.readByte();
1036                final byte l3 = in.readByte();
1037                length = -((l0 << 24) + ((l1 & 0xff) << 16) + ((l2 & 0xff) << 8) + (l3 & 0xff));
1038                }
1039            // Create and read the text field; don't create a new empty byte[] for size 0.
1040            if(length == 0) { mid = EMPTIES.EMPTY_MID; }
1041            else
1042                {
1043                mid = new byte[length];
1044                in.readFully(mid);
1045                }
1046    
1047            // IMPORTANT: this instance is now safe to use
1048            // but may not be fully valid (eg prev chain length may be too long).
1049            // such issues have to be handled in read-resolve.
1050            }
1051    
1052        /**Write out a minimally-redundant form of our internal information.
1053         * The more-efficient on-the-wire format also makes defensive
1054         * reading easier.
1055         */
1056        private void writeObject(final ObjectOutputStream oos)
1057            throws IOException
1058            {
1059            // Check if we're about to persist a bogus value.
1060            if(EXTRA_ASSERTS) { validateObject(); }
1061    
1062            // Write out most of the fields in the usual way.
1063            oos.defaultWriteObject();
1064    
1065            // Write the length in a variable-length format
1066            // (very short strings require fewer length bytes).
1067            // For very small strings use a byte-value length.
1068            if(mid.length <= Byte.MAX_VALUE) { oos.writeByte(mid.length); }
1069            // Write larger text sizes using a negative (int, 4-byte) length
1070            // so that reading the first byte will give a negative value.
1071            else { oos.writeInt(-mid.length); }
1072            // Now write the text bytes (if any) directly, unwrapped.
1073            if(0 != mid.length) { oos.write(mid); }
1074            }
1075    
1076        /**Deserialise: validate this instance, coerce mildly-errant values, and eliminate duplicates.
1077         * Has to handle derived classes too.
1078         */
1079        protected Object readResolve()
1080            throws ObjectStreamException
1081            {
1082            // In the case where the prev chain length on the wire is too large for current parameters,
1083            // we generate a new valid replacement here.
1084            if(getPrevChainLength() > MAX_PREV_CHAIN_LENGTH)
1085                {
1086    System.err.println("WARNING: replacing deserialised Name item that has an overly-long chain: "+getPrevChainLength()+" vs "+MAX_PREV_CHAIN_LENGTH);
1087                // Attempt to retain some of the sharing if possible.
1088                Name p;
1089                for(p = prev; (p != null) && (p.getPrevChainLength() >= MAX_PREV_CHAIN_LENGTH); p = p.prev)
1090                    { }
1091                if(EXTRA_ASSERTS) { assert((p == null) || (p.getPrevChainLength() < MAX_PREV_CHAIN_LENGTH)); }
1092    
1093                // FIXME: nasty interdependency between base and derived classes here...
1094                if(ExhibitFull.class.equals(getClass()))
1095                    { return(ExhibitFull.create(new CS8Bit(this), p)); }
1096                return(Name.create(new CS8Bit(this), p));
1097                }
1098    
1099            // Check that this instance is valid.
1100            validateObject();
1101    
1102            // Eliminate duplicates while deserialising this and closely-coupled child classes.
1103            return(MemoryTools.intern(this));
1104            }
1105    
1106        /**Unique serialisation ID. */
1107        private static final long serialVersionUID = 2798109831174584816L;
1108    
1109    
1110        /**ExhibitShortName immutable CharSequence light-weight wrapper around underlying ExhibitFullName data.
1111         * Though slow, these can be used as Map/Set/etc elements,
1112         * as they have a working (though inefficient) consistent hashCode() and equals(),
1113         * but it is much preferred (more efficient/fast) to use ExhibitFull as a key.
1114         */
1115        public static final class ExhibitShort extends CSWrapper<ExhibitFull>
1116            {
1117            /**Create an instance.
1118             * Only intended to be created by an ExhibitFull instance.
1119             */
1120            private ExhibitShort(final ExhibitFull underlying, final int start, final int end)
1121                {
1122                super(underlying, start, end);
1123                if(EXTRA_ASSERTS) { assert(ExhibitName.validNameFinalComponentSyntax(this)); }
1124                }
1125    
1126            /**Retrieve full name for this short name; never null. */
1127            public ExhibitFull getFullName() { return(underlying); }
1128    
1129            /**Extract the main words (stem) component; never null nor empty.
1130             * As ExhibitName.getMainWordsComponent() but optimised for this common case,
1131             * with the result content identical to that from getMainWordsComponent(getFullName()).
1132             * <p>
1133             * The result is intern()able to help conflate identical values if required,
1134             * ie has a working hashCode() and equals(),
1135             * which may save space and accelerate searching for example.
1136             *
1137             * @param allAttrWords  a Set of all legal attribute words (String values);
1138             *     may be empty but not null
1139             */
1140            public CharSequence8Bit getMainWordsComponent(final Set<String> allAttrWords)
1141                {
1142                final int endOfAttrWords = ExhibitName.getEndOfAttrWords(this);
1143                final int endOfMainWords = ExhibitName.getEndOfMainWords(this, -1, endOfAttrWords, allAttrWords);
1144                final CharSequence8Bit result = new CSWrapper<ExhibitFull>(underlying, start, start + endOfMainWords);
1145                // Should be identical result to getMainWordsComponent() on full name.
1146    //            assert(TextUtils.contentEquals(result, ExhibitName.getMainWordsComponent(underlying, allAttrWords)));
1147                return(result);
1148                }
1149    
1150            /**Equality checks for the same type and content.
1151             * Will return true in the case of an identical short name of a different full name.
1152             */
1153            @Override public boolean equals(final Object obj)
1154                {
1155                if(this == obj) { return(true); }
1156                if(!(obj instanceof Name.ExhibitShort)) { return(false); }
1157                return(TextUtils.contentEquals(this, (Name.ExhibitShort)obj));
1158                }
1159    
1160            /**The hash code is based on a fixed-time sampling of the content for speed. */
1161            @Override public int hashCode()
1162                {
1163                final int l = length();
1164                int h = l;
1165                final int step = (l >>> 3);
1166                for(int i = l; --i >= 0; i -= step)
1167                    { h = 37*h + charAt(i); }
1168                h -= l;
1169                return(h);
1170                }
1171    
1172            /**Convert to String containing same char sequence.
1173             * We automatically intern() the short exhibit name result to avoid duplication.
1174             */
1175            @Override public String toString() { return(MemoryTools.intern((new StringBuilder(length())).append(this).toString())); }
1176    
1177            /**Unique leading printable-ASCII character for persistableKey(), not a valid exhibit name char. */
1178            public static final char PERSISTABLE_NAME_LEADING_CHAR = '!';
1179    
1180            /**Length of persistable key in chars; strictly positive. */
1181            public static final char PERSISTABLE_NAME_LENGTH = 8;
1182    
1183            /**Persistable unique printable-ASCII immutable CharSequence key; never null.
1184             * This generates a key:
1185             * <ul>
1186             * <li>that is of fixed (small) length</li>
1187             * <li>that should be shorter than most actual exhibit short names thus representing a reduced memory footprint</li>
1188             * <li>that is quickly and easily distinguished from (and can be unambiguously mixed with) exhibit short (and full) names, by leading PERSISTABLE_NAME_LEADING_CHAR</li>
1189             * <li>that should in practice be unique across all short exhibit names and thus can be used as a substitute for a short name</li>
1190             * <li>depends only on the short name, not any other part of the full name</li>
1191             * <li>has a small amount of verification built in (second char of key is first char of short name)</li>
1192             * <li>does not contain any ASCII control characters or whitespace (&le;32)</li>
1193             * <li><em>not</em> intern()ed here because a key is small enough for the overhead to be potentially prohibitive</li>
1194             * </ul>
1195             * <p>
1196             * This may be slow to compute.
1197             */
1198            public CharSequence persistableKey()
1199                {
1200                final StringBuilder sb = new StringBuilder(PERSISTABLE_NAME_LENGTH);
1201                sb.append(PERSISTABLE_NAME_LEADING_CHAR); // Unique prefix that guarantees it not a valid exhibit name.
1202                sb.append(charAt(0)); // Allows some cross-checking with original short name.
1203    
1204                // Extract a strong SHA-1 digest/hash of the bytes of the short name.
1205                // (We can exclude the first char already explicitly in the key.)
1206                final MessageDigest digestor;
1207                try { digestor = MessageDigest.getInstance(CoreConsts.HASH_SHA1); }
1208                catch(final NoSuchAlgorithmException e) { throw new Error("Unable to locate hash impl: "+CoreConsts.HASH_SHA1); }
1209                for(int i = length(); --i > 0; ) { digestor.update((byte) charAt(i)); }
1210                final byte digest[] = digestor.digest();
1211                if(EXTRA_ASSERTS) { assert(digest != null); }
1212                assert(digest.length >= 4);
1213                // Convert digest to printable ~6.5-bits-per-char form
1214                // to retain as much digest as possible and thus minimise chance of key collision.
1215                for(int i = 0; sb.length() < PERSISTABLE_NAME_LENGTH; i++)
1216                    {
1217                    int ib = digest[i] & 0x7f; // Basic 7-bit form.
1218                    // If result would be control/whitespace (or DEL),
1219                    // then translate it into the printable region,
1220                    // and incorporate the otherwise-discarded digest bit.
1221                    if(ib <= ' ')
1222                        { ib += (((digest[i] & 0x80) != 0) ? 64 : 94); }
1223                    else if(ib == 127) // Avoid DEL code.
1224                        { ib = ((digest[i] & 0x80) != 0) ? 63 : 93; }
1225                    if(EXTRA_ASSERTS) { assert((ib > ' ') && (ib < 127)); }
1226                    sb.append((char) ib); // Append digest char.
1227                    }
1228    
1229                if(EXTRA_ASSERTS) { assert(sb.length() == PERSISTABLE_NAME_LENGTH); }
1230                return(new CS8Bit(sb)); // Convert to immutable compact form.
1231                }
1232            }
1233    
1234        /**ExhibitFullName implementation on top of SharedTermini8BitString.
1235         * This will only allow construction of instances representing a valid full exhibit name.
1236         * <p>
1237         * This carries no significant extra data over SharedTermini8BitString,
1238         * and mainly represents a guarantee that the data is a valid full exhibit name,
1239         * plus some utility methods/types for conveniently and efficiently accessing components of that name.
1240         * <p>
1241         * The constructors are private and only accessible via factory methods
1242         * to help with instance control,
1243         * and all instances are automatically intern()ed to eliminate duplicates.
1244         */
1245        public static final class ExhibitFull extends Name
1246            {
1247            private ExhibitFull(final CharSequence name, final Name prev)
1248                {
1249                super(name, prev); // Encode/store the text.
1250    
1251                // Verify what we've been given.
1252                try { validateObject(); }
1253                catch(final InvalidObjectException e)
1254                    { throw new IllegalArgumentException(e.getMessage()); }
1255                }
1256    
1257            /**Create an ExhibitFull instance; never null.
1258             * It is often best to attempt to share a prefix with the lexically previous (or next) item.
1259             * <p>
1260             * If passed an ExhibitFull (but not including sub-classes) this returns it untouched.
1261             *
1262             * @param fullName  the (8-bit) syntactically-valid full exhibit name; never null nor empty
1263             *
1264             * @throws IllegalArgumentException  in the case of a null or invalid putative full name
1265             */
1266            public static ExhibitFull create(final CharSequence fullName)
1267                { return(create(fullName, null)); }
1268    
1269            /**Create an ExhibitFull instance with a previous value to attempt to share a prefix with; never null.
1270             * It is often best to attempt to share a prefix with the lexically previous (or next) item.
1271             * <p>
1272             * If passed an ExhibitFull (but not including sub-classes) this returns it untouched.
1273             *
1274             * @param fullName  the (8-bit) syntactically-valid full exhibit name; never null nor empty
1275             * @param prev  previous Name with which to attempt to share a prefix; null if none
1276             *
1277             * @throws IllegalArgumentException  in the case of a null or invalid putative full name
1278             */
1279            public static ExhibitFull create(final CharSequence fullName,
1280                                             final Name prev)
1281                {
1282                if(null == fullName) { throw new IllegalArgumentException("ExhibitFull Name cannot be null"); }
1283    
1284                if(ExhibitFull.class.equals(fullName.getClass()))
1285                    {
1286                    // Optimisation: if this is already an ExhibitFull (and not a sub-class) then return it as-is
1287                    // if no prev has been offered (that might share a longer prefix).
1288                    if(prev == null) { return((ExhibitFull) fullName); }
1289                    // Optimisation: if prev content equals this then return prev to save a visit to intern(), etc.
1290                    else if(prev.contentEquals(fullName)) { return((ExhibitFull) prev); }
1291                    }
1292    
1293                final AdHocPrefixCache<Integer> adHocPrefixCache = getAdHocPrefixCache();
1294                // If no prev was specified we look for one in the ad-hoc cache.
1295                final Name prevToUse;
1296                if(prev != null) { prevToUse = prev; }
1297                else
1298                    {
1299                    // If we find an exact match with the right class in the ad-hoc cache then return it.
1300                    prevToUse = adHocPrefixCache.findPrev(fullName);
1301                    if((prevToUse != null) && (ExhibitFull.class.equals(prevToUse.getClass())) && prevToUse.contentEquals(fullName))
1302                        { return((ExhibitFull) prevToUse); }
1303                    }
1304                // Return intern()ed value to eliminate accidental duplicates.
1305                // Pass it through the ad-hoc prefix cache too...
1306                final ExhibitFull result = adHocPrefixCache.offerNewInstance(MemoryTools.intern(new ExhibitFull(fullName, prevToUse)));
1307                if(EXTRA_ASSERTS) { assert(ExhibitFull.class.equals(result.getClass())); }
1308                return(result);
1309                }
1310    
1311            /**Shared dummy (valid, small, alphabetically-late) instance with no prev; not null.
1312             * Can help to avoid picking up random dependencies on other ExhibitFull instances,
1313             * eg by blocking use of ad-hoc prefix sharing,
1314             * which may be useful for data due to be serialised.
1315             */
1316            public static final Name.ExhibitFull DUMMY = MemoryTools.intern(create("z/z-Z.z", null));
1317    
1318            /**Cache of link to parent, computed on demand.
1319             * Only accessed under instance lock so as to eliminate access races
1320             * and thus guarantee avoidance of creation of duplicates.
1321             * <p>
1322             * Not serialisable (because recomputable).
1323             */
1324            private transient ExhibitShort shortName;
1325    
1326            /**Extract strongly-typed ExhibitShortName, ie the filename component; never null.
1327             * This is a light-weight, non-intern()able CharSequence wrapper around the existing data.
1328             * <p>
1329             * This guarantees that only one ShortName instance can be generated for each FullName.
1330             * Thus intern()ing is neither needed nor supported.
1331             */
1332            public synchronized ExhibitShort getShortName()
1333                {
1334                ExhibitShort result = shortName;
1335                if(null != shortName) { return(result); }
1336                shortName = result = new ExhibitShort(this, TextUtils.lastIndexOf(this, '/') + 1, length());
1337                return(result);
1338                }
1339    
1340            /**Create a "virtual" full name consisting of the category directory and then the file component; never null.
1341             * The results in a syntactically-valid full exhibit name,
1342             * but omits the internal directory "noise".
1343             * <p>
1344             * Returns this instance as-is if already 'virtual'.
1345             */
1346            public Name.ExhibitFull getVirtualExhibitName()
1347                {
1348                // Optimisation:
1349                // if there is only one slash, the name is already virtual,
1350                // so the name can be returned untouched.
1351                final int firstSlash = TextUtils.indexOf(this, '/');
1352                final int lastSlash = TextUtils.lastIndexOf(this, '/');
1353                if(firstSlash == lastSlash) { return(this); }
1354    
1355                final int length = length();
1356                // FIXME: avoid inefficient conversion via StringBuilder/char[].
1357                return(create(new StringBuilder(length - (lastSlash-firstSlash))
1358                    .append(subSequence(0, firstSlash+1))
1359                    .append(subSequence(lastSlash+1, length)),
1360                              this)); // Prefix and suffix should be entirely shareable with this...
1361                }
1362    
1363            /**Convert to String containing same char sequence; never null.
1364             * We automatically intern() the full-exhibit-name result to avoid duplication.
1365             */
1366    //        @Override public String toString() { return(MemoryTools.intern((new StringBuilder(length())).append(this).toString())); }
1367            @Override public String toString() { return(MemoryTools.intern((new StringBuilder(length())).append((CharSequence8Bit) (this)).toString())); }
1368    
1369            /**Validate fields/state.
1370             * Called in the constructor and possibly after de-serialising.
1371             * <p>
1372             * Barf if something bad is found.
1373             * (Maybe allow some extra info in debug version.)
1374             */
1375            @Override
1376            public void validateObject()
1377                throws InvalidObjectException
1378                {
1379                // Check Name invariants.
1380                super.validateObject();
1381    
1382                // Verify that this is a valid full Exhibit name!
1383                if(!ExhibitName.validNameSyntax(this))
1384                    { throw new InvalidObjectException("bad object: not a valid full exhibit name: "+this); }
1385                }
1386    
1387            /**Unique Serialisation class ID generated by http://random&#46;hd&#46;org/. */
1388            private static final long serialVersionUID = 8993934283490189577L;
1389            }
1390    
1391        /**Provide natural sort order for this and derived classes. */
1392        public final int compareTo(final CharSequence o)
1393            {
1394            if(this == o) { return(0); }
1395            // Use the 'fast' version if possible since much better for sorting/searching...
1396            return(TextUtils.compare((this),
1397                    (o instanceof Name) ? (((Name) o)) : o));
1398            }
1399    
1400        /**Provide natural sort order for this and derived classes. */
1401        public final int compareTo(final Name o)
1402            {
1403            // Same instance compares equal...
1404            if(this == o) { return(0); }
1405            // Use the 'fast' version if possible since much better for sorting/searching...
1406            return(TextUtils.compare(this, o));
1407            }
1408    
1409        /**Compute the Integer prefix key for the supplied CharSequence; never null.
1410         * This assumes that the top 8 bits of each char are unimportant
1411         * since if the sequence is to be turned into a Name they must be zero.
1412         * <p>
1413         * We construct the prefix key from the first four characters of the putative Name
1414         * (or fewer if the name is shorter).
1415         */
1416        @SuppressWarnings("unused") private static Integer computeIntegerPrefixKey(final CharSequence cs)
1417            {
1418            if(EXTRA_ASSERTS) { assert(null != cs); }
1419            final int l = cs.length();
1420            if((MIN_TERMINUS_MATCH_LENGTH <= 0) && (l == 0)) { return(0); }
1421            int result = cs.charAt(0);
1422            if((MIN_TERMINUS_MATCH_LENGTH <= 1) && (l == 1)) { return(result); }
1423            result |= (((int) cs.charAt(1)) << 8);
1424            if((MIN_TERMINUS_MATCH_LENGTH <= 2) && (l == 2)) { return(result); }
1425            result |= (((int) cs.charAt(2)) << 16);
1426            if((MIN_TERMINUS_MATCH_LENGTH <= 3) && (l == 3)) { return(result); }
1427            result |= (((int) cs.charAt(3)) << 24);
1428            return(result);
1429            }
1430    
1431        /**Compute the Integer suffix key for the supplied CharSequence; never null.
1432         * This assumes that the top 8 bits of each char are unimportant
1433         * since if the sequence is to be turned into a Name they must be zero.
1434         * <p>
1435         * We construct the suffix key from the last four characters of the putative Name
1436         * (or fewer if the name is shorter).
1437         */
1438        @SuppressWarnings("unused") private static Integer computeIntegerSuffixKey(final CharSequence cs)
1439            {
1440            if(EXTRA_ASSERTS) { assert(null != cs); }
1441            final int l = cs.length();
1442            if((MIN_TERMINUS_MATCH_LENGTH <= 0) && (l == 0)) { return(0); }
1443            int result = cs.charAt(l - 1);
1444            if((MIN_TERMINUS_MATCH_LENGTH <= 1) && (l == 1)) { return(result); }
1445            result |= (((int) cs.charAt(l - 2)) << 8);
1446            if((MIN_TERMINUS_MATCH_LENGTH <= 2) && (l == 2)) { return(result); }
1447            result |= (((int) cs.charAt(l - 3)) << 16);
1448            if((MIN_TERMINUS_MATCH_LENGTH <= 3) && (l == 3)) { return(result); }
1449            result |= (((int) cs.charAt(l - 4)) << 24);
1450            return(result);
1451            }
1452    
1453        /**Contains lookup cache by prefix for all Name instances.
1454         * This is used to try to find a prefix for a newly-constructed Name
1455         * where no prefix has been supplied at all.
1456         * <p>
1457         * This looks up in a short list of recent/useful Name values with the same fixed-length prefix
1458         * for something of the same concrete type and the longest available prefix match,
1459         * returning the candidate 'prev' item (or null if none)
1460         * which then can be used in the usual way.
1461         * <p>
1462         * The Name instances are held via WeakReference wrappers
1463         * which should ensure that the cache does not keep them from being garbage collected.
1464         * <p>
1465         * This would usually be invoked by the factory method or constructor
1466         * where no 'prev' has been supplied.
1467         * <p>
1468         * This cache should grow as needed to get a decent hit rate,
1469         * and shrink to release memory when the hit rate is high or the memory subsystem is stressed.
1470         * <p>
1471         * Thread-safe but does not support concurrency (fully synchronized).
1472         * <p>
1473         * A lock can be held on instances of this object to make compound operations atomic.
1474         *
1475         * @param <TerminusKey>  type of key for prefix of Name instances
1476         *     must be immutable and support consistent hashCode() and equals()
1477         */
1478        private static final class AdHocPrefixCache<TerminusKey extends Number>
1479            {
1480            /**The lookup from prefix key to list of candidate same-prefix instances; never null.
1481             * On each list may be items all of exactly one prefix
1482             * or whose prefix hashes to the same value,
1483             * depending on how the prefix key value is constructed.
1484             * <p>
1485             * Empty lists (or those effectively empty because all the WeakReferences have been dropped)
1486             * will be purged (ie empty lists are not retained).
1487             * <p>
1488             * The lists values are of type LinkedList, with 'best' items first.
1489             * <p>
1490             * (In future, where a list is of length 1 and where the maximum length is currently 1,
1491             * a list may be stored as a singleton for efficiency.)
1492             * <p>
1493             * We hope for a reasonable minimum size to have a sensible chance of any match at all
1494             * across a large text base.
1495             * <p>
1496             * We size the maximum based on total heap capacity,
1497             * and allow everything to be discarded in case of extreme memory stress.
1498             */
1499            private final MemoryTools.CacheMiniMap<TerminusKey, LinkedList<WeakReference<Name>>> prefixStore =
1500                MemoryTools.SimpleLRUMapAutoSizeForHitRate.<TerminusKey, LinkedList<WeakReference<Name>>>create(
1501                        0, computeCacheMaxSize(), "prefixStore");
1502    
1503            /**The lookup from suffix key to list of candidate same-suffix instances; never null.
1504             * Lighter-weight than the prefixStore, and smaller, since suffix matching is a secondary activity.
1505             */
1506            private final MemoryTools.CacheMiniMap<TerminusKey, LinkedList<WeakReference<Name>>> suffixStore =
1507                MemoryTools.SimpleProbabilisticCache.<TerminusKey, LinkedList<WeakReference<Name>>>create(
1508                        computeCacheMaxSize() >> 2, // Smaller heap for suffixes.
1509                        "suffixStore");
1510    
1511            /**Compute maximum (prefix) cache size, partly based on heap size; strictly positive.
1512             * Aim to allow ~10000 entries per 1GB of heap, with a minimum of several hundred.
1513             */
1514            private static int computeCacheMaxSize()
1515                { return(Math.max(1<<9, (int) Math.min(1<<16, Runtime.getRuntime().totalMemory() >> 17))); }
1516    
1517    
1518            /**Returns a suitable prev (with a shared prefix/suffix) for the supplied non-null Name of any Name subclass, else null if none found.
1519             * May trim state, especially dead references.
1520             */
1521            synchronized Name findPrev(final CharSequence putativeName)
1522                {
1523                if(EXTRA_ASSERTS) { assert(null != putativeName); }
1524    
1525                final int len = putativeName.length();
1526    
1527                // Don't try for very short matches.
1528                if(len < MIN_TERMINUS_MATCH_LENGTH) { return(null); }
1529    
1530                // Find and return the best match, if any.
1531                // If we find any prefix match then we don't try for a suffix match.
1532                for(final boolean doPrefix : new boolean[]{true, false})
1533                    {
1534                    // Choose which store to operate on.
1535                    final MemoryTools.CacheMiniMap<TerminusKey, LinkedList<WeakReference<Name>>> store =
1536                        doPrefix ? prefixStore : suffixStore;
1537                    // Compute the correct key for the selected store.
1538                    final TerminusKey key = (TerminusKey)
1539                        (doPrefix ? computeIntegerPrefixKey(putativeName) : computeIntegerSuffixKey(putativeName));
1540    
1541                    // Get the existing list for this prefix if any...
1542                    final LinkedList<WeakReference<Name>> list = store.get(key);
1543                    // If no list then there is no available match.
1544                    if(null == list) { return(null); } // Nothing found.
1545    
1546                    Name result = null; // Nothing found yet.
1547                    int savedCharsBest = 0; // Best shared-characters length (prefix and suffix) found so far.
1548                    WeakReference<Name> eBest = null; // Container for the best item.
1549                    boolean first = true;
1550                    for(final Iterator<WeakReference<Name>> it = list.iterator(); it.hasNext(); first = false)
1551                        {
1552                        final WeakReference<Name> e = it.next();
1553                        final Name cachedName = e.get();
1554                        // If we find a dead entry then zap it and continue...
1555                        if(null == cachedName)
1556                            {
1557                            it.remove();
1558                            continue;
1559                            }
1560                        // Find common prefix/suffix length: skip if zero.
1561                        // Can be > len if sharable prefix and suffix overlap.
1562                        final int savedChars = Name.computeCommonPrefixLength(putativeName, cachedName) + Name.computeCommonSuffixLength(putativeName, cachedName);
1563                        if(savedChars == 0)
1564                            { continue; }
1565                        // If this is best so far than capture it.
1566                        if(savedChars > savedCharsBest)
1567                            {
1568    if(DEBUG /*|| !doPrefix*/) { System.out.println("***Name ad-hoc "+(doPrefix?"prefix":"suffix")+" match found "+((result==null) ? "" : "better ")+"match with shared chars "+savedChars); }
1569                            result = cachedName;
1570                            savedCharsBest = savedChars;
1571                            if(!first) { eBest = e; }
1572                            // If we already have a maximal match (no unshared characters) then stop looking.
1573                            if(savedChars >= len) { break; }
1574                            }
1575                        }
1576                    // Move best item to the front to mark it as 'best' (if not already there).
1577                    if(null != eBest)
1578                        {
1579                        list.remove(eBest);
1580                        list.addFirst(eBest);
1581                        }
1582    
1583                    // Trim the list iff now too long (eg because we are now memory-stressed).
1584                    trimListLength(list, doPrefix);
1585                    // If the list is now empty then remove it entirely.
1586                    if(list.isEmpty()) { store.remove(key); }
1587    
1588                    // If we found something then don't move on to the next store...
1589                    if(result != null) { return(result); }
1590                    }
1591    
1592                return(null); // Nothing found...
1593                }
1594    
1595            /**Offer a newly constructed Name instance to the cache; returns the value passed in.
1596             * Rejected if already at maximum chain length.
1597             * @return the input name unchanged
1598             */
1599            synchronized <N extends Name> N offerNewInstance(final N name)
1600                {
1601                if(null == name) { return(null); }
1602    
1603                // Don't clutter our tables with very short values.
1604                if(name.length() < MIN_TERMINUS_MATCH_LENGTH) { return(name); }
1605    
1606                // If too long to share termini with, espeically ad-hoc, then reject.
1607                if(name.getPrevChainLength() >= MAX_PREV_CHAIN_LENGTH) { return(name); }
1608    
1609                for(final boolean doPrefix : new boolean[]{true, false})
1610                    {
1611                    // Choose which store to operate on.
1612                    final MemoryTools.CacheMiniMap<TerminusKey, LinkedList<WeakReference<Name>>> store =
1613                        doPrefix ? prefixStore : suffixStore;
1614                    // Compute the correct key for the selected store.
1615                    final TerminusKey key = (TerminusKey)
1616                        (doPrefix ? computeIntegerPrefixKey(name) : computeIntegerSuffixKey(name));
1617    
1618                    // Get the existing list for this prefix if any...
1619                    LinkedList<WeakReference<Name>> list = store.get(key);
1620                    // Create (and store) a new list if need be.
1621                    if(null == list)
1622                        {
1623                        list = new LinkedList<WeakReference<Name>>();
1624                        store.put(key, list);
1625                        }
1626                    // Trim out any dead items (ie weak references gone away).
1627                    trimDeadRefs(list);
1628                    // If something already equivalent to the supplied Name is on the list
1629                    // (ie same content and concrete class)
1630                    // then move it to the front (unless already at the front!)
1631                    // rather than creating a new WeakReference instance
1632                    // and/or filling up the list with duplicates for example.
1633                    boolean foundNameAlreadyOnList = false;
1634                    boolean first = true;
1635                    for(final Iterator<WeakReference<Name>> it = list.iterator(); it.hasNext(); first = false)
1636                        {
1637                        final WeakReference<Name> e = it.next();
1638                        final Name cachedName = e.get();
1639                        if(name.equals(cachedName))
1640                            {
1641                            foundNameAlreadyOnList = true;
1642                            if(!first)
1643                                {
1644    if(DEBUG) { System.out.println("Name.AHPC: moved up to first position: "+name); }
1645                                it.remove(); // Remove from this position...
1646                                list.addFirst(e); // Re-insert at start (without constructing a new WeakRef).
1647                                }
1648    else { if(DEBUG) { System.out.println("Name.AHPC: found already in first position: "+name); } }
1649                            break;
1650                            }
1651                        }
1652                    // Else stuff us on the front of the list as 'newest' and possibly best.
1653                    if(!foundNameAlreadyOnList)
1654                        {
1655                        list.addFirst(new WeakReference<Name>(name));
1656    if(DEBUG) { System.out.println("Name.AHPC: inserting new at first position: "+name); }
1657                        }
1658    
1659                    // Trim the list iff now too long.
1660                    trimListLength(list, doPrefix);
1661                    }
1662    
1663                return(name); // Return the input name.
1664                }
1665    
1666            /**Trim dead references from the supplied non-null list. */
1667            private void trimDeadRefs(final List<WeakReference<Name>> l)
1668                {
1669                for(final Iterator<WeakReference<Name>> it = l.iterator(); it.hasNext(); )
1670                    {
1671                    final WeakReference<Name> e = it.next();
1672                    // Test if ref is alive and zap entry if not.
1673                    // Note that get() doesn't keep ref artificially alive unlike for SoftReference...
1674                    if(null == e.get())
1675                        { it.remove(); }
1676                    }
1677                }
1678    
1679            /**Trim the supplied non-null list to size if too long. */
1680            private void trimListLength(final LinkedList<WeakReference<Name>> l, final boolean isPrefix)
1681                {
1682                // Cannot be overlong unless longer than smallest permitted max length...
1683                if(l.size() > STRESSED_LIST_LEN)
1684                    {
1685                    final int targetLen = getMaxListLen(isPrefix);
1686                    while(l.size() > targetLen)
1687                        { l.removeLast(); }
1688                    }
1689                }
1690    
1691            /**Maximum list length under normal circumstances; strictly positive.
1692             * Borrowing from GZIP '-6'/default experience a good value might be ~6.
1693             */
1694            private static final int MAX_LIST_LEN = 6;
1695    
1696            /**Maximum secondary list length under normal circumstances; strictly positive.
1697             * Since suffix matching is secondary goal this is shorter than the general limit
1698             * but should still probably be greater than 1.
1699             */
1700            private static final int MAX_SECONDARY_LIST_LEN = ((MAX_LIST_LEN >> 1) | 1);
1701    
1702            /**Maximum list length under memory stress; strictly positive, less than MAX_LIST_LEN.
1703             * This will do a less good job of finding matches (ie will probably do less compaction)
1704             * but will consume less memory in the ad-hoc cache.
1705             * <p>
1706             * Under stress, any entries we touch are capped to this length,
1707             * thus allowing us to gracefully and incrementally free space.
1708             * <p>
1709             * Borrowing from GZIP '-1/'-fast' experience a good value might be 1.
1710             */
1711            private static final int STRESSED_LIST_LEN = 1;
1712    
1713            /**Maximum same-prefix list length; strictly positive.
1714             * This will drop to a smaller measure if memory is under stress.
1715             */
1716            /* private */ int getMaxListLen(final boolean isPrefix)
1717                { return(MemoryTools.isMemoryStressed() ? STRESSED_LIST_LEN : (isPrefix ? MAX_LIST_LEN : MAX_SECONDARY_LIST_LEN)); }
1718            }
1719    
1720        /**Get instance of AdHocPrefixCache; never null. */
1721        private synchronized static final AdHocPrefixCache<Integer> getAdHocPrefixCache()
1722            {
1723            // If cache has not been discarded then return it.
1724            final SoftReference<AdHocPrefixCache<Integer>> cache = _adHocPrefixCache;
1725            AdHocPrefixCache<Integer> result = null;
1726            if((null != cache) && (null != (result = cache.get())))
1727                { return(result); }
1728            // Construct and cache and return a new instance.
1729            result = new AdHocPrefixCache<Integer>();
1730            _adHocPrefixCache = new SoftReference<AdHocPrefixCache<Integer>>(result);
1731            return(result);
1732            }
1733    
1734        /**Memory-sensitive cache of Name instances indexed by fixed-length prefix and suffix; initially null, not thereafter.
1735         * Borrows a few techniques from GZIP-like algorithms.
1736         * <p>
1737         * This uses Integer as a key for up to 4 bytes of prefix/suffix lookup.
1738         * <p>
1739         * Only accessed under the class lock, by getAdHocPrefixCache().
1740         * <p>
1741         * Held via a StaticReference to allow the entire cache to be discarded automatically
1742         * under extreme memory stress.
1743         * <p>
1744         * Accessed under the class lock.
1745         */
1746        private static SoftReference<AdHocPrefixCache<Integer>> _adHocPrefixCache;
1747    
1748        /**Case-insensitive Comparator for Name equivalent to TextUtils.CASE_INSENSITIVE_ORDER; not null.
1749         * May be able to operate more efficiently than generic TextUtils.CASE_INSENSITIVE_ORDER.
1750         */
1751        public static final Comparator<Name> CASE_INSENSITIVE_ORDER = new Comparator<Name>(){
1752            public int compare(final Name n1, final Name n2)
1753                { return(TextUtils.CASE_INSENSITIVE_ORDER.compare(n1, n2)); }
1754            };
1755    
1756        /**Set true for extra assert()s for internal invariants, usually only in non-release code. */
1757        private static final boolean EXTRA_ASSERTS = IsDebug.isDebug;
1758    
1759        /**Set true for extra debug/development logging output. */
1760        private static final boolean DEBUG = false;
1761        }