001    package org.hd.d.pg2k.svrCore;
002    
003    import java.io.IOException;
004    import java.io.ObjectInputStream;
005    import java.io.ObjectOutputStream;
006    import java.util.Arrays;
007    
008    import org.hd.d.pg2k.svrCore.TextUtils.CharSequence8Bit;
009    
010    /**Immutable lightweight CharSequence holding 8-bit text only.
011     * Internally the text is stored as a byte[] for compactness.
012     * <p>
013     * Implements hashCode() and equals() so suitable as a hash key.
014     */
015    public final class CS8Bit implements CharSequence8Bit,
016                                         java.io.Serializable,
017                                         MemoryTools.Internable
018        {
019        /**The 8-bit text of this sequence; ever null but may be empty. */
020        private transient /* final */ byte[] text;
021    
022        /**Copy from byte array. */
023        public CS8Bit(final byte[] d)
024            { text = d.clone(); }
025    
026        /**Copy from another CharSequence8Bit (defensively). */
027        public CS8Bit(final CharSequence8Bit other)
028            { text = other.toByteArray().clone(); } // Somewhat paranoid to copy again...
029    
030        /**Copy a sub-sequence of another CharSequence8Bit.
031         * @param other  other sequence; never null
032         * @param start  inclusive start index
033         * @param end  exclusive end index
034         */
035        public CS8Bit(final CharSequence8Bit other, final int start, final int end)
036            {
037            text = new byte[end - start];
038            for(int i = text.length; --i >= 0; )
039                { text[i] = (byte) other.charAt(i + start); }
040            }
041    
042        /**Copy from another CharSequence, vetoing if any non-8-bit data. */
043        public CS8Bit(final CharSequence other)
044            throws IllegalArgumentException
045            {
046            text = new byte[other.length()];
047            for(int i = text.length; --i >= 0; )
048                {
049                final char c = other.charAt(i);
050                if(c >= 0x100) { throw new IllegalArgumentException(); }
051                text[i] = (byte) c;
052                }
053            }
054    
055        public int length()
056            { return(text.length); }
057    
058        public char charAt(final int index)
059            { return((char) (text[index] & 0xff)); }
060    
061        /**Extract subsequence; never null.
062         * Usually a very lightweight wrapper around this instance,
063         * but may be an outright copy for very short subsequences for efficiency.
064         */
065        public CharSequence8Bit subSequence(final int startSS, final int endSS)
066            {
067            if((startSS < 0) || (endSS < startSS) || (endSS > length())) { throw new IllegalArgumentException(); }
068    
069            final int lengthSS = endSS - startSS;
070            // For very small fragments take a copy of the data as this is assumed to be fast,
071            // and probably a smaller footprint than wrapping,
072            // and potentially allows GC of the parent instance...
073            if(lengthSS <= 8) { return(new CS8Bit(this, startSS, endSS)); }
074    
075            // Construct very lightweight wrapper around this CS8Bit instance.
076            return(new CharSequence8Bit() {
077                public int length() { return(lengthSS); }
078                public char charAt(final int index)
079                    {
080                    if((index < 0) || (index >= lengthSS)) { throw new IllegalArgumentException(); }
081                    return((char) (text[startSS + index] & 0xff));
082                    }
083                /**Rewrap the original underlying CS8Bit for efficiency; never null. */
084                public CharSequence subSequence(final int start, final int end)
085                    {
086                    if((start < 0) || (end < start) || (end > lengthSS)) { throw new IllegalArgumentException(); }
087                    return(CS8Bit.this.subSequence(startSS + start, startSS + end));
088                    }
089                @Override public final String toString()
090                    { return((new StringBuilder(lengthSS)).append(this).toString()); }
091                /**Create private byte[] copy for caller; never null. */
092                public final byte[] toByteArray() { return(Arrays.copyOfRange(text, startSS, startSS+lengthSS)); }
093                });
094            }
095    
096        /**Convert to String with same char sequence; never null. */
097        @Override public final String toString()
098            { return((new StringBuilder(text.length)).append(this).toString()); }
099    
100        /**Create private byte[] copy for caller; never null. */
101        public final byte[] toByteArray() { return(text.clone()); }
102    
103    
104        /**Cache of the computed hash code.
105         * Not part of the permanent/serialised state of the object since easy to recompute.
106         */
107        private transient int hash; // Default to 0
108    
109        /**The hash is computed over the entire text as for String. */
110        @Override public int hashCode()
111            {
112            int h = hash;
113            if(h == 0) // Probably not yet computed, so do so now.
114                {
115                // Hash needs to be (re)computed.
116                final int len = length();
117                for(int i = 0; i < len; i++)
118                    { h = 31*h + text[i]; }
119                hash = h; // Cache the result.
120                }
121            return(h);
122            }
123    
124        /**Equality depends on the entire content being the same. */
125        @Override public boolean equals(final Object o)
126            {
127            if(this == o) { return(true); }
128            if(!(o instanceof CS8Bit)) { return(false); }
129            return(Arrays.equals(text, ((CS8Bit)o).text));
130            }
131    
132    
133        /**Unique serialisation ID. */
134        private static final long serialVersionUID = -475170579269562789L;
135    
136        /**Empty sequence. */
137        public static final CS8Bit EMPTY = new CS8Bit(new byte[0]);
138    
139        /**Deserialise. */
140        private void readObject(final ObjectInputStream in)
141            throws IOException, ClassNotFoundException
142            {
143            final byte l0 = in.readByte();
144            final int length;
145            if(l0 >= 0) { length = l0; }
146            else // Read rest of 32-bit form of length...
147                {
148                final byte l1 = in.readByte();
149                final int us1 = in.readUnsignedShort();
150                length = -((l0 << 24) + ((l1 & 0xff) << 16) + us1);
151                }
152            // Create and read the text field.
153            if(length == 0) { text = EMPTY.text; }
154            else
155                {
156                text = new byte[length];
157                in.readFully(text);
158                }
159            }
160    
161        /**Write out a minimally-redundant form of our internal information.
162         * The more-efficient on-the-wire format also makes defensive
163         * reading easier.
164         * <p>
165         * We don't write *any* default state.
166         * <p>
167         * We must never be asked to serialise if we have a (non-null) dictionary.
168         * <p>
169         * We must never be asked to serialise if the text is too long for us to encode.
170         */
171        private void writeObject(final ObjectOutputStream oos)
172            throws IOException
173            {
174            // Write the length in a variable-length format.
175            // For very small strings use a byte-value length.
176            if(text.length <= Byte.MAX_VALUE) { oos.writeByte(text.length); }
177            // Write larger text sizes using a negative length
178            // so that reading the first byte will give a negative value.
179            else { oos.writeInt(-text.length); }
180    
181            // Now write the text bytes directly, if any, unwrapped.
182            if(text.length != 0) { oos.write(text); }
183            }
184    
185        /**Deserialise: use constructor for validation, etc.
186         * Also resolve all empty instances to a single value as a minor optimisation.
187         */
188        protected Object readResolve()
189            // throws ObjectStreamException
190            {
191            // Avoid duplicates of empty case.
192            if(text.length == 0) { return(EMPTY); }
193    
194            // Attempt to mop up duplicates to help with instance control.
195            // Assume that defensive copying is not necessary since content is unshared.
196            return(MemoryTools.intern(this));
197            }
198        }