001 package org.hd.d.pg2k.svrCore;
002
003 import java.io.IOException;
004 import java.io.ObjectInputStream;
005 import java.io.ObjectOutputStream;
006 import java.util.Arrays;
007
008 import org.hd.d.pg2k.svrCore.TextUtils.CharSequence8Bit;
009
010 /**Immutable lightweight CharSequence holding 8-bit text only.
011 * Internally the text is stored as a byte[] for compactness.
012 * <p>
013 * Implements hashCode() and equals() so suitable as a hash key.
014 */
015 public final class CS8Bit implements CharSequence8Bit,
016 java.io.Serializable,
017 MemoryTools.Internable
018 {
019 /**The 8-bit text of this sequence; ever null but may be empty. */
020 private transient /* final */ byte[] text;
021
022 /**Copy from byte array. */
023 public CS8Bit(final byte[] d)
024 { text = d.clone(); }
025
026 /**Copy from another CharSequence8Bit (defensively). */
027 public CS8Bit(final CharSequence8Bit other)
028 { text = other.toByteArray().clone(); } // Somewhat paranoid to copy again...
029
030 /**Copy a sub-sequence of another CharSequence8Bit.
031 * @param other other sequence; never null
032 * @param start inclusive start index
033 * @param end exclusive end index
034 */
035 public CS8Bit(final CharSequence8Bit other, final int start, final int end)
036 {
037 text = new byte[end - start];
038 for(int i = text.length; --i >= 0; )
039 { text[i] = (byte) other.charAt(i + start); }
040 }
041
042 /**Copy from another CharSequence, vetoing if any non-8-bit data. */
043 public CS8Bit(final CharSequence other)
044 throws IllegalArgumentException
045 {
046 text = new byte[other.length()];
047 for(int i = text.length; --i >= 0; )
048 {
049 final char c = other.charAt(i);
050 if(c >= 0x100) { throw new IllegalArgumentException(); }
051 text[i] = (byte) c;
052 }
053 }
054
055 public int length()
056 { return(text.length); }
057
058 public char charAt(final int index)
059 { return((char) (text[index] & 0xff)); }
060
061 /**Extract subsequence; never null.
062 * Usually a very lightweight wrapper around this instance,
063 * but may be an outright copy for very short subsequences for efficiency.
064 */
065 public CharSequence8Bit subSequence(final int startSS, final int endSS)
066 {
067 if((startSS < 0) || (endSS < startSS) || (endSS > length())) { throw new IllegalArgumentException(); }
068
069 final int lengthSS = endSS - startSS;
070 // For very small fragments take a copy of the data as this is assumed to be fast,
071 // and probably a smaller footprint than wrapping,
072 // and potentially allows GC of the parent instance...
073 if(lengthSS <= 8) { return(new CS8Bit(this, startSS, endSS)); }
074
075 // Construct very lightweight wrapper around this CS8Bit instance.
076 return(new CharSequence8Bit() {
077 public int length() { return(lengthSS); }
078 public char charAt(final int index)
079 {
080 if((index < 0) || (index >= lengthSS)) { throw new IllegalArgumentException(); }
081 return((char) (text[startSS + index] & 0xff));
082 }
083 /**Rewrap the original underlying CS8Bit for efficiency; never null. */
084 public CharSequence subSequence(final int start, final int end)
085 {
086 if((start < 0) || (end < start) || (end > lengthSS)) { throw new IllegalArgumentException(); }
087 return(CS8Bit.this.subSequence(startSS + start, startSS + end));
088 }
089 @Override public final String toString()
090 { return((new StringBuilder(lengthSS)).append(this).toString()); }
091 /**Create private byte[] copy for caller; never null. */
092 public final byte[] toByteArray() { return(Arrays.copyOfRange(text, startSS, startSS+lengthSS)); }
093 });
094 }
095
096 /**Convert to String with same char sequence; never null. */
097 @Override public final String toString()
098 { return((new StringBuilder(text.length)).append(this).toString()); }
099
100 /**Create private byte[] copy for caller; never null. */
101 public final byte[] toByteArray() { return(text.clone()); }
102
103
104 /**Cache of the computed hash code.
105 * Not part of the permanent/serialised state of the object since easy to recompute.
106 */
107 private transient int hash; // Default to 0
108
109 /**The hash is computed over the entire text as for String. */
110 @Override public int hashCode()
111 {
112 int h = hash;
113 if(h == 0) // Probably not yet computed, so do so now.
114 {
115 // Hash needs to be (re)computed.
116 final int len = length();
117 for(int i = 0; i < len; i++)
118 { h = 31*h + text[i]; }
119 hash = h; // Cache the result.
120 }
121 return(h);
122 }
123
124 /**Equality depends on the entire content being the same. */
125 @Override public boolean equals(final Object o)
126 {
127 if(this == o) { return(true); }
128 if(!(o instanceof CS8Bit)) { return(false); }
129 return(Arrays.equals(text, ((CS8Bit)o).text));
130 }
131
132
133 /**Unique serialisation ID. */
134 private static final long serialVersionUID = -475170579269562789L;
135
136 /**Empty sequence. */
137 public static final CS8Bit EMPTY = new CS8Bit(new byte[0]);
138
139 /**Deserialise. */
140 private void readObject(final ObjectInputStream in)
141 throws IOException, ClassNotFoundException
142 {
143 final byte l0 = in.readByte();
144 final int length;
145 if(l0 >= 0) { length = l0; }
146 else // Read rest of 32-bit form of length...
147 {
148 final byte l1 = in.readByte();
149 final int us1 = in.readUnsignedShort();
150 length = -((l0 << 24) + ((l1 & 0xff) << 16) + us1);
151 }
152 // Create and read the text field.
153 if(length == 0) { text = EMPTY.text; }
154 else
155 {
156 text = new byte[length];
157 in.readFully(text);
158 }
159 }
160
161 /**Write out a minimally-redundant form of our internal information.
162 * The more-efficient on-the-wire format also makes defensive
163 * reading easier.
164 * <p>
165 * We don't write *any* default state.
166 * <p>
167 * We must never be asked to serialise if we have a (non-null) dictionary.
168 * <p>
169 * We must never be asked to serialise if the text is too long for us to encode.
170 */
171 private void writeObject(final ObjectOutputStream oos)
172 throws IOException
173 {
174 // Write the length in a variable-length format.
175 // For very small strings use a byte-value length.
176 if(text.length <= Byte.MAX_VALUE) { oos.writeByte(text.length); }
177 // Write larger text sizes using a negative length
178 // so that reading the first byte will give a negative value.
179 else { oos.writeInt(-text.length); }
180
181 // Now write the text bytes directly, if any, unwrapped.
182 if(text.length != 0) { oos.write(text); }
183 }
184
185 /**Deserialise: use constructor for validation, etc.
186 * Also resolve all empty instances to a single value as a minor optimisation.
187 */
188 protected Object readResolve()
189 // throws ObjectStreamException
190 {
191 // Avoid duplicates of empty case.
192 if(text.length == 0) { return(EMPTY); }
193
194 // Attempt to mop up duplicates to help with instance control.
195 // Assume that defensive copying is not necessary since content is unshared.
196 return(MemoryTools.intern(this));
197 }
198 }