001 /*
002 Copyright (c) 1996-2011, Damon Hart-Davis
003 All rights reserved.
004
005 Redistribution and use in source and binary forms, with or without
006 modification, are permitted provided that the following conditions are
007 met:
008
009 * Redistributions of source code must retain the above copyright
010 notice, this list of conditions and the following disclaimer.
011
012 * Redistributions in binary form must reproduce the above copyright
013 notice, this list of conditions and the following disclaimer in the
014 documentation and/or other materials provided with the
015 distribution.
016
017 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
018 IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
019 TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
020 PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
021 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
022 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
023 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
024 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
025 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
026 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
027 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
028 */
029 package org.hd.d.pg2k.svrCore;
030
031 import java.io.BufferedInputStream;
032 import java.io.DataOutputStream;
033 import java.io.File;
034 import java.io.FileInputStream;
035 import java.io.IOException;
036 import java.io.InputStreamReader;
037 import java.io.InvalidObjectException;
038 import java.io.ObjectInputValidation;
039 import java.io.OutputStream;
040 import java.io.Reader;
041 import java.io.Serializable;
042 import java.util.Arrays;
043 import java.util.HashSet;
044 import java.util.Iterator;
045 import java.util.Properties;
046 import java.util.Set;
047
048 import org.hd.d.pg2k.svrCore.location.Location;
049
050 /**Immutable (and serialisable) store of all loadable auxiliary properties of a single exhibit.
051 * These are properties (such as descriptive comment) that must be fetched
052 * from the exhibit database and that cannot be computed.
053 * <p>
054 * This is designed to be efficient on the wire and in memory, since these
055 * details will be held for each and every exhibit.
056 * <p>
057 * Note that a factory method is used to generate instances of this class
058 * and where there is no loadable data for an exhibit (which will be rare)
059 * null is returned.
060 * <p>
061 * Equals is implemented so that duplicates (particularly of EMPTY) can
062 * easily be discarded. hashCode() is implemented to support the semantics
063 * of equals() and as a hash over all the contents of the object.
064 * <p>
065 * By default this looks for data in a new-style properties file first
066 * (fileBase + '/' + exhibitName + ".props"),
067 * and then, iff READ_OLD_FILES == true, in the old-style individual files.
068 */
069 public final class ExhibitPropsLoadable implements Serializable, ObjectInputValidation,
070 MemoryTools.Internable,
071 MemoryTools.Compactable
072 {
073 /**Shared canonical EMPTY instance: no description, no location and no accession data; equals() is used against it to detect and collapse empty results. */
074 public static final ExhibitPropsLoadable EMPTY = new ExhibitPropsLoadable(
075 null, // No description.
076 null, // No location.
077 null, // No accession data.
078 false); // doNotCompact is moot here: a null description is stored as null and never compacted.
079
/**Private constructor so that instances are only created via the factory method and the serialisation hooks.
 * Parameters should already have been checked before we are called;
 * the final state is nevertheless verified with validateObject().
 * <p>
 * Components that are likely to be duplicated, and that may consume
 * significant memory, are intern()ed.
 *
 * @param _description  description in any representation accepted by validateObject()
 *     (String, Name, Compact7BitString, ROByteArray or byte[]),
 *     no more than CoreConsts.DESCRIPTION_MAX_CHARS long;
 *     null or anything decoding to "" means none and is stored as null
 * @param _location  location data; Location.NONE is stored as null
 * @param _accession  accession data; null or anything equal to AccessionData.EMPTY is stored as null
 * @param doNotCompact  if true then do not attempt to compact the description; store as passed in
 * @throws IllegalArgumentException  if the resulting state fails validateObject();
 *     the InvalidObjectException is attached as the cause
 */
private ExhibitPropsLoadable(final Object _description,
                             final Location.Base _location,
                             final AccessionData _accession,
                             final boolean doNotCompact)
{
    // Store the accession data.
    // Replace empty instances with null
    // and intern() the rest to reduce (tenured) heap churn during updates.
    accession = ((_accession == null) || (_accession.equals(AccessionData.EMPTY))) ?
        null : MemoryTools.intern(_accession);

    // Provisionally store the description exactly as given:
    // this lets getDescription() be used to decode it below.
    description = _description;
    if((_description == null) || "".equals(getDescription()))
    {
        // Null or empty: store a (canonical) null.
        description = null;
    }
    else if(!doNotCompact &&
            !(_description instanceof Compact7BitString) &&
            !(DEFER_COMPACTION && !MemoryTools.lotsFree()))
    {
        // Compaction is permitted, the value is not already in the preferred form,
        // and we are not deferring (or there is lots of memory free):
        // eagerly try to convert to the compact form.
        // Eager compaction may be able to use otherwise-idle cycles while deserialising for example.
        try { description = MemoryTools.intern(Compact7BitString.convertToCompact7BitString(getDescription(), sDict)); }
        catch(final Exception e) { /* Best effort only: on failure keep the form we were given. */ }
    }
    // Otherwise keep whatever form we were given.

    // Store any location information supplied;
    // null is the more efficient representation for "NONE".
    location = Location.NONE.equals(_location) ? null : MemoryTools.intern(_location);

    // Verify object state, preserving the underlying failure as the cause.
    try { validateObject(); }
    catch(final InvalidObjectException e)
        { throw new IllegalArgumentException(e.getMessage(), e); }
}
135
136 /**Make relative path to new-style properties file. */
137 private static String relPathToNewProperties(final Name.ExhibitFull exhibitName)
138 {
139 return(exhibitName + EXHIBIT_PROPS_FILENAME_SUFFIX);
140 }
141
142 /**Make relative path to old-style en-GB HTML generic description file. */
143 private static String relPathToOldDescription(final Name.ExhibitFull exhibitName)
144 {
145 return(exhibitName + CoreConsts.DESCRIPTION_FILE_SUFFIX);
146 }
147
148 /**Make relative path to new accession-date file; never null.
149 * This is an XML-format file.
150 * <p>
151 * Public so that other routines can find and read this file directly.
152 *
153 * @param exhibitName non-null relative path to exhibit
154 */
155 public static String relPathToNewAccession(final Name.ExhibitFull exhibitName)
156 {
157 // final String exhibitDir = (new File(exhibitName)).getParent();
158 // final String exhibitFile = (new File(exhibitName)).getName();
159 // return((new File(exhibitDir, ".accession." + exhibitFile + ".xml")).getPath());
160 if(null == exhibitName) { throw new IllegalArgumentException(); }
161 final Name.ExhibitShort f = exhibitName.getShortName();
162 final CharSequence d = exhibitName.subSequence(0, exhibitName.length() - f.length()); // d includes trailing slash.
163 assert(d.charAt(d.length()-1) == '/');
164 return(d + ".accession." + f + ".xml");
165 }
166
167 /**Make relative path to old accession-date file; never null.
168 * Historically the format of this file is a single line of the format:
169 * <pre>
170 * YYYY[/MM[/DD]]
171 * </pre>
172 * where YYYY is the year and MM is the month 01--12 and DD is the 01-31,
173 * all relative to UTC/GMT, and represent the approximate date that the
174 * exhibits where incorporated in the library.
175 * <p>
176 * Public so that other routines can find and read this file directly.
177 *
178 * @param exhibitName non-null relative path to exhibit
179 */
180 public static String relPathToOldAccession(final Name.ExhibitFull exhibitName)
181 {
182 // final String exhibitDir = (new File(exhibitName)).getParent();
183 // final String exhibitFile = (new File(exhibitName)).getName();
184 // return((new File(exhibitDir, ".accession." + exhibitFile + ".txt")).getPath());
185 if(null == exhibitName) { throw new IllegalArgumentException(); }
186 final Name.ExhibitShort f = exhibitName.getShortName();
187 final CharSequence d = exhibitName.subSequence(0, exhibitName.length() - f.length()); // d includes trailing slash.
188 assert(d.charAt(d.length()-1) == '/');
189 return(d + ".accession." + f + ".txt");
190 }
191
192 /**Make relative path to old-style accession-date YYYY[/MM[/DD]] file. */
193 private static String relPathToOldLocation(final Name.ExhibitFull exhibitName)
194 {
195 // final String exhibitDir = (new File(exhibitName)).getParent();
196 // final String exhibitFile = (new File(exhibitName)).getName();
197 // return((new File(exhibitDir, ".location." + exhibitFile + ".txt")).getPath());
198 if(null == exhibitName) { throw new IllegalArgumentException(); }
199 final Name.ExhibitShort f = exhibitName.getShortName();
200 final CharSequence d = exhibitName.subSequence(0, exhibitName.length() - f.length()); // d includes trailing slash.
201 assert(d.charAt(d.length()-1) == '/');
202 return(d + ".location." + f + ".txt");
203 }
204
205
206 /**Make a new instance with data loaded from the filesystem, given the exhibit name and some filesystem base directories.
207 * In case of severe unexpected problems with retrieving information
208 * an IOException is thrown, but minor problems are resolved
209 * quietly and a message is reported on System.err.
210 *
211 * @throws IOException in case of severe, unrecoverable difficulty with this exhibit
212 * @return null, or non-null non-EMPTY result
213 */
214 public static ExhibitPropsLoadable getLoadableProperties(final Name.ExhibitFull exhibitName,
215 final File exhibitBaseDir)
216 throws IOException
217 {
218 // final File fullExhibitPath = new File(exhibitBaseDir, exhibitName);
219 // if(!fullExhibitPath.exists())
220 // { throw new FileNotFoundException("exhibit missing"); }
221
222 // Attempt to load the new-style properties file,
223 // if extant.
224 Properties p = null;
225 final File newPropsPath = new File(exhibitBaseDir, relPathToNewProperties(exhibitName));
226 if(newPropsPath.exists())
227 {
228 // Attempt to load from file and allow any errors to
229 // propagate to caller.
230 final FileInputStream fis = new FileInputStream(newPropsPath);
231 try { p = FileTools.loadProperties(new BufferedInputStream(fis)); }
232 finally { fis.close(); }
233 }
234
235 Object _description = null;
236 // Try to retrieve description from new properties.
237 if(p != null) { _description = p.getProperty(PNAME_DESCRIPTION); }
238 // If not in new properties, and in right mode, look for old file.
239 if(READ_OLD_FILES && (_description == null))
240 {
241 final File oldDescriptionPath = new File(exhibitBaseDir, relPathToOldDescription(exhibitName));
242 // If old file exists, try to read it, propagating any IOException.
243 if(oldDescriptionPath.exists())
244 {
245 _description = FileTools.readTextFile(oldDescriptionPath);
246 }
247 }
248 // Trim the description, and if zero length, set it back to null.
249 if(_description instanceof String)
250 {
251 if(_description != null) { _description = ((String) _description).trim(); }
252 if((_description != null) && (((String) _description).length() == 0)) { _description = null; }
253 // If it is over-length, complain on System.err and set it back to null.
254 if((_description != null) && (((String) _description).length() > CoreConsts.DESCRIPTION_MAX_CHARS))
255 {
256 System.err.println("ERROR: description too long (max "+CoreConsts.DESCRIPTION_MAX_CHARS+" chars) for exhibit " + exhibitName);
257 _description = null;
258 }
259 // If not deferring compaction and we have been passed a String
260 // attempt to compact and intern() it now.
261 if(!DEFER_COMPACTION)
262 {
263 try { _description = MemoryTools.intern(Compact7BitString.convertToCompact7BitString((String) _description, sDict)); }
264 catch(final Exception e) { }
265 }
266 }
267
268 Location.Base _location = null;
269 // Try to retrieve description from new properties.
270 if((p != null) &&
271 // Check that at least minimal location field is present
272 // before attempting full parse.
273 (p.getProperty(PNAME_LOCATION_PREFIX + '.' + Location.Base.typeKey) != null))
274 {
275 _location = Location.Base.buildFromProperties(
276 true, // Mark as "specific".
277 PNAME_LOCATION_PREFIX,
278 p);
279 }
280 // If not in new properties, and in right mode, look for old file.
281 if(READ_OLD_FILES && (_location == null))
282 {
283 final File oldLocationPath = new File(exhibitBaseDir, relPathToOldLocation(exhibitName));
284 // If old file exists, try to read it, propagating any IOException.
285 if(oldLocationPath.exists())
286 { _location = Location.Base.buildFromFile(true, oldLocationPath); }
287 }
288
289 // Try to load new accession data, if present.
290 // There is no point trying to intern() this to save memory
291 // since it should be different for every exhibit,
292 // though doing so may help reduce tenured heap churn during updates.
293 AccessionData ad = null;
294 final File newAccessionDataPath = new File(exhibitBaseDir, relPathToNewAccession(exhibitName));
295 if(newAccessionDataPath.exists())
296 {
297 try
298 {
299 // Get the XML accession data from (UTF-8 format) file...
300 final Reader r = new InputStreamReader(
301 new BufferedInputStream(
302 new FileInputStream(newAccessionDataPath)),
303 CoreConsts.FILE_ENCODING_UTF_8);
304 try
305 {
306 final StringBuilder sb = new StringBuilder((int) newAccessionDataPath.length());
307 int ic;
308 while((ic = r.read()) != -1)
309 { sb.append((char) ic); }
310 ad = AccessionData.parseFromXML(sb.toString());
311 }
312 finally
313 { r.close(); }
314 }
315 catch(final Exception e)
316 {
317 e.printStackTrace();
318 throw new IOException("unable to parse accession file: " + newAccessionDataPath + ": " + e.getMessage());
319 }
320 }
321
322
323 // Try to create a new set of loadable properties.
324 final ExhibitPropsLoadable result = new ExhibitPropsLoadable(_description,
325 _location,
326 ad,
327 false /* Allow compaction. */ );
328
329 // If it is equivalent to the EMPTY one then return null instead.
330 if(EMPTY.equals(result)) { return(null); }
331
332 // Got some loadable properties!
333 // Make sure that we eliminate any easy duplicates.
334 return(MemoryTools.intern(result));
335 }
336
337
338 /**Based on entire content of object; if equivalent content then they are equal.
339 * Can be used to eliminate duplicates, eg after deserialising.
340 */
341 @Override
342 public boolean equals(final Object o)
343 {
344 if(this == o) { return(true); } // True if same object!
345 if(!(o instanceof ExhibitPropsLoadable)) { return(false); }
346 final ExhibitPropsLoadable other = (ExhibitPropsLoadable) o;
347
348 // Compare location for equality.
349 // Handle null case specially.
350 // This should be fast to compare, so do it early.
351 if(location == null) { if(other.location != null) { return(false); } }
352 else if(!location.equals(other.location)) { return(false); }
353
354 // This may be somewhat slow to compare.
355 if(accession == null) { if(other.accession != null) { return(false); } }
356 else if(!accession.equals(other.accession)) { return(false); }
357
358 // Compare description by external form (likely to be very slow).
359 // Use quick internal instance compare first to see
360 // if we really need to decode the description at all...
361 if(description != other.description)
362 {
363 final String desc = getDescription();
364 final String descO = other.getDescription();
365 // Handle null case specially.
366 if(desc == null) { if(descO != null) { return(false); } }
367 else if(!desc.equals(descO)) { return(false); }
368 }
369
370 assert(hashCode() == other.hashCode());
371
372 return(true); // Yes, they are the same!
373 }
374
375 /**Human-readable summary. */
376 @Override public String toString()
377 {
378 final StringBuilder sb = new StringBuilder();
379 sb.append("<ExhibitPropsLoadable");
380 final String desc = getDescription();
381 if(desc != null) { sb.append(":description=").append(description); }
382 if(location != null) { sb.append(":location=").append(location); }
383 if(accession != null) { sb.append(":accession=").append(accession); }
384 sb.append(">");
385 return(sb.toString());
386 }
387
388 /**Private to hashCode() lock-free cache of computed hash value; initially 0 indicating not computed. */
389 private transient int _hash;
390
391 /**Based on entire content of object; if equivalent content then they have same hash.
392 * Can be used for tracking if content has changed from one scan to another,
393 * ie is optimised for spotting changes rather than key distribution for use
394 * in a Hashtable, for example, though it should be serviceable for
395 * that purpose too as it is computed once and cached (transiently).
396 * <p>
397 * The cache relies on atomic assignment of int and is lock-free.
398 * <p>
399 * This may be slow to compute.
400 * <p>
401 * The hash value is never zero.
402 */
403 @Override
404 public int hashCode()
405 {
406 int result = _hash;
407 if(result == 0) // Result needs recomputing.
408 {
409 // Recompute and cache.
410 try {
411 // Make a CRC-32 `digest' or checksum...
412 // We use CRC32 rather than Adler32 because we are
413 // likely to be operating on small amounts of data
414 // or small changes where CRC32 may prove much superior.
415 final java.util.zip.Checksum check = new java.util.zip.CRC32();
416 final DataOutputStream dos =
417 new DataOutputStream(
418 (new OutputStream(){
419 @Override
420 public final void write(final int b)
421 { check.update(b); }
422 @Override
423 public final void write(final byte[] b, final int off, final int len)
424 { check.update(b, off, len); }
425 })
426 );
427
428 // Insert data/property values into the checksum...
429 // We must always insert the same value for a field
430 // while its external representation remains unchanged,
431 // and insert a different value if the external value changes,
432 // so as to make the hash sensitive to what a user of the
433 // class would see.
434
435 // Hash the description as a UTF8 String in its external representation...
436 final String desc = getDescription();
437 if(desc == null) { dos.writeByte(255); } else { dos.writeUTF(desc); }
438
439 // We write out the hashCode() to avoid toString() formatting funnies.
440 final Location.Base loc = getLocation();
441 if(loc == null) { dos.writeByte(1); } else { dos.writeInt(loc.hashCode()); }
442
443 // Capture the accession data as part of the hash/checksum...
444 final AccessionData accMD = getAccessionMetadata();
445 if(accMD == null) { dos.writeByte(2); } else { dos.writeInt(accMD.hashCode()); }
446
447 dos.flush(); // Make sure that all the data is in the checksum.
448 result = (int) check.getValue();
449 if(result == 0) { result = 1; } // Ensure computed hash is non-zero.
450 _hash = result; // Cache the computed hash.
451 }
452 catch(final IOException e) { throw new Error("internal error"); } // Should not happen.
453 }
454 return(result); // Return the hash...
455 }
456
457
458
459 /**My serial ID. */
460 private static final long serialVersionUID = 8659120481539311156L;
461
462 /**Deserialise: use constructor for validation, defensive copying, etc.
463 * Also resolve all empty instances to a singleton as a minor optimisation,
464 * and immediately intern() new values so as to immediately discard
465 * duplicates (eg of exhibits already known) ASAP to minimise heap churn.
466 */
467 protected Object readResolve()
468 // throws ObjectStreamException
469 {
470 // Avoid duplicates of empty case.
471 if(equals(EMPTY)) { return(EMPTY); }
472
473 // Construct new instance of object in normal defensive way.
474 // This may also allow compaction on the fly.
475 return(new ExhibitPropsLoadable(description, location, accession, false));
476 }
477
478 /**Serialise: write in the best format for the wire.
479 * To get best aggregate compressed size on the wire,
480 * eg where the compressed stream contains many similar non-identical instances,
481 * we always write out (non-null) description in Name format (minus the outer tags)
482 * regardless of how it is actually held in memory.
483 * This also makes us immune to changes in the the internals of the other formats
484 * and allows use of a static dictionary with Compact7BitString for better in-memory compression
485 * (ie effectively cross-instance compression).
486 * <p>
487 * This allows a stream compressor to effectively remove the redundancy between
488 * instances of this class on the wire as well as internal redundancies.
489 * <p>
490 * We assume that there will almost never be entirely identical instances on one stream
491 * so we don't mind writing new copies each time where it does happen.
492 */
493 protected Object writeReplace()
494 // throws ObjectStreamException
495 {
496 // Don't write multiple EMPTY instances.
497 if(this.equals(EMPTY)) { return(EMPTY); }
498
499 // If the description is null then write this instance as-is.
500 if(null == description) { return(this); }
501
502 // Return a Name-based instance for better inter-instance stream compression,
503 // and for a more stable serialised representation.
504 // We make sure that the String description is de-duped if at all possible.
505 return(new ExhibitPropsLoadable(Name.create(getDescription()),
506 location,
507 accession,
508 true)); // Keep Name representation for the wire.
509 }
510
511 /**Validate fields/state.
512 * Called in the constructor and possibly after de-serialising.
513 * <p>
514 * Barf if something bad is found.
515 * (Maybe allow some extra info in debug version.)
516 */
517 public void validateObject()
518 throws InvalidObjectException
519 {
520 // Check that all components are sane and safe.
521 // Check that the description is in an allowed form and can be decompressed.
522 if(description != null)
523 {
524 if(description instanceof String)
525 {
526 final int len = ((String) description).length();
527 if((len < 1) || (len > CoreConsts.DESCRIPTION_MAX_CHARS))
528 { throw new InvalidObjectException("bad object: bad description String length"); }
529 }
530 else if(description instanceof Name)
531 {
532 final int len = ((Name) description).length();
533 if((len < 1) || (len > CoreConsts.DESCRIPTION_MAX_CHARS))
534 { throw new InvalidObjectException("bad object: bad description Name length"); }
535 }
536 else if(description instanceof Compact7BitString)
537 {
538 final String s = description.toString();
539 final int len = s.length();
540 if((len < 1) || (len > CoreConsts.DESCRIPTION_MAX_CHARS))
541 { throw new InvalidObjectException("bad object: bad description Compact7BitString length"); }
542 }
543 else if(description instanceof ROByteArray)
544 {
545 final int rawLen = ((ROByteArray) description).length();
546 if((rawLen < 1) || (rawLen > CoreConsts.DESCRIPTION_MAX_CHARS))
547 { throw new InvalidObjectException("bad object: bad description ROByteArray length"); }
548 try {
549 final int len = (ROByteArray.uncompressToString((ROByteArray) description)).length();
550 if((len < 1) || (len > CoreConsts.DESCRIPTION_MAX_CHARS))
551 { throw new InvalidObjectException("bad object: bad description ROByteArray uncompressed length"); }
552 }
553 catch(final Exception e)
554 { throw new InvalidObjectException("bad object: bad description ROByteArray: " + e.getMessage()); }
555 }
556 else if(description instanceof byte[])
557 {
558 final int rawLen = ((byte[]) description).length;
559 if((rawLen < 1) || (rawLen > CoreConsts.DESCRIPTION_MAX_CHARS))
560 { throw new InvalidObjectException("bad object: bad description byte[] length"); }
561 try {
562 final int len = _decompressASCII7Text((byte[]) description).length();
563 if((len < 1) || (len > CoreConsts.DESCRIPTION_MAX_CHARS))
564 { throw new InvalidObjectException("bad object: bad description byte[] uncompressed length"); }
565 }
566 catch(final Exception e)
567 { throw new InvalidObjectException("bad object: bad description byte[]: " + e.getMessage()); }
568 }
569 else
570 { throw new InvalidObjectException("bad object: bad description type"); }
571 }
572
573 // If location data is not null, it must be marked "specific".
574 if(location != null)
575 {
576 if(Location.NONE.equals(location))
577 { throw new InvalidObjectException("bad object: location must be null rather than NONE"); }
578 if(location.specific == false)
579 { throw new InvalidObjectException("bad object: location data must be \"specific\""); }
580 }
581
582 if(AccessionData.EMPTY.equals(accession))
583 { throw new InvalidObjectException("bad object: accession must be null rather than EMPTY"); }
584 }
585
586 /**Suffix appended to the full exhibit name to form the relative path of the new-format exhibit-properties file, if extant. */
587 public static final String EXHIBIT_PROPS_FILENAME_SUFFIX = ".props";
588
589 /**Allow backwards compatibility if true, ie look in the old-style description and location files for info not found in the new props file. */
590 public static final boolean READ_OLD_FILES = true;
591
592 /**Name (key) of property for (default English text/HTML) description in new properties file. */
593 public static final String PNAME_DESCRIPTION = "description";
594
595 /**Prefix of property names for location information in new properties file; the location type is looked up under prefix + '.' + Location.Base.typeKey. */
596 public static final String PNAME_LOCATION_PREFIX = "location";
597
598 /**Shared static dictionary passed to Compact7BitString.convertToCompact7BitString() when compacting descriptions for in-memory use.
599 * We expose this to help generate tuned dictionary values;
600 * this is safe to do since this dictionary is immutable.
601 */
602 public static final Compact7BitString.StaticDictionary sDict = new Compact7BitString.StaticDictionary("EPL",
603 // Most-common and largest early tokens first in list for maximum savings; the trailing comment on each entry records the statistics it was chosen on.
604 Arrays.asList(new String[]{
605 "the", /* count=804, saving=1608, meanFirstPos=18 */
606 "Taken", /* count=285, saving=1140, meanFirstPos=4 */
607 "and", /* count=462, saving=924, meanFirstPos=30 */
608 "from", /* count=211, saving=633, meanFirstPos=27 */
609 "with", /* count=208, saving=624, meanFirstPos=26 */
610 ", ", /* count=605, saving=605, meanFirstPos=25 */
611 "of", /* count=541, saving=541, meanFirstPos=25 */
612 "The", /* count=260, saving=520, meanFirstPos=20 */
613 "target", /* count=102, saving=510, meanFirstPos=41 */
614 "Arrived", /* count=82, saving=492, meanFirstPos=3 */
615 "in", /* count=448, saving=448, meanFirstPos=27 */
616 "this", /* count=146, saving=438, meanFirstPos=32 */
617 "http", /* count=141, saving=423, meanFirstPos=29 */
618 "that", /* count=136, saving=408, meanFirstPos=42 */
619 "blockquote", /* count=45, saving=405, meanFirstPos=44 */
620 "called", /* count=78, saving=390, meanFirstPos=17 */
621 "href", /* count=130, saving=390, meanFirstPos=27 */
622 "This", /* count=125, saving=375, meanFirstPos=17 */
623 "to", /* count=375, saving=375, meanFirstPos=33 */
624 "for", /* count=168, saving=336, meanFirstPos=36 */
625 "_top", /* count=111, saving=333, meanFirstPos=43 */
626 "over", /* count=111, saving=333, meanFirstPos=14 */
627 "is", /* count=312, saving=312, meanFirstPos=28 */
628 "was", /* count=149, saving=298, meanFirstPos=33 */
629 "on", /* count=283, saving=283, meanFirstPos=27 */
630 "://", /* count=141, saving=282, meanFirstPos=30 */
631 "afternoon", /* count=33, saving=264, meanFirstPos=13 */
632 "distribution", /* count=24, saving=264, meanFirstPos=12 */
633 " (", /* count=252, saving=252, meanFirstPos=30 */
634 ". ", /* count=240, saving=240, meanFirstPos=34 */
635 "www", /* count=120, saving=240, meanFirstPos=32 */
636 "taken", /* count=57, saving=228, meanFirstPos=22 */
637 "London", /* count=45, saving=225, meanFirstPos=27 */
638 "are", /* count=112, saving=224, meanFirstPos=38 */
639 "picture", /* count=37, saving=222, meanFirstPos=29 */
640 "about", /* count=55, saving=220, meanFirstPos=33 */
641 "Originally", /* count=24, saving=216, meanFirstPos=0 */
642 "background", /* count=24, saving=216, meanFirstPos=26 */
643 "which", /* count=54, saving=216, meanFirstPos=45 */
644 "at", /* count=212, saving=212, meanFirstPos=28 */
645 "email", /* count=52, saving=208, meanFirstPos=16 */
646 "resolution", /* count=23, saving=207, meanFirstPos=20 */
647 "Scanned", /* count=34, saving=204, meanFirstPos=9 */
648 "2000", /* count=67, saving=201, meanFirstPos=15 */
649 "scanned", /* count=32, saving=192, meanFirstPos=12 */
650 "building", /* count=27, saving=189, meanFirstPos=25 */
651 "have", /* count=62, saving=186, meanFirstPos=50 */
652 "100dpi", /* count=35, saving=175, meanFirstPos=18 */
653 "by", /* count=175, saving=175, meanFirstPos=30 */
654 "people", /* count=35, saving=175, meanFirstPos=43 */
655 "...", /* count=85, saving=170, meanFirstPos=34 */
656 "colour", /* count=34, saving=170, meanFirstPos=27 */
657 "compression", /* count=17, saving=170, meanFirstPos=47 */
658 "England", /* count=28, saving=168, meanFirstPos=31 */
659 "local", /* count=42, saving=168, meanFirstPos=26 */
660 "\" ", /* count=167, saving=167, meanFirstPos=34 */
661 ") ", /* count=165, saving=165, meanFirstPos=35 */
662 "very", /* count=55, saving=165, meanFirstPos=30 */
663 ". ", /* count=82, saving=164, meanFirstPos=31 */ // NOTE(review): as shown here this literal duplicates the earlier ". " entry; possibly a longer whitespace run was intended - confirm.
664 "some", /* count=54, saving=162, meanFirstPos=32 */
665 "Net", /* count=78, saving=156, meanFirstPos=10 */
666 "more", /* count=52, saving=156, meanFirstPos=34 */
667 "after", /* count=38, saving=152, meanFirstPos=32 */
668 "2004", /* MANUALLY ADDED: prefix count ~ 70 */
669 "</", /* count=147, saving=147, meanFirstPos=48 */
670 "Possibly", /* count=21, saving=147, meanFirstPos=1 */
671 "See", /* count=73, saving=146, meanFirstPos=16 */
672 "can", /* count=73, saving=146, meanFirstPos=39 */
673 " \"", /* count=144, saving=144, meanFirstPos=24 */
674 "2003", /* count=48, saving=144, meanFirstPos=17 */
675 "apparently", /* count=16, saving=144, meanFirstPos=26 */
676 "=\"", /* count=142, saving=142, meanFirstPos=28 */
677 "but", /* count=71, saving=142, meanFirstPos=45 */
678 "says", /* count=47, saving=141, meanFirstPos=16 */
679 "image", /* count=35, saving=140, meanFirstPos=37 */
680 "original", /* count=20, saving=140, meanFirstPos=40 */
681 "small", /* count=34, saving=136, meanFirstPos=27 */
682 "like", /* count=45, saving=135, meanFirstPos=28 */
683 "think", /* count=33, saving=132, meanFirstPos=36 */
684 "through", /* count=22, saving=132, meanFirstPos=42 */
685 "as", /* count=130, saving=130, meanFirstPos=37 */
686 "rather", /* count=26, saving=130, meanFirstPos=38 */
687 "html", /* count=43, saving=129, meanFirstPos=41 */
688 "GMT", /* count=64, saving=128, meanFirstPos=11 */
689 "; ", /* count=127, saving=127, meanFirstPos=31 */
690 "one", /* count=63, saving=126, meanFirstPos=37 */
691 "behind", /* count=25, saving=125, meanFirstPos=38 */
692 " <", /* count=123, saving=123, meanFirstPos=25 */
693 "images", /* count=24, saving=120, meanFirstPos=33 */
694 "foreground", /* count=13, saving=117, meanFirstPos=23 */
695 "camera", /* count=22, saving=110, meanFirstPos=30 */
696 "University", /* count=12, saving=108, meanFirstPos=13 */
697 "between", /* count=18, saving=108, meanFirstPos=31 */
698 "just", /* count=36, saving=108, meanFirstPos=27 */
699 "outside", /* count=18, saving=108, meanFirstPos=18 */
700 "subject", /* count=18, saving=108, meanFirstPos=17 */
701 "little", /* count=21, saving=105, meanFirstPos=29 */
702 "buildings", /* count=13, saving=104, meanFirstPos=25 */
703 "From", /* count=34, saving=102, meanFirstPos=17 */
704 "station", /* count=17, saving=102, meanFirstPos=18 */
705 "white", /* count=25, saving=100, meanFirstPos=24 */
706 "yellow", /* count=19, saving=95, meanFirstPos=22 */
707 "midnight", /* count=13, saving=91, meanFirstPos=9 */
708 "June", /* count=30, saving=90, meanFirstPos=18 */
709 "Routemaster", /* count=9, saving=90, meanFirstPos=8 */
710 "blue", /* count=30, saving=90, meanFirstPos=14 */
711 "green", /* count=22, saving=88, meanFirstPos=16 */
712 "thanks", /* count=17, saving=85, meanFirstPos=18 */
713 "Note", /* count=28, saving=84, meanFirstPos=10 */
714 "Probably", /* count=12, saving=84, meanFirstPos=13 */
715 "Canon", /* count=18, saving=72, meanFirstPos=5 */
716 "Hattusha", /* count=10, saving=70, meanFirstPos=0 */
717 "Bruno", /* count=17, saving=68, meanFirstPos=7 */
718 "Brighton", /* count=9, saving=63, meanFirstPos=9 */
719 "Courtesy", /* count=8, saving=56, meanFirstPos=0 */
720 "Looking", /* count=9, saving=54, meanFirstPos=2 */
721 "Cappadocia", /* count=5, saving=45, meanFirstPos=0 */
722 "Southbound", /* count=5, saving=45, meanFirstPos=0 */
723 "Guarameranga", /* count=4, saving=44, meanFirstPos=0 */
724 "View", /* count=14, saving=42, meanFirstPos=1 */
725 "Three", /* count=8, saving=32, meanFirstPos=0 */
726 }));
727
728 /**The description.
729 * This can be:
730 * <lu>
731 * <li>null, meaning no description,</li>
732 * <li>a non-empty String for the literal description</li>
733 * <li>a non-empty byte[] containing a maximally deflated
734 * copy of the string without zlib or gzip headers and trailers</li>
735 * <li>a ROByteArray created with compressFromString()</li>
736 * <li>a Name (the preferred on-the-wire form)</li>
737 * <li>a Compact7BitString (the preferred in-memory form).</li>
738 * <p>
739 * Support for the byte[] and ROByteArray formats
740 * exists mainly for backwards compatibility,
741 * ie deserialising old values.
742 * All bytes in the source string are of value
743 * 0--127 (ie ASCII 7-bit).
744 * <p>
745 * Marked volatile to allow safe lockless update by compact().
746 */
747 private volatile Object description;
748
749 /**Get the description; returns null if none, else non-empty String.
750 * This text may contain non-XML-safe and non-ASCII 7-bit characters.
751 */
752 public String getDescription()
753 {
754 // Capture local/fast snapshot.
755 final Object d = description;
756
757 if(d == null) { return(null); }
758
759 // If this is an ROByteArray or byte[] then attempt to uncompress it.
760 try
761 {
762 if(d instanceof ROByteArray)
763 { return(ROByteArray.uncompressToString((ROByteArray) d)); }
764 if(d instanceof byte[])
765 { return(_decompressASCII7Text((byte[]) d)); }
766 }
767 catch(final Exception e)
768 {
769 e.printStackTrace(); // Whinge...
770 return(null); // Return null in case of problem!
771 }
772
773 // Assume that all other representations can use toString().
774 return(d.toString());
775 }
776
777 /**Reports if there is a description; returns true if so.
778 * This is relatively inexpensive as it does not require
779 * inspecting or decompressing (etc) any message text.
780 */
781 public boolean hasDescription()
782 { return(description != null); }
783
784
785 /**Attempt to decompress some deflated bytes to an ASCII 7-bit String.
786 * (Actually, we won't mind if it happens to be 8-bit.)
787 */
788 private static String _decompressASCII7Text(final byte compressed[])
789 throws IOException
790 {
791 final byte raw7bitText[] = FileTools.decompressDeflatedData(compressed);
792 // Turn this into a String.
793 final StringBuilder sb = new StringBuilder(raw7bitText.length);
794 for(int i = 0; i < raw7bitText.length; ++i)
795 { sb.append((char) ((raw7bitText[i]) & 0xff)); }
796 return(sb.toString());
797 }
798
799 // /**Attempt to compress an ASCII 7-bit String to a maximally-deflated (no-header/no-trailer) byte[].
800 // * The string must be pure ASCII 7-bit, ie all characters 0--127.
801 // */
802 // private static byte[] _compressASCII7Text(String in)
803 // {
804 // final byte raw7bitText[] = new byte[in.length()];
805 // for(int i = raw7bitText.length; --i >= 0; )
806 // {
807 // final char c = in.charAt(i);
808 // assert (c & ~0x7f) == 0;
809 // raw7bitText[i] = (byte) c;
810 // }
811 // return(FileTools.compressDeflatableData(raw7bitText, 0, raw7bitText.length));
812 // }
813
814 /**Location information; null if none.
815 * Information here must always be marked "specific".
816 */
817 private final Location.Base location;
818
819 /**Return location information; null if none.
820 * Always marked as "specific".
821 */
822 public Location.Base getLocation()
823 {
824 return(location);
825 }
826
827 /**Accession data; null if none. */
828 private final AccessionData accession;
829
830 /**Get exhibit accession metadata; null if none available.
831 * This data includes such items as:
832 * <ul>
833 * <li>When the exhibit was loaded into to the Gallery.
834 * <li>Its checksums, timestamp, length, etc, when added to the Gallery.
835 * </ul>
836 */
837 public AccessionData getAccessionMetadata()
838 {
839 return(accession);
840 }
841
842 /**If true then defer compaction of metadata.
843 * If true then (any) compaction is <em>NOT</em> done
844 * during construction or deserialisation
845 * since it may place memory under more stress
846 * with old and new versions in memory simultaneously,
847 * but until some later point such as a call to compact().
848 * <p>
849 * Conversely, this could save a lot of time constructing/deserialising data
850 * before the first operations can be performed on it
851 * and where memory space is not the primary constraint.
852 */
853 private static final boolean DEFER_COMPACTION = true;
854
855 /**Compact the internal representation of this instance (and its sub-objects) if possible.
856 * This has no effect on the logical content of this instance in-memory or serialised,
857 * is guaranteed to be safe to run concurrently with other uses of this instance
858 * (and will take any locks as needed to work incrementally),
859 * and may do nothing but consume some CPU cycles.
860 * <p>
861 * This may be able to convert some state to a more memory-efficient representation
862 * after construction or deserialisation,
863 * and is suitable to call in a background thread.
864 * <p>
865 * We don't prevent multiple concurrent calls to this routine,
866 * since they are at worst wasteful of CPU but not unsafe.
867 */
868 public void compact()
869 {
870 if(!DEFER_COMPACTION) { return; /* Nothing to do; already must be compacted. */ }
871
872 final Object d = description; // Capture snapshot.
873
874 // If already in preferred compact form (or null) then return.
875 if(d == null) { return; /* Nothing to do. */ }
876 if(d instanceof Compact7BitString) { return; /* Nothing to do. */ }
877
878 // Replace metadata with intern()ed/compact form if possible.
879 // DHD20060815: About half the metadata instances are duplicates.
880 try { description = MemoryTools.intern(Compact7BitString.convertToCompact7BitString(getDescription(), sDict)); }
881 // Cope with non-7-bit-pure text by leaving as-is, though intern()ed.
882 catch(final IllegalArgumentException e) { description = MemoryTools.intern(d); }
883 }
884
885 /**Get name of this Compactable instance for tracking purposes, or null if none. */
886 public String getCompactableInstanceName() { return("EPL|"+System.identityHashCode(this)); }
887
888
889 /**Method that lists exhibit-specific files containing loadable properties of the given exhibit.
890 * This takes the same arguments as getLoadableProperties() and returns
891 * a Set of the String names of all files (relative to exhibitBaseDir)
892 * that contain loadable-properties for this exhibit.
893 * <p>
894 * This may name more files than are currently used by the system,
895 * eg including legacy files.
896 * <p>
897 * This never returns null, though may return an empty set.
898 * <p>
899 * When backing up exhibit data these files should be backed up
900 * with the relevant exhibit.
901 */
902 public static Set<String> listPropsFiles(final Name.ExhibitFull exhibitName,
903 final File exhibitBaseDir)
904 {
905 if(null == exhibitName) { throw new IllegalArgumentException(); }
906
907 // final File fullExhibitPath = new File(exhibitBaseDir, exhibitName);
908 // if(!fullExhibitPath.exists())
909 // { throw new FileNotFoundException("exhibit missing"); }
910
911 // Generate a Set of String candidate names,
912 // and eliminate those that don't actually exist.
913 final Set<String> result = new HashSet<String>(11);
914 // New properties file.
915 result.add(relPathToNewProperties(exhibitName));
916 // Old description file.
917 result.add(relPathToOldDescription(exhibitName));
918 // New accession file.
919 result.add(relPathToNewAccession(exhibitName));
920 // Old accession file.
921 result.add(relPathToOldAccession(exhibitName));
922 // Old location file.
923 result.add(relPathToOldLocation(exhibitName));
924
925 for(final Iterator<String> it = result.iterator(); it.hasNext(); )
926 {
927 final String f = it.next();
928 if(!(new File(exhibitBaseDir, f)).exists())
929 {
930 // We should remove this file as it doesn't exist.
931 it.remove();
932 }
933 }
934
935 return(result);
936 }
937 }
938