001    /*
002    Copyright (c) 1996-2011, Damon Hart-Davis
003    All rights reserved.
004    
005    Redistribution and use in source and binary forms, with or without
006    modification, are permitted provided that the following conditions are
007    met:
008    
009      * Redistributions of source code must retain the above copyright
010        notice, this list of conditions and the following disclaimer.
011    
012      * Redistributions in binary form must reproduce the above copyright
013        notice, this list of conditions and the following disclaimer in the
014        documentation and/or other materials provided with the
015        distribution.
016    
017    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
018    IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
019    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
020    PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
021    OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
022    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
023    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
024    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
025    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
026    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
027    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
028    */
029    package org.hd.d.pg2k.svrCore;
030    
031    import java.io.BufferedInputStream;
032    import java.io.DataOutputStream;
033    import java.io.File;
034    import java.io.FileInputStream;
035    import java.io.IOException;
036    import java.io.InputStreamReader;
037    import java.io.InvalidObjectException;
038    import java.io.ObjectInputValidation;
039    import java.io.OutputStream;
040    import java.io.Reader;
041    import java.io.Serializable;
042    import java.util.Arrays;
043    import java.util.HashSet;
044    import java.util.Iterator;
045    import java.util.Properties;
046    import java.util.Set;
047    
048    import org.hd.d.pg2k.svrCore.location.Location;
049    
050    /**Immutable (and serialisable) store of all loadable auxiliary properties of a single exhibit.
051     * These are properties (such as descriptive comment) that must be fetched
052     * from the exhibit database and that cannot be computed.
053     * <p>
054     * This is designed to be efficient on the wire and in memory, since these
055     * details will be held for each and every exhibit.
056     * <p>
057     * Note that a factory method is used to generate instances of this class
058     * and where there is no loadable data for an exhibit (which will be rare)
059     * null is returned.
060     * <p>
061     * Equals is implemented so that duplicates (particularly of EMPTY) can
062     * easily be discarded.  hashCode() is implemented to support the semantics
063     * of equals() and as a hash over all the contents of the object.
064     * <p>
065     * By default this looks for data in a new-style properties file first
066     * (fileBase + '/' + exhibitName + ".props"),
067     * and then, iff READ_OLD_FILES == true, in the old-style individual files.
068     */
069    public final class ExhibitPropsLoadable implements Serializable, ObjectInputValidation,
070                                                       MemoryTools.Internable,
071                                                       MemoryTools.Compactable
072        {
073        /**Shared EMPTY instance (no auxiliary data on exhibit). */
074        public static final ExhibitPropsLoadable EMPTY = new ExhibitPropsLoadable(
075            null,    // No description.
076            null,    // No location.
077            null,    // No accession data.
078            false);
079    
080        /**Private constructor so creation is by factory method.
081         * Parameters should already have been checked before we are called.
082         * <p>
083         * We may intern() components that are likely to be duplicated
084         * and that may consume significant memory.
085         *
086         * @param _description no more than MAX_DESCRIPTION_CHARS or null or "" if none
087         * @param doNotCompact  if true then do not attempt to compact the description; store as passed in
088         */
089        private ExhibitPropsLoadable(final Object _description,
090                                     final Location.Base _location,
091                                     final AccessionData _accession,
092                                     final boolean doNotCompact)
093            {
094            // Store the accession data.
095            // Replace empty instances with null
096            // and intern() the rest to reduce (tenured) heap churn during updates.
097            accession = ((_accession == null) || (_accession.equals(AccessionData.EMPTY))) ?
098                        null : MemoryTools.intern(_accession);
099    
100            // If the description is null or empty then store a (canonical) null.
101            description = _description; // Allows use of getDescription().
102            if((_description == null) || "".equals(getDescription()))
103                { description = null; }
104            else
105                {
106                // If not forbidding compaction entirely
107                // (and there's not lots of memory free in order to eagerly make compact form)
108                // then keep whatever form we were given.
109                // Eager compaction may be able to use otherwise-idle cycles while deserialising for example.
110                if(doNotCompact || (_description instanceof Compact7BitString) ||
111                   (DEFER_COMPACTION && !MemoryTools.lotsFree()))
112                    { description = _description; }
113                // Else if not in preferred form then try to make it so.
114                else if(!(_description instanceof Compact7BitString))
115                    {
116                    Object d = _description;
117                    try { d = MemoryTools.intern(Compact7BitString.convertToCompact7BitString(getDescription(), sDict)); }
118                    catch(final Exception e) { }
119                    description = d;
120                    }
121                }
122    
123            // Store any location information supplied.
124            if(Location.NONE.equals(_location))
125                { location = null; } // More efficient representation for "NONE".
126            else
127                { location = MemoryTools.intern(_location); }
128    
129    
130            // Verify object state.
131            try { validateObject(); }
132            catch(final InvalidObjectException e)
133                { throw new IllegalArgumentException(e.getMessage()); }
134            }
135    
136        /**Make relative path to new-style properties file. */
137        private static String relPathToNewProperties(final Name.ExhibitFull exhibitName)
138            {
139            return(exhibitName + EXHIBIT_PROPS_FILENAME_SUFFIX);
140            }
141    
142        /**Make relative path to old-style en-GB HTML generic description file. */
143        private static String relPathToOldDescription(final Name.ExhibitFull exhibitName)
144            {
145            return(exhibitName + CoreConsts.DESCRIPTION_FILE_SUFFIX);
146            }
147    
148        /**Make relative path to new accession-date file; never null.
149         * This is an XML-format file.
150         * <p>
151         * Public so that other routines can find and read this file directly.
152         *
153         * @param exhibitName  non-null relative path to exhibit
154         */
155        public static String relPathToNewAccession(final Name.ExhibitFull exhibitName)
156            {
157    //        final String exhibitDir = (new File(exhibitName)).getParent();
158    //        final String exhibitFile = (new File(exhibitName)).getName();
159    //        return((new File(exhibitDir, ".accession." + exhibitFile + ".xml")).getPath());
160            if(null == exhibitName) { throw new IllegalArgumentException(); }
161            final Name.ExhibitShort f = exhibitName.getShortName();
162            final CharSequence d = exhibitName.subSequence(0, exhibitName.length() - f.length()); // d includes trailing slash.
163            assert(d.charAt(d.length()-1) == '/');
164            return(d + ".accession." + f + ".xml");
165            }
166    
167        /**Make relative path to old accession-date file; never null.
168         * Historically the format of this file is a single line of the format:
169         * <pre>
170         *     YYYY[/MM[/DD]]
171         * </pre>
172         * where YYYY is the year and MM is the month 01--12 and DD is the 01-31,
173         * all relative to UTC/GMT, and represent the approximate date that the
174         * exhibits where incorporated in the library.
175         * <p>
176         * Public so that other routines can find and read this file directly.
177         *
178         * @param exhibitName  non-null relative path to exhibit
179         */
180        public static String relPathToOldAccession(final Name.ExhibitFull exhibitName)
181            {
182    //        final String exhibitDir = (new File(exhibitName)).getParent();
183    //        final String exhibitFile = (new File(exhibitName)).getName();
184    //        return((new File(exhibitDir, ".accession." + exhibitFile + ".txt")).getPath());
185            if(null == exhibitName) { throw new IllegalArgumentException(); }
186            final Name.ExhibitShort f = exhibitName.getShortName();
187            final CharSequence d = exhibitName.subSequence(0, exhibitName.length() - f.length()); // d includes trailing slash.
188            assert(d.charAt(d.length()-1) == '/');
189            return(d + ".accession." + f + ".txt");
190            }
191    
192        /**Make relative path to old-style accession-date YYYY[/MM[/DD]] file. */
193        private static String relPathToOldLocation(final Name.ExhibitFull exhibitName)
194            {
195    //        final String exhibitDir = (new File(exhibitName)).getParent();
196    //        final String exhibitFile = (new File(exhibitName)).getName();
197    //        return((new File(exhibitDir, ".location." + exhibitFile + ".txt")).getPath());
198            if(null == exhibitName) { throw new IllegalArgumentException(); }
199            final Name.ExhibitShort f = exhibitName.getShortName();
200            final CharSequence d = exhibitName.subSequence(0, exhibitName.length() - f.length()); // d includes trailing slash.
201            assert(d.charAt(d.length()-1) == '/');
202            return(d + ".location." + f + ".txt");
203            }
204    
205    
206        /**Make a new instance with data loaded from the filesystem, given the exhibit name and some filesystem base directories.
207         * In case of severe unexpected problems with retrieving information
208         * an IOException is thrown, but minor problems are resolved
209         * quietly and a message is reported on System.err.
210         *
211         * @throws IOException  in case of severe, unrecoverable difficulty with this exhibit
212         * @return null, or non-null non-EMPTY result
213         */
214        public static ExhibitPropsLoadable getLoadableProperties(final Name.ExhibitFull exhibitName,
215                                                                 final File exhibitBaseDir)
216            throws IOException
217            {
218    //        final File fullExhibitPath = new File(exhibitBaseDir, exhibitName);
219    //        if(!fullExhibitPath.exists())
220    //            { throw new FileNotFoundException("exhibit missing"); }
221    
222            // Attempt to load the new-style properties file,
223            // if extant.
224            Properties p = null;
225            final File newPropsPath = new File(exhibitBaseDir, relPathToNewProperties(exhibitName));
226            if(newPropsPath.exists())
227                {
228                // Attempt to load from file and allow any errors to
229                // propagate to caller.
230                final FileInputStream fis = new FileInputStream(newPropsPath);
231                try { p = FileTools.loadProperties(new BufferedInputStream(fis)); }
232                finally { fis.close(); }
233                }
234    
235            Object _description = null;
236            // Try to retrieve description from new properties.
237            if(p != null) { _description = p.getProperty(PNAME_DESCRIPTION); }
238            // If not in new properties, and in right mode, look for old file.
239            if(READ_OLD_FILES && (_description == null))
240                {
241                final File oldDescriptionPath = new File(exhibitBaseDir, relPathToOldDescription(exhibitName));
242                // If old file exists, try to read it, propagating any IOException.
243                if(oldDescriptionPath.exists())
244                    {
245                    _description = FileTools.readTextFile(oldDescriptionPath);
246                    }
247                }
248            // Trim the description, and if zero length, set it back to null.
249            if(_description instanceof String)
250                {
251                if(_description != null) { _description = ((String) _description).trim(); }
252                if((_description != null) && (((String) _description).length() == 0)) { _description = null; }
253                // If it is over-length, complain on System.err and set it back to null.
254                if((_description != null) && (((String) _description).length() > CoreConsts.DESCRIPTION_MAX_CHARS))
255                    {
256                    System.err.println("ERROR: description too long (max "+CoreConsts.DESCRIPTION_MAX_CHARS+" chars) for exhibit " + exhibitName);
257                    _description = null;
258                    }
259                // If not deferring compaction and we have been passed a String
260                // attempt to compact and intern() it now.
261                if(!DEFER_COMPACTION)
262                    {
263                    try { _description = MemoryTools.intern(Compact7BitString.convertToCompact7BitString((String) _description, sDict)); }
264                    catch(final Exception e) { }
265                    }
266                }
267    
268            Location.Base _location = null;
269            // Try to retrieve description from new properties.
270            if((p != null) &&
271                // Check that at least minimal location field is present
272                // before attempting full parse.
273                (p.getProperty(PNAME_LOCATION_PREFIX + '.' + Location.Base.typeKey) != null))
274                {
275                _location = Location.Base.buildFromProperties(
276                                true, // Mark as "specific".
277                                PNAME_LOCATION_PREFIX,
278                                p);
279                }
280            // If not in new properties, and in right mode, look for old file.
281            if(READ_OLD_FILES && (_location == null))
282                {
283                final File oldLocationPath = new File(exhibitBaseDir, relPathToOldLocation(exhibitName));
284                // If old file exists, try to read it, propagating any IOException.
285                if(oldLocationPath.exists())
286                    { _location = Location.Base.buildFromFile(true, oldLocationPath); }
287                }
288    
289            // Try to load new accession data, if present.
290            // There is no point trying to intern() this to save memory
291            // since it should be different for every exhibit,
292            // though doing so may help reduce tenured heap churn during updates.
293            AccessionData ad = null;
294            final File newAccessionDataPath = new File(exhibitBaseDir, relPathToNewAccession(exhibitName));
295            if(newAccessionDataPath.exists())
296                {
297                try
298                    {
299                    // Get the XML accession data from (UTF-8 format) file...
300                    final Reader r = new InputStreamReader(
301                        new BufferedInputStream(
302                            new FileInputStream(newAccessionDataPath)),
303                                                    CoreConsts.FILE_ENCODING_UTF_8);
304                    try
305                        {
306                        final StringBuilder sb = new StringBuilder((int) newAccessionDataPath.length());
307                        int ic;
308                        while((ic = r.read()) != -1)
309                            { sb.append((char) ic); }
310                        ad = AccessionData.parseFromXML(sb.toString());
311                        }
312                    finally
313                        { r.close(); }
314                    }
315                catch(final Exception e)
316                    {
317                    e.printStackTrace();
318                    throw new IOException("unable to parse accession file: " + newAccessionDataPath + ": " + e.getMessage());
319                    }
320                }
321    
322    
323            // Try to create a new set of loadable properties.
324            final ExhibitPropsLoadable result = new ExhibitPropsLoadable(_description,
325                                                                         _location,
326                                                                         ad,
327                                                                         false /* Allow compaction. */ );
328    
329            // If it is equivalent to the EMPTY one then return null instead.
330            if(EMPTY.equals(result)) { return(null); }
331    
332            // Got some loadable properties!
333            // Make sure that we eliminate any easy duplicates.
334            return(MemoryTools.intern(result));
335            }
336    
337    
338        /**Based on entire content of object; if equivalent content then they are equal.
339         * Can be used to eliminate duplicates, eg after deserialising.
340         */
341        @Override
342        public boolean equals(final Object o)
343            {
344            if(this == o) { return(true); } // True if same object!
345            if(!(o instanceof ExhibitPropsLoadable)) { return(false); }
346            final ExhibitPropsLoadable other = (ExhibitPropsLoadable) o;
347    
348            // Compare location for equality.
349            // Handle null case specially.
350            // This should be fast to compare, so do it early.
351            if(location == null) { if(other.location != null) { return(false); } }
352            else if(!location.equals(other.location)) { return(false); }
353    
354            // This may be somewhat slow to compare.
355            if(accession == null) { if(other.accession != null) { return(false); } }
356            else if(!accession.equals(other.accession)) { return(false); }
357    
358            // Compare description by external form (likely to be very slow).
359            // Use quick internal instance compare first to see
360            // if we really need to decode the description at all...
361            if(description != other.description)
362                {
363                final String desc = getDescription();
364                final String descO = other.getDescription();
365                // Handle null case specially.
366                if(desc == null) { if(descO != null) { return(false); } }
367                else if(!desc.equals(descO)) { return(false); }
368                }
369    
370            assert(hashCode() == other.hashCode());
371    
372            return(true); // Yes, they are the same!
373            }
374    
375        /**Human-readable summary. */
376        @Override public String toString()
377            {
378            final StringBuilder sb = new StringBuilder();
379            sb.append("<ExhibitPropsLoadable");
380            final String desc = getDescription();
381            if(desc != null) { sb.append(":description=").append(description); }
382            if(location != null) { sb.append(":location=").append(location); }
383            if(accession != null) { sb.append(":accession=").append(accession); }
384            sb.append(">");
385            return(sb.toString());
386            }
387    
388        /**Private to hashCode() lock-free cache of computed hash value; initially 0 indicating not computed. */
389        private transient int _hash;
390    
391        /**Based on entire content of object; if equivalent content then they have same hash.
392         * Can be used for tracking if content has changed from one scan to another,
393         * ie is optimised for spotting changes rather than key distribution for use
394         * in a Hashtable, for example, though it should be serviceable for
395         * that purpose too as it is computed once and cached (transiently).
396         * <p>
397         * The cache relies on atomic assignment of int and is lock-free.
398         * <p>
399         * This may be slow to compute.
400         * <p>
401         * The hash value is never zero.
402         */
403        @Override
404        public int hashCode()
405            {
406            int result = _hash;
407            if(result == 0) // Result needs recomputing.
408                {
409                // Recompute and cache.
410                try {
411                    // Make a CRC-32 `digest' or checksum...
412                    // We use CRC32 rather than Adler32 because we are
413                    // likely to be operating on small amounts of data
414                    // or small changes where CRC32 may prove much superior.
415                    final java.util.zip.Checksum check = new java.util.zip.CRC32();
416                    final DataOutputStream dos =
417                        new DataOutputStream(
418                            (new OutputStream(){
419                                @Override
420                                public final void write(final int b)
421                                    { check.update(b); }
422                                @Override
423                                public final void write(final byte[] b, final int off, final int len)
424                                    { check.update(b, off, len); }
425                                })
426                            );
427    
428                    // Insert data/property values into the checksum...
429                    // We must always insert the same value for a field
430                    // while its external representation remains unchanged,
431                    // and insert a different value if the external value changes,
432                    // so as to make the hash sensitive to what a user of the
433                    // class would see.
434    
435                    // Hash the description as a UTF8 String in its external representation...
436                    final String desc = getDescription();
437                    if(desc == null) { dos.writeByte(255); } else { dos.writeUTF(desc); }
438    
439                    // We write out the hashCode() to avoid toString() formatting funnies.
440                    final Location.Base loc = getLocation();
441                    if(loc == null) { dos.writeByte(1); } else { dos.writeInt(loc.hashCode()); }
442    
443                    // Capture the accession data as part of the hash/checksum...
444                    final AccessionData accMD = getAccessionMetadata();
445                    if(accMD == null) { dos.writeByte(2); } else { dos.writeInt(accMD.hashCode()); }
446    
447                    dos.flush(); // Make sure that all the data is in the checksum.
448                    result = (int) check.getValue();
449                    if(result == 0) { result = 1; } // Ensure computed hash is non-zero.
450                    _hash = result; // Cache the computed hash.
451                    }
452                catch(final IOException e) { throw new Error("internal error"); } // Should not happen.
453                }
454            return(result); // Return the hash...
455            }
456    
457    
458    
459        /**My serial ID. */
460        private static final long serialVersionUID = 8659120481539311156L;
461    
462        /**Deserialise: use constructor for validation, defensive copying, etc.
463         * Also resolve all empty instances to a singleton as a minor optimisation,
464         * and immediately intern() new values so as to immediately discard
465         * duplicates (eg of exhibits already known) ASAP to minimise heap churn.
466         */
467        protected Object readResolve()
468            // throws ObjectStreamException
469            {
470            // Avoid duplicates of empty case.
471            if(equals(EMPTY)) { return(EMPTY); }
472    
473            // Construct new instance of object in normal defensive way.
474            // This may also allow compaction on the fly.
475            return(new ExhibitPropsLoadable(description, location, accession, false));
476            }
477    
478        /**Serialise: write in the best format for the wire.
479         * To get best aggregate compressed size on the wire,
480         * eg where the compressed stream contains many similar non-identical instances,
481         * we always write out (non-null) description in Name format (minus the outer tags)
482         * regardless of how it is actually held in memory.
483         * This also makes us immune to changes in the the internals of the other formats
484         * and allows use of a static dictionary with Compact7BitString for better in-memory compression
485         * (ie effectively cross-instance compression).
486         * <p>
487         * This allows a stream compressor to effectively remove the redundancy between
488         * instances of this class on the wire as well as internal redundancies.
489         * <p>
490         * We assume that there will almost never be entirely identical instances on one stream
491         * so we don't mind writing new copies each time where it does happen.
492         */
493        protected Object writeReplace()
494            // throws ObjectStreamException
495            {
496            // Don't write multiple EMPTY instances.
497            if(this.equals(EMPTY)) { return(EMPTY); }
498    
499            // If the description is null then write this instance as-is.
500            if(null == description) { return(this); }
501    
502            // Return a Name-based instance for better inter-instance stream compression,
503            // and for a more stable serialised representation.
504            // We make sure that the String description is de-duped if at all possible.
505            return(new ExhibitPropsLoadable(Name.create(getDescription()),
506                            location,
507                            accession,
508                            true)); // Keep Name representation for the wire.
509            }
510    
511        /**Validate fields/state.
512         * Called in the constructor and possibly after de-serialising.
513         * <p>
514         * Barf if something bad is found.
515         * (Maybe allow some extra info in debug version.)
516         */
517        public void validateObject()
518            throws InvalidObjectException
519            {
520            // Check that all components are sane and safe.
521            // Check that the description is in an allowed form and can be decompressed.
522            if(description != null)
523                {
524                if(description instanceof String)
525                    {
526                    final int len = ((String) description).length();
527                    if((len < 1) || (len > CoreConsts.DESCRIPTION_MAX_CHARS))
528                        { throw new InvalidObjectException("bad object: bad description String length"); }
529                    }
530                else if(description instanceof Name)
531                    {
532                    final int len = ((Name) description).length();
533                    if((len < 1) || (len > CoreConsts.DESCRIPTION_MAX_CHARS))
534                        { throw new InvalidObjectException("bad object: bad description Name length"); }
535                    }
536                else if(description instanceof Compact7BitString)
537                    {
538                    final String s = description.toString();
539                    final int len = s.length();
540                    if((len < 1) || (len > CoreConsts.DESCRIPTION_MAX_CHARS))
541                        { throw new InvalidObjectException("bad object: bad description Compact7BitString length"); }
542                    }
543                else if(description instanceof ROByteArray)
544                    {
545                    final int rawLen = ((ROByteArray) description).length();
546                    if((rawLen < 1) || (rawLen > CoreConsts.DESCRIPTION_MAX_CHARS))
547                        { throw new InvalidObjectException("bad object: bad description ROByteArray length"); }
548                    try {
549                        final int len = (ROByteArray.uncompressToString((ROByteArray) description)).length();
550                        if((len < 1) || (len > CoreConsts.DESCRIPTION_MAX_CHARS))
551                            { throw new InvalidObjectException("bad object: bad description ROByteArray uncompressed length"); }
552                        }
553                    catch(final Exception e)
554                        { throw new InvalidObjectException("bad object: bad description ROByteArray: " + e.getMessage()); }
555                    }
556                else if(description instanceof byte[])
557                    {
558                    final int rawLen = ((byte[]) description).length;
559                    if((rawLen < 1) || (rawLen > CoreConsts.DESCRIPTION_MAX_CHARS))
560                        { throw new InvalidObjectException("bad object: bad description byte[] length"); }
561                    try {
562                        final int len = _decompressASCII7Text((byte[]) description).length();
563                        if((len < 1) || (len > CoreConsts.DESCRIPTION_MAX_CHARS))
564                            { throw new InvalidObjectException("bad object: bad description byte[] uncompressed length"); }
565                        }
566                    catch(final Exception e)
567                        { throw new InvalidObjectException("bad object: bad description byte[]: " + e.getMessage()); }
568                    }
569                else
570                    { throw new InvalidObjectException("bad object: bad description type"); }
571                }
572    
573            // If location data is not null, it must be marked "specific".
574            if(location != null)
575                {
576                if(Location.NONE.equals(location))
577                    { throw new InvalidObjectException("bad object: location must be null rather than NONE"); }
578                if(location.specific == false)
579                    { throw new InvalidObjectException("bad object: location data must be \"specific\""); }
580                }
581    
582            if(AccessionData.EMPTY.equals(accession))
583                { throw new InvalidObjectException("bad object: accession must be null rather than EMPTY"); }
584            }
585    
    /**Filename suffix of the new-format exhibit-properties file, if extant. */
    public static final String EXHIBIT_PROPS_FILENAME_SUFFIX = ".props";

    /**Allow backwards compatibility if true, ie look for old-style files for info not found in the new props file. */
    public static final boolean READ_OLD_FILES = true;

    /**Name of the property holding the (default English text/HTML) description in the new properties file. */
    public static final String PNAME_DESCRIPTION = "description";

    /**Prefix of the names of the properties holding location information in the new properties file. */
    public static final String PNAME_LOCATION_PREFIX = "location";
597    
    /**Shared static dictionary for use with in-memory Compact7BitString metadata.
     * This is the dictionary handed to Compact7BitString.convertToCompact7BitString()
     * when compact() converts a description to its compact in-memory form.
     * <p>
     * We expose this to help generate tuned dictionary values;
     * this is safe to do since this dictionary is immutable.
     * <p>
     * Each token is annotated with the statistics it was selected on
     * (occurrence count, estimated saving, mean first position),
     * other than the one entry marked as manually added.
     * <p>
     * NOTE(review): the first constructor argument ("EPL") is presumably a name/tag
     * for this dictionary, and the token order presumably affects the encoding,
     * so confirm against Compact7BitString.StaticDictionary before reordering or editing entries.
     */
    public static final Compact7BitString.StaticDictionary sDict = new Compact7BitString.StaticDictionary("EPL",
        // Most-common and largest early tokens first in list for maximum savings.
        Arrays.asList(new String[]{
            "the",    /* count=804, saving=1608, meanFirstPos=18 */
            "Taken",    /* count=285, saving=1140, meanFirstPos=4 */
            "and",    /* count=462, saving=924, meanFirstPos=30 */
            "from",    /* count=211, saving=633, meanFirstPos=27 */
            "with",    /* count=208, saving=624, meanFirstPos=26 */
            ", ",    /* count=605, saving=605, meanFirstPos=25 */
            "of",    /* count=541, saving=541, meanFirstPos=25 */
            "The",    /* count=260, saving=520, meanFirstPos=20 */
            "target",    /* count=102, saving=510, meanFirstPos=41 */
            "Arrived",    /* count=82, saving=492, meanFirstPos=3 */
            "in",    /* count=448, saving=448, meanFirstPos=27 */
            "this",    /* count=146, saving=438, meanFirstPos=32 */
            "http",    /* count=141, saving=423, meanFirstPos=29 */
            "that",    /* count=136, saving=408, meanFirstPos=42 */
            "blockquote",    /* count=45, saving=405, meanFirstPos=44 */
            "called",    /* count=78, saving=390, meanFirstPos=17 */
            "href",    /* count=130, saving=390, meanFirstPos=27 */
            "This",    /* count=125, saving=375, meanFirstPos=17 */
            "to",    /* count=375, saving=375, meanFirstPos=33 */
            "for",    /* count=168, saving=336, meanFirstPos=36 */
            "_top",    /* count=111, saving=333, meanFirstPos=43 */
            "over",    /* count=111, saving=333, meanFirstPos=14 */
            "is",    /* count=312, saving=312, meanFirstPos=28 */
            "was",    /* count=149, saving=298, meanFirstPos=33 */
            "on",    /* count=283, saving=283, meanFirstPos=27 */
            "://",    /* count=141, saving=282, meanFirstPos=30 */
            "afternoon",    /* count=33, saving=264, meanFirstPos=13 */
            "distribution",    /* count=24, saving=264, meanFirstPos=12 */
            " (",    /* count=252, saving=252, meanFirstPos=30 */
            ". ",    /* count=240, saving=240, meanFirstPos=34 */
            "www",    /* count=120, saving=240, meanFirstPos=32 */
            "taken",    /* count=57, saving=228, meanFirstPos=22 */
            "London",    /* count=45, saving=225, meanFirstPos=27 */
            "are",    /* count=112, saving=224, meanFirstPos=38 */
            "picture",    /* count=37, saving=222, meanFirstPos=29 */
            "about",    /* count=55, saving=220, meanFirstPos=33 */
            "Originally",    /* count=24, saving=216, meanFirstPos=0 */
            "background",    /* count=24, saving=216, meanFirstPos=26 */
            "which",    /* count=54, saving=216, meanFirstPos=45 */
            "at",    /* count=212, saving=212, meanFirstPos=28 */
            "email",    /* count=52, saving=208, meanFirstPos=16 */
            "resolution",    /* count=23, saving=207, meanFirstPos=20 */
            "Scanned",    /* count=34, saving=204, meanFirstPos=9 */
            "2000",    /* count=67, saving=201, meanFirstPos=15 */
            "scanned",    /* count=32, saving=192, meanFirstPos=12 */
            "building",    /* count=27, saving=189, meanFirstPos=25 */
            "have",    /* count=62, saving=186, meanFirstPos=50 */
            "100dpi",    /* count=35, saving=175, meanFirstPos=18 */
            "by",    /* count=175, saving=175, meanFirstPos=30 */
            "people",    /* count=35, saving=175, meanFirstPos=43 */
            "...",    /* count=85, saving=170, meanFirstPos=34 */
            "colour",    /* count=34, saving=170, meanFirstPos=27 */
            "compression",    /* count=17, saving=170, meanFirstPos=47 */
            "England",    /* count=28, saving=168, meanFirstPos=31 */
            "local",    /* count=42, saving=168, meanFirstPos=26 */
            "\" ",    /* count=167, saving=167, meanFirstPos=34 */
            ") ",    /* count=165, saving=165, meanFirstPos=35 */
            "very",    /* count=55, saving=165, meanFirstPos=30 */
            ".  ",    /* count=82, saving=164, meanFirstPos=31 */
            "some",    /* count=54, saving=162, meanFirstPos=32 */
            "Net",    /* count=78, saving=156, meanFirstPos=10 */
            "more",    /* count=52, saving=156, meanFirstPos=34 */
            "after",    /* count=38, saving=152, meanFirstPos=32 */
            "2004",    /* MANUALLY ADDED: prefix count ~ 70 */
            "</",    /* count=147, saving=147, meanFirstPos=48 */
            "Possibly",    /* count=21, saving=147, meanFirstPos=1 */
            "See",    /* count=73, saving=146, meanFirstPos=16 */
            "can",    /* count=73, saving=146, meanFirstPos=39 */
            " \"",    /* count=144, saving=144, meanFirstPos=24 */
            "2003",    /* count=48, saving=144, meanFirstPos=17 */
            "apparently",    /* count=16, saving=144, meanFirstPos=26 */
            "=\"",    /* count=142, saving=142, meanFirstPos=28 */
            "but",    /* count=71, saving=142, meanFirstPos=45 */
            "says",    /* count=47, saving=141, meanFirstPos=16 */
            "image",    /* count=35, saving=140, meanFirstPos=37 */
            "original",    /* count=20, saving=140, meanFirstPos=40 */
            "small",    /* count=34, saving=136, meanFirstPos=27 */
            "like",    /* count=45, saving=135, meanFirstPos=28 */
            "think",    /* count=33, saving=132, meanFirstPos=36 */
            "through",    /* count=22, saving=132, meanFirstPos=42 */
            "as",    /* count=130, saving=130, meanFirstPos=37 */
            "rather",    /* count=26, saving=130, meanFirstPos=38 */
            "html",    /* count=43, saving=129, meanFirstPos=41 */
            "GMT",    /* count=64, saving=128, meanFirstPos=11 */
            "; ",    /* count=127, saving=127, meanFirstPos=31 */
            "one",    /* count=63, saving=126, meanFirstPos=37 */
            "behind",    /* count=25, saving=125, meanFirstPos=38 */
            " <",    /* count=123, saving=123, meanFirstPos=25 */
            "images",    /* count=24, saving=120, meanFirstPos=33 */
            "foreground",    /* count=13, saving=117, meanFirstPos=23 */
            "camera",    /* count=22, saving=110, meanFirstPos=30 */
            "University",    /* count=12, saving=108, meanFirstPos=13 */
            "between",    /* count=18, saving=108, meanFirstPos=31 */
            "just",    /* count=36, saving=108, meanFirstPos=27 */
            "outside",    /* count=18, saving=108, meanFirstPos=18 */
            "subject",    /* count=18, saving=108, meanFirstPos=17 */
            "little",    /* count=21, saving=105, meanFirstPos=29 */
            "buildings",    /* count=13, saving=104, meanFirstPos=25 */
            "From",    /* count=34, saving=102, meanFirstPos=17 */
            "station",    /* count=17, saving=102, meanFirstPos=18 */
            "white",    /* count=25, saving=100, meanFirstPos=24 */
            "yellow",    /* count=19, saving=95, meanFirstPos=22 */
            "midnight",    /* count=13, saving=91, meanFirstPos=9 */
            "June",    /* count=30, saving=90, meanFirstPos=18 */
            "Routemaster",    /* count=9, saving=90, meanFirstPos=8 */
            "blue",    /* count=30, saving=90, meanFirstPos=14 */
            "green",    /* count=22, saving=88, meanFirstPos=16 */
            "thanks",    /* count=17, saving=85, meanFirstPos=18 */
            "Note",    /* count=28, saving=84, meanFirstPos=10 */
            "Probably",    /* count=12, saving=84, meanFirstPos=13 */
            "Canon",    /* count=18, saving=72, meanFirstPos=5 */
            "Hattusha",    /* count=10, saving=70, meanFirstPos=0 */
            "Bruno",    /* count=17, saving=68, meanFirstPos=7 */
            "Brighton",    /* count=9, saving=63, meanFirstPos=9 */
            "Courtesy",    /* count=8, saving=56, meanFirstPos=0 */
            "Looking",    /* count=9, saving=54, meanFirstPos=2 */
            "Cappadocia",    /* count=5, saving=45, meanFirstPos=0 */
            "Southbound",    /* count=5, saving=45, meanFirstPos=0 */
            "Guarameranga",    /* count=4, saving=44, meanFirstPos=0 */
            "View",    /* count=14, saving=42, meanFirstPos=1 */
            "Three",    /* count=8, saving=32, meanFirstPos=0 */
        }));
727    
    /**The description.
     * This can be:
     * <ul>
     * <li>null, meaning no description,</li>
     * <li>a non-empty String for the literal description,</li>
     * <li>a non-empty byte[] containing a maximally deflated
     *     copy of the string without zlib or gzip headers and trailers,</li>
     * <li>a ROByteArray created with compressFromString(),</li>
     * <li>a Name (the preferred on-the-wire form),</li>
     * <li>a Compact7BitString (the preferred in-memory form).</li>
     * </ul>
     * <p>
     * Support for the byte[] and ROByteArray formats
     * exists mainly for backwards compatibility,
     * ie deserialising old values.
     * All bytes in the source string are of value
     * 0--127 (ie ASCII 7-bit).
     * <p>
     * Marked volatile to allow safe lockless update by compact().
     */
    private volatile Object description;
748    
749        /**Get the description; returns null if none, else non-empty String.
750         * This text may contain non-XML-safe and non-ASCII 7-bit characters.
751         */
752        public String getDescription()
753            {
754            // Capture local/fast snapshot.
755            final Object d = description;
756    
757            if(d == null) { return(null); }
758    
759            // If this is an ROByteArray or byte[] then attempt to uncompress it.
760            try
761                {
762                if(d instanceof ROByteArray)
763                    { return(ROByteArray.uncompressToString((ROByteArray) d)); }
764                if(d instanceof byte[])
765                    { return(_decompressASCII7Text((byte[]) d)); }
766                }
767            catch(final Exception e)
768                {
769                e.printStackTrace(); // Whinge...
770                return(null); // Return null in case of problem!
771                }
772    
773            // Assume that all other representations can use toString().
774            return(d.toString());
775            }
776    
777        /**Reports if there is a description; returns true if so.
778         * This is relatively inexpensive as it does not require
779         * inspecting or decompressing (etc) any message text.
780         */
781        public boolean hasDescription()
782            { return(description != null); }
783    
784    
785        /**Attempt to decompress some deflated bytes to an ASCII 7-bit String.
786         * (Actually, we won't mind if it happens to be 8-bit.)
787         */
788        private static String _decompressASCII7Text(final byte compressed[])
789            throws IOException
790            {
791            final byte raw7bitText[] = FileTools.decompressDeflatedData(compressed);
792            // Turn this into a String.
793            final StringBuilder sb = new StringBuilder(raw7bitText.length);
794            for(int i = 0; i < raw7bitText.length; ++i)
795                { sb.append((char) ((raw7bitText[i]) & 0xff)); }
796            return(sb.toString());
797            }
798    
799    //    /**Attempt to compress an ASCII 7-bit String to a maximally-deflated (no-header/no-trailer) byte[].
800    //     * The string must be pure ASCII 7-bit, ie all characters 0--127.
801    //     */
802    //    private static byte[] _compressASCII7Text(String in)
803    //        {
804    //        final byte raw7bitText[] = new byte[in.length()];
805    //        for(int i = raw7bitText.length; --i >= 0; )
806    //            {
807    //            final char c = in.charAt(i);
808    //            assert (c & ~0x7f) == 0;
809    //            raw7bitText[i] = (byte) c;
810    //            }
811    //        return(FileTools.compressDeflatableData(raw7bitText, 0, raw7bitText.length));
812    //        }
813    
814        /**Location information; null if none.
815         * Information here must always be marked "specific".
816         */
817        private final Location.Base location;
818    
819        /**Return location information; null if none.
820         * Always marked as "specific".
821         */
822        public Location.Base getLocation()
823            {
824            return(location);
825            }
826    
827        /**Accession data; null if none. */
828        private final AccessionData accession;
829    
830        /**Get exhibit accession metadata; null if none available.
831         * This data includes such items as:
832         * <ul>
833         * <li>When the exhibit was loaded into to the Gallery.
834         * <li>Its checksums, timestamp, length, etc, when added to the Gallery.
835         * </ul>
836         */
837        public AccessionData getAccessionMetadata()
838            {
839            return(accession);
840            }
841    
    /**If true then defer compaction of metadata.
     * If true then (any) compaction is <em>NOT</em> done
     * during construction or deserialisation,
     * since doing it then may place memory under more stress
     * with old and new versions in memory simultaneously;
     * it is instead put off until some later point such as a call to compact().
     * <p>
     * Deferring could also save a lot of time constructing/deserialising data
     * before the first operations can be performed on it,
     * where memory space is not the primary constraint.
     * <p>
     * When false, compact() returns immediately without doing any work.
     */
    private static final boolean DEFER_COMPACTION = true;
854    
855        /**Compact the internal representation of this instance (and its sub-objects) if possible.
856         * This has no effect on the logical content of this instance in-memory or serialised,
857         * is guaranteed to be safe to run concurrently with other uses of this instance
858         * (and will take any locks as needed to work incrementally),
859         * and may do nothing but consume some CPU cycles.
860         * <p>
861         * This may be able to convert some state to a more memory-efficient representation
862         * after construction or deserialisation,
863         * and is suitable to call in a background thread.
864         * <p>
865         * We don't prevent multiple concurrent calls to this routine,
866         * since they are at worst wasteful of CPU but not unsafe.
867         */
868        public void compact()
869            {
870            if(!DEFER_COMPACTION) { return; /* Nothing to do; already must be compacted. */ }
871    
872            final Object d = description; // Capture snapshot.
873    
874            // If already in preferred compact form (or null) then return.
875            if(d == null) { return; /* Nothing to do. */ }
876            if(d instanceof Compact7BitString) { return; /* Nothing to do. */ }
877    
878            // Replace metadata with intern()ed/compact form if possible.
879            // DHD20060815: About half the metadata instances are duplicates.
880            try { description = MemoryTools.intern(Compact7BitString.convertToCompact7BitString(getDescription(), sDict)); }
881            // Cope with non-7-bit-pure text by leaving as-is, though intern()ed.
882            catch(final IllegalArgumentException e) { description = MemoryTools.intern(d); }
883            }
884    
885        /**Get name of this Compactable instance for tracking purposes, or null if none. */
886        public String getCompactableInstanceName() { return("EPL|"+System.identityHashCode(this)); }
887    
888    
889        /**Method that lists exhibit-specific files containing loadable properties of the given exhibit.
890         * This takes the same arguments as getLoadableProperties() and returns
891         * a Set of the String names of all files (relative to exhibitBaseDir)
892         * that contain loadable-properties for this exhibit.
893         * <p>
894         * This may name more files than are currently used by the system,
895         * eg including legacy files.
896         * <p>
897         * This never returns null, though may return an empty set.
898         * <p>
899         * When backing up exhibit data these files should be backed up
900         * with the relevant exhibit.
901         */
902        public static Set<String> listPropsFiles(final Name.ExhibitFull exhibitName,
903                                                 final File exhibitBaseDir)
904            {
905            if(null == exhibitName) { throw new IllegalArgumentException(); }
906    
907    //        final File fullExhibitPath = new File(exhibitBaseDir, exhibitName);
908    //        if(!fullExhibitPath.exists())
909    //            { throw new FileNotFoundException("exhibit missing"); }
910    
911            // Generate a Set of String candidate names,
912            // and eliminate those that don't actually exist.
913            final Set<String> result = new HashSet<String>(11);
914            // New properties file.
915            result.add(relPathToNewProperties(exhibitName));
916            // Old description file.
917            result.add(relPathToOldDescription(exhibitName));
918            // New accession file.
919            result.add(relPathToNewAccession(exhibitName));
920            // Old accession file.
921            result.add(relPathToOldAccession(exhibitName));
922            // Old location file.
923            result.add(relPathToOldLocation(exhibitName));
924    
925            for(final Iterator<String> it = result.iterator(); it.hasNext(); )
926                {
927                final String f = it.next();
928                if(!(new File(exhibitBaseDir, f)).exists())
929                    {
930                    // We should remove this file as it doesn't exist.
931                    it.remove();
932                    }
933                }
934    
935            return(result);
936            }
937        }
938