001    /*
002    Copyright (c) 1996-2011, Damon Hart-Davis
003    All rights reserved.
004    
005    Redistribution and use in source and binary forms, with or without
006    modification, are permitted provided that the following conditions are
007    met:
008    
009      * Redistributions of source code must retain the above copyright
010        notice, this list of conditions and the following disclaimer.
011    
012      * Redistributions in binary form must reproduce the above copyright
013        notice, this list of conditions and the following disclaimer in the
014        documentation and/or other materials provided with the
015        distribution.
016    
017    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
018    IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
019    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
020    PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
021    OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
022    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
023    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
024    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
025    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
026    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
027    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
028    */
029    package org.hd.d.pg2k.svrCore.datasource;
030    
031    import java.io.DataInputStream;
032    import java.io.File;
033    import java.io.FileFilter;
034    import java.io.FileInputStream;
035    import java.io.FileNotFoundException;
036    import java.io.FileWriter;
037    import java.io.IOException;
038    import java.io.InterruptedIOException;
039    import java.io.InvalidObjectException;
040    import java.io.ObjectInputStream;
041    import java.io.ObjectInputValidation;
042    import java.io.ObjectOutputStream;
043    import java.io.PrintWriter;
044    import java.io.RandomAccessFile;
045    import java.io.Serializable;
046    import java.nio.ByteBuffer;
047    import java.security.MessageDigest;
048    import java.security.NoSuchAlgorithmException;
049    import java.util.ArrayList;
050    import java.util.Arrays;
051    import java.util.BitSet;
052    import java.util.Collections;
053    import java.util.Comparator;
054    import java.util.Date;
055    import java.util.HashMap;
056    import java.util.HashSet;
057    import java.util.Hashtable;
058    import java.util.Iterator;
059    import java.util.List;
060    import java.util.Map;
061    import java.util.Properties;
062    import java.util.Set;
063    import java.util.SortedSet;
064    import java.util.TreeSet;
065    import java.util.concurrent.ArrayBlockingQueue;
066    import java.util.concurrent.ThreadPoolExecutor;
067    import java.util.concurrent.TimeUnit;
068    import java.util.concurrent.locks.ReentrantLock;
069    import java.util.concurrent.locks.ReentrantReadWriteLock;
070    
071    import org.hd.d.pg2k.ai.scorer.ScorerCacheIF;
072    import org.hd.d.pg2k.svrCore.AccessionData;
073    import org.hd.d.pg2k.svrCore.AllExhibitImmutableData;
074    import org.hd.d.pg2k.svrCore.AllExhibitProperties;
075    import org.hd.d.pg2k.svrCore.CoreConsts;
076    import org.hd.d.pg2k.svrCore.ExhibitFile;
077    import org.hd.d.pg2k.svrCore.ExhibitName;
078    import org.hd.d.pg2k.svrCore.ExhibitPropsComputableMutable;
079    import org.hd.d.pg2k.svrCore.ExhibitPropsLoadable;
080    import org.hd.d.pg2k.svrCore.ExhibitStaticAttr;
081    import org.hd.d.pg2k.svrCore.ExhibitThumbnails;
082    import org.hd.d.pg2k.svrCore.FileTools;
083    import org.hd.d.pg2k.svrCore.GenUtils;
084    import org.hd.d.pg2k.svrCore.HostUtils;
085    import org.hd.d.pg2k.svrCore.ImageUtils;
086    import org.hd.d.pg2k.svrCore.MemoryTools;
087    import org.hd.d.pg2k.svrCore.MemoryTools.RecurrentEmergencyFreeHandle;
088    import org.hd.d.pg2k.svrCore.MemoryTools.SoftReferenceMap;
089    import org.hd.d.pg2k.svrCore.Name;
090    import org.hd.d.pg2k.svrCore.Name.ExhibitFull;
091    import org.hd.d.pg2k.svrCore.PGMasterNotInServiceException;
092    import org.hd.d.pg2k.svrCore.ROByteArray;
093    import org.hd.d.pg2k.svrCore.Rnd;
094    import org.hd.d.pg2k.svrCore.SimpleLoggerIF;
095    import org.hd.d.pg2k.svrCore.Stratum;
096    import org.hd.d.pg2k.svrCore.TextUtils;
097    import org.hd.d.pg2k.svrCore.ThreadUtils;
098    import org.hd.d.pg2k.svrCore.Tuple;
099    import org.hd.d.pg2k.svrCore.MIME.ExhibitMIME;
100    import org.hd.d.pg2k.svrCore.location.LoadBalancingUtils;
101    import org.hd.d.pg2k.svrCore.props.GenProps;
102    import org.hd.d.pg2k.svrCore.props.LocalProps;
103    import org.hd.d.pg2k.svrCore.props.SecurityProps;
104    import org.hd.d.pg2k.svrCore.stats.StatsLogger;
105    import org.hd.d.pg2k.svrCore.vars.EventPeriod;
106    import org.hd.d.pg2k.svrCore.vars.EventVariableValue;
107    import org.hd.d.pg2k.svrCore.vars.PipelineVarMgr;
108    import org.hd.d.pg2k.svrCore.vars.SimpleVariableDefinition;
109    import org.hd.d.pg2k.svrCore.vars.SimpleVariableValue;
110    import org.hd.d.pg2k.svrCore.vars.SystemVariables;
111    
112    import ORG.hd.d.IsDebug;
113    
114    /**Exhibit pipeline cache stage.
115     * This performs transparent persistent cacheing of exhibit data and variables.
116     * <p>
117     * The presence of an instance of this stage upstream of a tunnel or
118     * other potentially slow/expensive/unreliable data source should,
119     * for normal data access (eg sequential download of exhibits)
120     * significantly reduce upstream bandwidth requirements
121     * and reduce downstream latency by answering requests from local cache.
122     * <p>
123     * This also attempts to shelter its downstream callers/users from I/O errors
124     * upstream, by fulfilling requests from cache,
125     * or, when synchronous calls upstream have to be made,
126     * transforming some requests and replies into async forms where possible.
127     * <p>
128     * When caching exhibit data this class only does so as a continuous prefix
129     * from offset zero; other (random) accesses may have to read-through the cache.
130     * <p>
131     * This cache is also able to precache data likely to be valuable,
132     * such as thumbnails and the initial portions of exhibits,
133     * though this will only be attempted if the cache appears to be in use.
134     * Bandwidth/resource consumption used by precacheing are regulated.
135     * This cache regards thumbnails and meta-data as precious,
136     * and tries not to let them go once collected and cached
137     * because reasonable application performance will often depend on
138     * fast access to these data.
139     * <p>
140     * This attempts to cache data well enough that, especially if aggressive
141     * (pre)cacheing is available and the cache area is large enough,
142     * almost no reference should be needed to the backend server in response to
143     * a data request on the pipeline except in response to the tail end
144     * of very long exhibits; all requests are answered from the local
145     * cache where possible.  Ideally, it should be possible for the back-end
146     * server to go down altogether and have the front end still provide a
147     * high degree of functionality.  The front-end and back-end are almost
148     * completely decoupled in this cache design.
149     * <p>
150     * This includes limited in-memory cacheing, in many cases using
151     * soft references to allow peaceful coexistence with other (potentially
152     * heavy) users of memory.
153     * <p>
154     * This class relies mainly on the asynchronous calls to poll() to fetch
155     * meta-data updates such as GenProps and AllExhibitImmutableData.  These
156     * happen under a private lock and do not block cache access much or at all.
157     * <p>
158     * The full lock ordering (where multiple locks need to be taken at once) is:
159     * <ol>
160     * <li> ExhibitDataSimpleCache.rwl
161     * <li> ExhibitDataSimpleCache.metaData
162     * </ol>
163     * <p>
164     * This cache may serialise access to raw exhibit data (and may serialise
165     * accesses to back-end resources too).  No two live instances of this
166     * class should refer to the same cache directory at once
167     * else madness and corruption will almost certainly break out.
168     * <p>
169     * On disc, the files are some prefix of the full exhibit, retrieved if possible
170     * in MAX_TRANSFER_CHUNK_SIZE chunks.  They are touched every time
171     * accessed or updated and the timestamps can therefore be used as
172     * the basis of an LRU cache.  We expect almost all access to be
173     * sequential, starting at the beginning.
174     * <p>
175     * We satisfy requests wholly within already-cached data immediately,
176     * and will extend (up to the limit) by up to one
177     * chunk each time by downloading from the server to satisfy
178     * requests just beyond the current end.  Requests starting well beyond
179     * the current end of cache are punted directly to the server,
180     * which is rather ugly and slow, but there we go.
181     * <p>
182     * This cache only considers general precacheing until the low-water
183     * mark is reached, and by default only deletes existing entries if it has to
184     * in order to satisfy an incoming cacheable request.
185     * This means that stale entries for deleted/renamed exhibits
186     * may persist for a while, but this is mainly harmless.
187     * <p>
188     * This maintains a bidirectional cache of variable values and updates,
189     * and also some running parameters of the cache may be read as variables.
190     * <p>
191     * This cache supports a limited amount of peer-to-peer (P2P) data transfers
192     * to reduce load on the master.  The general policy is that any
193     * synchronous (and thus presumably time-sensitive) data request from an end user
194     * that cannot be satisfied from local cache is satisfied upstream from the master.
195     * Asynchronous data fetches, such as read-ahead and precache activity,
196     * can be fetched P2P.  Also if the master fails or is unavailable
197     * then it may be acceptable to use P2P if loops/cycles can be avoided.
198     * <p>
199     * TODO: re-analyse/reduce locking and possibly avoid locks on the metaData object.
200     */
201    public final class ExhibitDataSimpleCache implements SimpleExhibitPipelineIF
202        {
203        /**If true then trace/log all thumbnail activity; defaults to false/off.
204         * Usually only true while debugging/tuning.
205         * <p>
206         * Can be activated from the command-line.
207         */
208        private static final boolean TRACE_THUMBNAIL_ACTIVITY_ALL;
209    
210        /**If true then trace/log interesting/unusual thumbnail activity; defaults to false/off.
211         * Usually only true while debugging/tuning.
212         * <p>
213         * Generally shows rare events such as generation of thumbnails
214         * and request to save NO_THUMBNAILS values
215         * and other non-routine activity.
216         * <p>
217         * Forced to true if TRACE_THUMBNAIL_ACTIVITY_ALL is true.
218         * <p>
219         * Can be activated from the command-line.
220         */
221        private static final boolean TRACE_THUMBNAIL_ACTIVITY;
222    
223        static
224            {
225            boolean logAll = false, logUnusual = false;
226            try
227                {
228                logAll = Boolean.getBoolean("org.hd.d.pg2k.svrCore.datasource.ExhibitDataSimpleCache.TRACE_THUMBNAIL_ACTIVITY_ALL");
229                // Tracing everything implies tracing the unusual; only consult the second property otherwise.
230                logUnusual = logAll ? true : Boolean.getBoolean("org.hd.d.pg2k.svrCore.datasource.ExhibitDataSimpleCache.TRACE_THUMBNAIL_ACTIVITY");
231                }
232            catch(final Exception e) { e.printStackTrace(); } // Best effort: flags keep whatever was set before the failure.
233            if(logAll) { System.out.println("INFO: ExhibitDataSimpleCache.TRACE_THUMBNAIL_ACTIVITY_ALL enabled"); }
234            if(logUnusual) { System.out.println("INFO: ExhibitDataSimpleCache.TRACE_THUMBNAIL_ACTIVITY enabled"); }
235            TRACE_THUMBNAIL_ACTIVITY_ALL = logAll;
236            TRACE_THUMBNAIL_ACTIVITY = logUnusual;
237            }
238    
239        /**If true then trace/log P2P activity; defaults to false/off.
240         * Usually only true while debugging/tuning.
241         * <p>
242         * Can be activated from the command-line.
243         */
244        private static final boolean TRACE_P2P_ACTIVITY;
245    
246        static
247            {
248            boolean p2pTrace; // Set from the system property; falls back to off if it cannot be read.
249            try { p2pTrace = Boolean.getBoolean("org.hd.d.pg2k.svrCore.datasource.ExhibitDataSimpleCache.TRACE_P2P_ACTIVITY"); }
250            catch(final Exception e) { e.printStackTrace(); p2pTrace = false; }
251            if(p2pTrace) { System.out.println("INFO: ExhibitDataSimpleCache.TRACE_P2P_ACTIVITY enabled"); }
252            TRACE_P2P_ACTIVITY = p2pTrace;
253            }
254    
255    
256    
257        /**If true then we may purge cached data for exhibits that appear to have been deleted (or renamed).
258         * These are exhibits that have been "orphaned" in the cache,
259         * ie they are not accessible because they do not logically exist
260         * (though they do potentially serve as a backup in case of disaster).
261         * <p>
262         * We will generally only do this if:
263         * <ul>
264         * <li>the exhibit data has not been used for a long time, and
265         * <li>if cache size is above the low-water mark that would prevent us
266         *     from doing useful precacheing.
267         * </ul>
268         * <p>
269         * We do not rush to delete exhibits' data in case a transient problem
270         * has made an exhibit disappear temporarily.
271         * (Data for deleted exhibits will in any case eventually be deleted LRU
272         * (Least-Recently-Used) if the cache becomes full.)
273         */
274        private static final boolean ORPHANED_EXHIBIT_EXPIRY_ALLOWED = true;
275    
276        /**The minimum time before we will preemptively purge orphaned cache entries (ms); strictly positive.
277         * We don't expect to delete or rename exhibits very often,
278         * and the only harm in NOT purging them may be to prevent precacheing of new exhibits,
279         * ie a minor performance issue rather than a correctness issue.
280         * <p>
281         * Add a random component so that all clients do not purge orphans at once!
282         * <p>
283         * A value of the order of a few days to a few months is probably reasonable.
284         */
285        private static final long ORPHANED_EXHIBIT_MIN_UNUSED_TIME_MS = 14 * 24 * 3600 * 1000L + // A fortnight.
286            Rnd.fastRnd.nextInt(4 * 24 * 3600 * 1000);
287    
288        /**Base dir within cache dir for all our exhibit data. */
289        private static final String CACHE_BASE_DIR = "_exhibitsCache";
290    
291        /**Name of file in which to persist immutable exhibit data. */
292        private static final String CACHE_EXPROPS_FILENAME = CACHE_BASE_DIR + "/_ExProps.dat";
293    
294        /**Name of file in which to persist cache meta data. */
295        private static final String CACHE_METADATA_FILENAME = CACHE_BASE_DIR + "/_metadata.dat";
296    
297        /**If true, store exprops data (and cache metadata) GZIPed to possibly save space and I/O time. */
298        private static final boolean STORE_EXPROPS_GZIPED = true;
299    
300        /**Base dir within cache dir for all our raw exhibit content data. */
301        private static final String CACHE_EXDATA_DIR = CACHE_BASE_DIR + "/exd";
302    
303        /**Base dir within cache dir for all our event history data. */
304        private static final String EVENT_HISTORY_DIR = CACHE_BASE_DIR + "/history";
305    
306        /**The prefix for all aux files associated with an exhibit file. */
307        private static final String CACHE_EXAUX_PREFIX = ".aux.";
308    
309        /**The keyword for the file containing the (decimal) exhibit timestamp. */
310        private static final String CACHE_EXAUX_TIMESTAMP_KW = "ts";
311    
312        /**The keyword for the file containing the serialised thumbnails object. */
313        private static final String CACHE_EXAUX_TN_KW = "tn";
314    
315        /**Maximum (normal) exhibit data transfer chunk size (bytes); strictly positive.
316         * Maximum chunk transferred in one call (bytes),
317         * to avoid creating huge gaps in other activity by
318         * jamming up transactions and/or hogging all I/O bandwidth.
319         * <p>
320         * Should probably be of the order of a few tens of kBytes
321         * to allow efficient transfers on the wire, and a power of
322         * two to be more likely to interact efficiently with other
323         * caches (and network protocols).
324         * <p>
325         * If we use (a small multiple of) the bulk data transfer chunk size
326         * this will be reasonably efficient in terms of disc/network traffic,
327         * and if we can also keep it aligned to whole chunk boundaries
328         * then we may get maximally efficient accesses into disc (etc) data.
329         */
330        private static final int MAX_TRANSFER_CHUNK_SIZE = Math.max(1<<16,
331            2 * CoreConsts.BULK_DATA_TRANSFER_SIZE);
332    
333        /**Maximum extended exhibit data transfer chunk size (bytes); strictly positive.
334         * When asked for data but we have a small gap in our cache before the request start
335         * normally we would have to pass the result upstream and not cache any.
336         * This is potentially very wasteful in P2P sharing of new exhibits.
337         * <p>
338         * So in this case we may allow the upstream request window to be moved back
339         * to patch up the hole from the end of our cached data to the start of the request,
340         * thus allowing us to capture the result.
341         * <p>
342         * This value should be a small multiple of the MAX_TRANSFER_CHUNK_SIZE.
343         */
344        private static final int MAX_EXTD_TRANSFER_CHUNK_SIZE = MAX_TRANSFER_CHUNK_SIZE << 2;
345    
346        /**The maximum number of bytes we will force a transfer of to force an immediate thumbnail generation.
347         * This might as well be at least one block,
348         * and might as well be a little bigger than we anticipate the
349         * resulting thumbnail size to be (limited to),
350         * but no larger than the maximum single transfer that we can make.
351         */
352        private static final int MAX_REMOTE_FETCH_TO_MAKE_THUMBNAIL =
353            Math.min(MAX_USER_READ_SIZE,
354                Math.max(MAX_TRANSFER_CHUNK_SIZE,
355                         ExhibitThumbnails.MAX_BYTES_EST));
356    
357        /**Minimum cache size to allow if GenProps is not set (bytes).
358         * This prevents thrashing on an empty cache and should be enough
359         * to store a few thumbnails and a few reasonable image chunks.
360         */
361        private static final int FALLBACK_MIN_CACHE_SIZE =
362            Math.max(1234567, 8 * MAX_TRANSFER_CHUNK_SIZE) +
363            (50 * ExhibitThumbnails.MAX_BYTES_EST);
364    
365        /**If true then accessing a thumbnail marks its exhibit as accessed.
366         * This means that for the purposes of avoiding eviction from
367         * cache, accessing a thumbnail is taken as being as significant
368         * as downloading (part of) the exhibit itself, if true.
369         * <p>
370         * If false then only actually reading part of the exhibit itself
371         * helps keep the exhibit ``fresh'' in the cache.
372         */
373        private static final boolean THUMBNAIL_ACCESS_UPDATES_ACCESS_TIMESTAMP = false;
374    
375        /**Approximate minimum interval between rechecks of on-disc cache.
376         * When a check is done, the in-memory record of disc cache status
377         * is reloaded from disc, any debris is removed, etc.
378         * <p>
379         * A period of the order of at least a day is probably about right;
380         * not being exactly a multiple helps to ensure that we do not hit the same time
381         * every day, which might otherwise collide with other regular activity.
382         * Note that this recheck may take at least several minutes,
383         * so we don't want to do it too often!
384         * <p>
385         * The chosen interval is less than 2D (including a random component).
386         * We use less than a day-multiple to tend to be waiting for energy
387         * to become available at a similar time each day, eg from solar PV,
388         * when the system is in energy-conserving mode.
389         * This seems frequent enough in practice.
390         */
391        private static final int DISC_RECHECK_INTERVAL_MS =
392            (36 * 3600 * 1000) + Rnd.fastRnd.nextInt(11 * 3600 * 1000);
393    
394        /**Approximate minimum interval between saves of the metadata; strictly positive.
395         * Since access to the exhibit/thumbnail data causes this to be updated
396         * (along with more significant changes to the cache),
397         * and saving can take significant time, we do not
398         * want to save this immediately we encounter a change.
399         * <p>
400         * We can postpone a save for a while at the risk that if the
401         * system crashes or shuts down during that time and there
402         * was a structural change to the cache, we might have to abandon
403         * the old metadata and start again, which could be slow and
404         * a bit messy (losing some useful though not vital information).
405         * <p>
406         * Taking our cue from the old UNIX sync interval of 30s,
407         * a value in the range 30s to a few minutes is probably reasonable.
408         * Larger values of several minutes help reduce disc (write) activity
409         * which may be important for (say) solid-state storage such as Flash.
410         */
411        private static final int METADATA_MIN_SAVE_INTERVAL_MS =
412            (5 * 60 * 1000) + Rnd.fastRnd.nextInt(60 * 1000);
413    
414    
415        /**Our local logger; never null. */
416        private final SimpleLoggerIF logger;
417    
418        /**The stats set to which we log general cache behaviour.
419         * The unique codes are the constants SCGNAME_XXX.
420         */
421        private final StatsLogger.StatsConfig statsIDSCGEN;
422    
423        /**General stats event name: the cache meta-data was saved. */
424        public static final String SCGNAME_MDSAVE = "metaDataSave";
425    
426        /**General stats event name: an exhibit was evicted from the cache in LRU order. */
427        public static final String SCGNAME_CACHEEVICTLRU = "exhibitEvictedFromCacheLRU";
428    
429        /**General stats event name: an exhibit was removed from the cache. */
430        public static final String SCGNAME_CACHEREM = "exhibitRemovedFromCache";
431    
432        /**General stats event name: an exhibit was added to the cache. */
433        public static final String SCGNAME_CACHEADD = "exhibitAddedToCache";
434    
435        /**General stats event name: a corrupt exhibit was removed from the cache. */
436        public static final String SCGNAME_CACHEREM_CORRUPT = "exhibitRemovedFromCacheCORRUPT";
437    
438        /**General stats event name: an exhibit in the cache was fully validated against checksums, etc. */
439        public static final String SCGNAME_CACHE_VALIDATION = "exhibitValidatedInCache";
440    
441        /**General stats event name: an exhibit in the cache was partially validated against checksums, etc. */
442        public static final String SCGNAME_CACHE_VALIDATION_PART = "exhibitValidatedInCachePartial";
443    
444        /**General stats event name: cache raw data read miss.
445         * We had to go upstream for at least part of the data.
446         * <p>
447         * (It is possible to have a hit and a miss on the same read
448         * if part is satisfied from cache and part not.)
449         */
450        public static final String SCGNAME_CACHERAWDATAMISS = "exhibitCacheRawDataMiss";
451    
452        /**General stats event name: cache raw data read hit.
453         * We satisfied at least part of the read from cache.
454         * <p>
455         * (It is possible to have a hit and a miss on the same read
456         * if part is satisfied from cache and part not.)
457         */
458        public static final String SCGNAME_CACHERAWDATAHIT = "exhibitCacheRawDataHit";
459    
460        /**General stats event name: on-disc cache hit for thumbnail. */
461        public static final String SCGNAME_CACHETNHIT = "exhibitCacheThumbnailHit";
462    
463        /**General stats event name: in-memory cache hit for thumbnail. */
464        public static final String SCGNAME_CACHETNMEMHIT = "exhibitCacheThumbnailMemoryHit";
465    
466        /**General stats event name: cache miss for thumbnail. */
467        public static final String SCGNAME_CACHETNMISS = "exhibitCacheThumbnailMiss";
468    
469        /**General stats event name prefix: fetched a data block from a peer. */
470        public static final String SCGNAME_DATAFETCHFROMPEER_PREFIX = "exhibitPeerDataFetch-";
471    
472        /**General stats event name: restarted scanning all exhibits for precaching. */
473        public static final String SCGNAME_PRECACHERESTART = "exhibitPrecacheRestart";
474    
475        /**General stats event name: exhibits examined for precaching. */
476        public static final String SCGNAME_PRECACHEEXAMINED = "exhibitPrecacheExhibitsExamined";
477    
478        /**General stats event name: exhibit data block precached. */
479        public static final String SCGNAME_PRECACHEEXDATABLOCK = "exhibitPrecacheExhibitBlock";
480    
481        /**General stats event name prefix: exhibit data block precache source (if not from master/upstream). */
482        public static final String SCGPREF_PRECACHEEXDATABLOCKSRC = "exhibitPrecacheExhibitBlockSrc=";
483    
484        /**General stats event name prefix: exhibit data block precache source for error (if not from master/upstream). */
485        public static final String SCGPREF_PRECACHEEXDATABLOCKSRCERR = "exhibitPrecacheExhibitBlockSrcErr=";
486    
487        /**General stats event name prefix: exhibit data block precache (successful) fetch time (log2 ms). */
488        public static final String SCGPREF_PRECACHEEXDATABLOCKFETCHTIME = "exhibitPrecacheExhibitBlockFetchTime=";
489    
490        /**General stats event name: errors encountered during precaching. */
491        public static final String SCGNAME_PRECACHEERROR = "exhibitPrecacheErrors";
492    
493        /**General stats event name: incoming request for exhibit data. */
494        public static final String SCGNAME_EXDATAREQIN = "exhibitRawDataRequestIn";
495    
496        /**General stats event name: incoming request for exhibit data with "dontCache" flag set. */
497        public static final String SCGNAME_EXDATAREQINDC = "exhibitRawDataRequestIn_dontCache";
498    
499        /**General stats event name: incoming request for exhibit thumbnails. */
500        public static final String SCGNAME_EXTHUREQIN = "exhibitThumbnailRequestIn";
501    
502        /**General stats event name: incoming request for exhibit thumbnails with "dontCreate" flag set. */
503        public static final String SCGNAME_EXTHUREQINDC = "exhibitThumbnailRequestIn_dontCreate";
504    
505        /**General stats event name: created thumbnails locally from cached data. */
506        public static final String SCGNAME_EXTHUCREATED = "exhibitThumbnailCreated";
507    
508        /**If true, assume that newly-loaded meta-data is OK at start-up until proven otherwise. */
509        private static final boolean ASSUME_LOADED_METADATA_OK = true;
510    
511        /**If true, synchronously force a save of meta-data each time we add a new entry at least.
512         * May be slow (O(n^2) for n exhibits),
513         * especially where the cache is not large enough to hold all exhibits
514         * so items are continually coming and going,
515         * but potentially makes the system more robust against loss of data.
516         */
517        private static final boolean FORCE_IMMEDIATE_SAVE_ON_EXPANDED_METADATA = false;
518    
519    
520        /**Key in generic props of P2P-profiling flag. */
521        private static final String KEY_debugFlag_P2P_BLOCKXFER = CoreConsts.GEN_PREFIX_debugFlag + "P2P.profile.blockXfer";
522    
523    
524        /**Maximum number of "best" exhibits to get enhanced precaching; non-negative. */
525        private static final int MAX_BEST_EX_PRECACHED = 301;
526    
527        /**Contains the full exhibit names of the "best" few exhibits for enhanced precacheing; never null.
528         * Maintained/updated by _doPrecache().
529         * <p>
530         * This is a snapshot of what the "best" exhibits are estimated to be
531         * as each precacheing round starts,
532         * often based on "quick approximation" data.
533         * <p>
534         * The implementation is optimised for fast lookup with "contains()".
535         * <p>
536         * Thread-safe.
537         */
538        private final Set<Name.ExhibitFull> _bestExhibits =
539            Collections.synchronizedSet(new HashSet<Name.ExhibitFull>(2 * MAX_BEST_EX_PRECACHED));
540    
541        /**Cache meta-data class.
542         * Holds information about what exhibit data is cached on disc.
543         * <p>
544         * Is serialisable to be able to persist to disc and
545         * allow for a fast start-up.
546         * <p>
547         * Can be marked read-only to hold the cache in its current state.
548         * <p>
549         * This object is completely thread-safe and holds its instance
550         * lock to synchronise activity.
551         * <p>
552         * The names of all members affecting the state of cached exhibit data
553         * and meta-data start with `exhibit'.
554         * <p>
555         * All activity that actually affects disc operations must be
556         * passed the rwl and it will be held while disc
557         * I/O takes place, and most of these operations will throw
558         * an IOException.
559         * (Note that if the ExhibitDataSimpleCache instance lock
560         * must be held as well then it and the rwl must be
561         * grabbed in the appropriate order before one of these
562         * routines is called.)
563         */
564        private final static class MetaData implements Serializable,
565                                                       ObjectInputValidation
566            {
567            /**Build an empty, read-only set of meta-data; throws IllegalArgumentException if the new state fails validation. */
568            MetaData()
569                {
570                // Update derived data.
571                _recomputeDerivedValues();
572    
573                // Verify object state, chaining any validation failure as the cause so its stack trace is not lost.
574                try { validateObject(); }
575                catch(final InvalidObjectException e)
576                    { throw new IllegalArgumentException(e.getMessage(), e); }
577                }
578    
579            /**Build a new meta-data set by examining the given cache directory.
580             * This is understood not to be perfect, because, for example,
581             * the last-accessed time may not be available on disc and
582             * we may have to approximate with an available last-modified-time;
583             * generally this is used when:
584             * <ol>
585             * <li>no persisted MetaData value is available, and we have
586             *     to do the best we can, or,
587             * <li>we want to load a copy of what is on disc to check that
588             *     what we have in memory is not badly wrong.
589             * </ol>
590             * <p>
591             * No locks (ie especially the rwl or cache's instance lock)
592             * should be held while this is running.
593             * <p>
594             * This does not modify the disc.
595             * <p>
596             * The constructed object is marked as needing to be saved.
597             * <p>
598             * This relies in part on being able to find the data files for
599             * the cached (prefixes of) exhibits by their valid full exhibit names.
600             * <p>
601             * We do also explicitly attempt to recover any plausible entries
602             * corresponding to current AEP entries.
603             * <p>
604             * Thus we may miss entries which have no current data file,
605             * eg because we had zero bytes cached for that file,
606             * and may have to recover such data later as we run across it.
607             * <p>
608             * We do also explicitly attempt to recover any plausible entries
609             * corresponding to current AEP entries.
610             *
611             * @param aepCurrent  reasonably current AEP as a hint for entries to look for;
612             *     never null
613             * @param cacheDir  the cache top-level directory; never null
614             * @param logger  to log to; never null
615             *
616             * @throws java.io.IOException  in case of major problems in reconstructing
617             *     the cache state (minor problems will be ignored)
618             */
619            MetaData(final AllExhibitProperties aepCurrent,
620                     final File cacheDir,
621                     final SimpleLoggerIF logger)
622                throws IOException
623                {
624                assert(aepCurrent != null);
625                assert(cacheDir != null);
626                assert(logger != null);
627    
628                // Compute the base dir of the exhibit data cache.
629                final File dataBaseDir = new File(cacheDir, CACHE_EXDATA_DIR);
630    
631                if(!dataBaseDir.isDirectory() ||
632                   !dataBaseDir.canRead() ||
633                   !dataBaseDir.canWrite())
634                    { throw new IOException("invalid or unusable cache directory ``"+dataBaseDir+"''"); }
635    
636                // Get the set of cached files,
637                // but not checking validity (eg magic numbers)
638                // because:
639                //  a) there may not have cached enough for the item to be strictly valid,
640                //  b) this should be efficient/quick.
641                final Set<Name.ExhibitFull> cachedFileNames = ExhibitFile.getFilesystemBasedExhibitNames(
642                    dataBaseDir, false);
643    
644                // We also throw in all names from the supplied AEP as candidates.
645                cachedFileNames.addAll(aepCurrent.aeid.getAllExhibitNamesSorted());
646    
647                // Now load all cached file stats into in-memory records.
648                final Iterator<Name.ExhibitFull> it = cachedFileNames.iterator();
649                while(it.hasNext())
650                    {
651                    final Name.ExhibitFull name = it.next();
652                    try {
653                        final CachedFile cf = CachedFile.recoverExtantCachedFileDetails(cacheDir, name);
654    
655                        // Verify that the candidate has something worth recovering,
656                        // (or at least inspecting and discarding to free up space)
657                        // ie exhibit data and/or thumbnails.
658                        // TODO: this may include other metadata (eg FEC) in future.
659                        if((cf.cachedLength > 0) || cf.hasThumbnails() ||
660                            (cf.lastAccessed > 0))
661                            { cachedExhibits.put(name, cf); }
662                        }
663                    catch(final Exception e)
664                        {
665                        // Since we seemed to encounter a problem with this putative
666                        // item, we ignore it.
667                        // FIXME: Possibly we should forcefully purge it.
668    logger.log("[ExhibitDataSimpleCache.MetaData: ERROR: trouble loading cache item '"+name+"': "+e.getMessage()+"]");
669                        }
670                    }
671    
672                // We should save this data that we have captured.
673                setNeedsSave();
674    
675                // Update derived data.
676                _recomputeDerivedValues();
677    
678                // Verify object state.
679                try { validateObject(); }
680                catch(final InvalidObjectException e)
681                    { throw new IllegalArgumentException(e.getMessage()); }
682                }
683    
        /**Map of cached exhibits, from full exhibit name to CachedFile entry; never null after construction/deserialisation.
         * We may like to optimise memory use by sharing the String values (etc)
         * with those from AllExhibitImmutableData held elsewhere.
         * <p>
         * This is the primary store of data;
         * other (transient) values are derived from it
         * (see _recomputeDerivedValues());
         * this is saved to and restored from the serialised form
         * in an efficient and defensive way.
         * NOTE(review): the field is itself declared transient (and left non-final),
         * so that saving/restoring is presumably done by custom serialisation
         * code outside this excerpt -- confirm.
         * <p>
         * This is a Hashtable for thread-safety.
         * <p>
         * Compound operations may be made atomic by a lock on this object,
         * but no other lock may be taken while this happens.
         * <p>
         * Any access that updates this table should be under the instance lock.
         */
        private /*final*/ transient Map<Name.ExhibitFull, CachedFile> cachedExhibits = new Hashtable<Name.ExhibitFull, CachedFile>();
701    
702            /**Get Set of exhibits on which metadata is currently held in this cache.
703             * This takes a private atomic copy of the Set of exhibits
704             * for which we have some data, thumbnails, etc, in the cache.
705             */
706            /*synchronized*/ Set<Name.ExhibitFull> getKnownExhibits()
707                { return(new HashSet<Name.ExhibitFull>(cachedExhibits.keySet())); }
708    
        /**LRU ordered set of the CachedFile values in cachedExhibits; never null after construction/deserialisation.
         * This should hold exactly the same entries as cachedExhibits.values().
         * <p>
         * The least-recently-accessed items are first in this list,
         * and are the first to be discarded if the cache becomes over-full.
         * (It is rebuilt by _recomputeDerivedValues() as a TreeSet with no
         * Comparator, so the order is CachedFile's natural ordering.)
         * <p>
         * This is not of itself thread-safe and all access must be under
         * the instance lock.
         */
        private /*final*/ transient SortedSet<CachedFile> exhibitsLRU;
719    
        /**Estimated total bytes of disc used by cached entries.
         * Computed as sum of estimates from CachedFile values;
         * _recomputeDerivedValues() rebuilds it from scratch by summing
         * calcDiscSpace() over every entry in cachedExhibits.
         * <p>
         * Write access restricted to _setTotalBytes();
         * marked volatile to allow unlocked <em>read</em> access.
         * <p>
         * Transient: it is derived data and is not serialised.
         */
        private volatile transient long totalBytes;
727    
728            /**Get estimated total bytes of disc used by cached entries; never negative. */
729            long getTotalBytesCurrentlyUsedByCache() { return(totalBytes); }
730    
731            /**Set estimated total bytes of disc used by cached entries; never negative. */
732            private synchronized void _setTotalBytes(final long newTB)
733                {
734                assert(newTB >= 0);
735                totalBytes = newTB;
736                if(newTB > totalBytesHighWaterMark) { totalBytesHighWaterMark = newTB; }
737                }
738    
        /**The highest value that totalBytes has been set to, ie the cache actual high-water mark; never negative.
         * <p>
         * It is only ever raised, by _setTotalBytes(), which runs under the
         * instance lock.
         * (A _setTotalBytesHighWaterMark() routine to reset it exists only as
         * commented-out code.)
         * <p>
         * Marked volatile to allow lock-free <em>read</em> access.
         * <p>
         * Transient: it is not serialised.
         */
        private volatile transient long totalBytesHighWaterMark;
748    
749            /**Get the highest value of totalBytes, ie the cache actual high-water mark; never negative. */
750            long getTotalBytesHighWaterMark() { return(totalBytesHighWaterMark); }
751    
752            /**Resets totalBytesHighWaterMark to the current value of totalBytes. */
753    //        private synchronized void _setTotalBytesHighWaterMark() { totalBytesHighWaterMark = getTotalBytes(); }
754    
755            /**This private routine updates all (transient) data derived from the primary cachedExhibits map.
756             * This is used after deserialisation, construction or insertion where
757             * it is easier to recompute such derived data from scratch
758             * than change it incrementally as usual.
759             */
760            private synchronized void _recomputeDerivedValues()
761                {
762                final SortedSet<CachedFile> _new_exhibitsLRU = new TreeSet<CachedFile>();
763                long _new_totalBytes = 0;
764    
765                for(final Iterator<Name.ExhibitFull> it = cachedExhibits.keySet().iterator(); it.hasNext(); )
766                    {
767                    final Name.ExhibitFull name = it.next();
768                    final CachedFile cf = cachedExhibits.get(name);
769    
770                    _new_exhibitsLRU.add(cf);
771                    _new_totalBytes += cf.calcDiscSpace();
772                    }
773    
774                // Save new computed values.
775                exhibitsLRU = _new_exhibitsLRU;
776                _setTotalBytes(_new_totalBytes);
777                }
778    
        /**If true, we need a save to disc (ie our state has changed).
         * Is transient so always false when we recover from disc.
         * <p>
         * Is volatile so that it can be accessed with no lock held:
         * it is set true by setNeedsSave() and
         * cleared by saveToDisc() after a successful save to disc.
         */
        private transient volatile boolean needsSave;
787    
788            /**Get the 'needs to be saved to disc' value.
789             * Always false after deserialisation and construction of a new instance.
790             */
791            boolean getNeedsSave() { return(needsSave); }
792    
793            /**Set the `needs to be saved to disc' value to be true. */
794            void setNeedsSave() { needsSave = true; }
795    
796            /**Save (serialise) to disc; throws IOException in case of difficulty.
797             * Holds a lock on the metaData object while the save takes place;
798             * clears needsSave (to false) if the save appears to be successful.
799             * <p>
800             * FIXME: see if this and writeObject() should simply make the object read-only for the duration
801             *
802             * @param cacheDir  specified the top-level cache directory; never null
803             */
804            void saveToDisc(final File cacheDir,
805                            final SimpleLoggerIF logger,
806                            final StatsLogger.StatsConfig statsIDSCGEN)
807                throws IOException
808                {
809                assert(null != cacheDir);
810    
811                final File toSaveTo = new File(cacheDir, CACHE_METADATA_FILENAME);
812    
813                final long start = System.currentTimeMillis();
814    
815                // Do as little as possible with the lock held...
816                synchronized(this)
817                    {
818                    FileTools.serialiseToFile(this, toSaveTo, STORE_EXPROPS_GZIPED, !IsDebug.isDebug);
819                    needsSave = false; // OK, seems to have worked.
820                    }
821    
822    logger.log("[ExhibitDataSimpleCache.MetaData.saveToDisc() done: "+ (System.currentTimeMillis()-start) +"ms.]");
823    
824                // Note save of meta-data from poll()...
825                StatsLogger.captureDataPoint(statsIDSCGEN, SCGNAME_MDSAVE);
826                }
827    
        /**If true, this cache meta-data is read/write.
         * Leaving this false can be used to pin the state of the cache
         * while we compare it to on-disc state and/or
         * while we are unsure of its veracity.
         * <p>
         * Is transient so that it is deserialised as false,
         * and is also false on construction of a new MetaData instance
         * (neither constructor sets it);
         * should only be set true after comparing with,
         * or recreating from, real disc state.
         * <p>
         * Is volatile so that it can be read without taking a lock;
         * setReadWrite() writes it under the instance lock.
         */
        private volatile transient boolean readWrite;
841    
842            /**Get the `cache-is-read/write' value.
843             * Always false after deserialisation and after construction of a new instance.
844             */
845            boolean isReadWrite() { return(readWrite); }
846    
847            /**Set the `cache-is-read/write' value.
848             * Only the cache-checking routines should call this at all.
849             * <p>
850             * Insists on grabbing the instance lock to block changes to this
851             * without the instance lock held.
852             */
853            synchronized void setReadWrite(final boolean rw) { readWrite = rw; }
854    
855    
856            /**Get count of cached exhibits, including partially-cached ones; never negative. */
857            /*synchronized*/ int size() { return(cachedExhibits.size()); }
858    
859            /**Is our meta-data store empty, ie no exhibits cached at all? */
860            /*synchronized*/ boolean isEmpty() { return(cachedExhibits.isEmpty()); }
861    
862            /**Get cached-file details for a given exhibit; null if exhibit not cached.
863             * This may incrementally verify cache data against the disc copy,
864             * though for performance reasons this may not happen very often.
865             * <p>
866             * This is thread-safe and does not need nor grab any cache locks,
867             * but to ensure this data remains valid during any compound operation,
868             * at least a read lock should usually be obtained.
869             *
870             * @param name  the exhibit whose meta-data is required; never null
871             */
872            CachedFile exhibitGetInfo(final Name.ExhibitFull name)
873                { return(cachedExhibits.get(name)); }
874    
875            /**Check if the exhibit is fully loaded.
876             * <p>
877             * This does not grab any cache locks,
878             * but to ensure this data remains valid during any compound operation,
879             * at least a read lock should generally be obtained.
880             *
881             * @param esa  the exhibit whose meta-data is required; never null
882             *
883             * @return false if no such exhibit or it is not fully loaded
884             */
885            boolean exhibitIsFullyLoaded(final ExhibitStaticAttr esa)
886                //throws IOException
887                {
888                if(esa == null) { return(false); }
889                final CachedFile icf = exhibitGetInfo(esa.getExhibitFullName());
890                if((icf == null) || (icf.cachedLength != esa.length))
891                    { return(false); }
892                return(true); // Looks OK.
893                }
894    
895            /**Remove cached-file details, adjusting cache data in situ; does nothing if not present.
896             * The argument must not be null.
897             * Private to this instance.
898             * <p>
899             * Does not check the read/write status; should be checked
900             * before this is called.
901             * <p>
902             * Cache write lock must be held by current thread.
903             */
904            private synchronized void _remove(final ReentrantReadWriteLock rwl,
905                                              final Name.ExhibitFull name)
906                {
907                assert (rwl != null) || (name != null);
908    
909                // Write lock must be held by current thread.
910                assert(rwl.isWriteLockedByCurrentThread());
911    
912                // Data should be consistent when we start.
913                assert (exhibitsLRU != null) && (exhibitsLRU.size() == cachedExhibits.size()) && (totalBytes >= 0);
914    
915                final CachedFile oldCf = exhibitGetInfo(name);
916                if(oldCf != null)
917                    {
918                    // If old data exists, carefully remove it
919                    // from all the places it affects.
920                    _setTotalBytes(getTotalBytesCurrentlyUsedByCache() - oldCf.cachedLength);
921                    exhibitsLRU.remove(oldCf);
922                    cachedExhibits.remove(oldCf.name);
923    
924                    // Note need for meta-data save.
925                    setNeedsSave();
926    
927                    // Data should be consistent after removing old metadata.
928                    assert (exhibitsLRU != null) && (exhibitsLRU.size() == cachedExhibits.size()) && (totalBytes >= 0);
929                    }
930                }
931    
932            /**Update or add new cached-file details, adjusting cache data in situ.
933             * The argument must not be null.
934             * Private to this instance.
935             * <p>
936             * Does not check the read/write status; should be checked
937             * before this is called.
938             * <p>
939             * Marks the meta-data as needing to be saved.
940             * <p>
941             * Cache write lock must be held by current thread.
942             */
943            private synchronized void _update(final ReentrantReadWriteLock rwl,
944                                              final CachedFile newCf,
945                                              final SimpleLoggerIF logger)
946                {
947                assert (rwl != null) && (newCf != null);
948    
949                // Write lock must be held by current thread.
950                assert(rwl.isWriteLockedByCurrentThread());
951    
952                // Data should be consistent when we start.
953                assert (exhibitsLRU != null) && (exhibitsLRU.size() == cachedExhibits.size()) && (totalBytes >= 0);
954    
955                // First, zap any extant entry.
956                _remove(rwl, newCf.name);
957    
958                // Now add new data incrementally.
959                cachedExhibits.put(newCf.name, newCf);
960                exhibitsLRU.add(newCf);
961                _setTotalBytes(getTotalBytesCurrentlyUsedByCache() + newCf.cachedLength);
962    
963                // Check for internal consistency...
964                if(cachedExhibits.size() != exhibitsLRU.size())
965                    {
966                    logger.log("ExhibitDataSimpleCache.MetaData._update(): ERROR: internal data structures incosistent: repairing...");
967    
968                    // Chuck away old values; insert new ones...
969                    exhibitsLRU.clear();
970                    exhibitsLRU.addAll(cachedExhibits.values());
971                    _recomputeDerivedValues();
972                    }
973    
974                // Note need for meta-data save.
975                setNeedsSave();
976    
977                // Data should be consistent when we finish.
978                assert (exhibitsLRU != null) && (exhibitsLRU.size() == cachedExhibits.size()) && (totalBytes >= 0);
979                }
980    
981            /**Returns true if this metadata is essentially equivalent to another one.
982             * This has a sufficiently loose notion to allow for discrepancies
983             * in (for example) last-accessed timestamps which cannot be completely
984             * reconstructed from the disc copy, but vital differences are noted.
985             * <p>
986             * The result is false if the argument is null.
987             */
988            synchronized boolean isEquivalent(final MetaData other)
989                {
990                if(other == null) { return(false); }
991    
992                // Clearly not equivalent if number of cached exhibits differ...
993                if(size() != other.size()) { return(false); }
994    
995                // Check that each cached entry matches,
996                // except in the last-accessed timestamp values.
997                for(final Name.ExhibitFull name : cachedExhibits.keySet())
998                    {
999                    final CachedFile cf1 = exhibitGetInfo(name);
1000                    final CachedFile cf2 = other.exhibitGetInfo(name);
1001    
1002                    // If we find a significant discrepancy,
1003                    // these MetaData sets are not equivalent.
1004                    if(!cf1.isEquivalent(cf2)) { return(false); }
1005                    }
1006    
1007                return(true); // Seem to be essentially the same.
1008                }
1009    
1010            /**Install info from new cache object in this one.
1011             * This would allow us to incorporate the results
1012             * of a disc scan to see the actual state of the disc.
1013             * (Usually the argument is the best-efforts reconstruction
1014             * from the disc of the cache state, with the timestamps
1015             * arbitrarily too old.)
1016             * <p>
1017             * This essentially throws away the old state and
1018             * replaces it with that passed in,
1019             * though this may do things such as retain the best
1020             * notion of last-accessed timestamp from both.
1021             * <p>
1022             * Both this and the newData objects are locked for
1023             * the duration of this operation.
1024             * <p>
1025             * The readWrite status is not altered or updated
1026             * by this operation but needsSave may be;
1027             * therefore both objects must be read-only before this is called.
1028             * <p>
1029             * Only the cache-checking routines should call this at all.
1030             */
1031            synchronized void mergeWithNewMetaData(final MetaData newData,
1032                                                   final SimpleLoggerIF logger)
1033                throws IllegalStateException
1034                {
1035                synchronized(newData)
1036                    {
1037                    if(readWrite || newData.readWrite)
1038                        { throw new IllegalStateException("must both be read-only"); }
1039    
1040                    // By default, assume that this data will need saving.
1041                    setNeedsSave();
1042    
1043                    // Save the old data away.
1044                    final Map<Name.ExhibitFull,CachedFile> oldCachedExhibits = new HashMap<Name.ExhibitFull, CachedFile>(cachedExhibits);
1045    
1046                    // Replace old data with new.
1047                    // Don't preserve any data from the old set.
1048                    cachedExhibits.clear();
1049                    cachedExhibits.putAll(newData.cachedExhibits);
1050    
1051                    // Now for every new entry,
1052                    // if we have a usable old CachedFile entry
1053                    // use it but fix it up from disc.
1054                    for(final Iterator<Name.ExhibitFull> it = cachedExhibits.keySet().iterator(); it.hasNext(); )
1055                        {
1056                        final Name.ExhibitFull name = it.next();
1057                        final CachedFile newCf = cachedExhibits.get(name);
1058                        final CachedFile oldCf = oldCachedExhibits.get(name);
1059                        if(!newCf.isEquivalent(oldCf)) { continue; } // Old value not usable.
1060    
1061                        // Restore old value, fixed-up.
1062                        cachedExhibits.put(name, oldCf.fixup(newCf, logger));
1063                        }
1064    
1065                    // Update derived data.
1066                    _recomputeDerivedValues();
1067                    }
1068                }
1069    
1070            /**Mark exhibit as accessed (`touch' it) and returns the possibly-modified CachedFile record.
1071             * Returns null if the named exhibit does not exist in the cache at all,
1072             * else never returns null.
1073             * <p>
1074             * This call does not access disc (ie affects in-memory status only),
1075             * and silently does nothing if there is no entry for the named
1076             * exhibit.
1077             * <p>
1078             * If this metadata not read-write, this action is silently vetoed
1079             * as it is assumed not to be of vital importance.
1080             * <p>
1081             * This marks the last-accessed time in the MetaData as now,
1082             * to be preserved on disc at some point in the future.
1083             * <p>
1084             * This MetaData object is marked as needing to be saved if
1085             * an exhibit was `touched' successfully.
1086             * <p>
1087             * This is preferably used to:
1088             * <ul>
1089             * <li>Mark end-user access to an exhibit's data.
1090             * <li>Mark some expensive operation (such as computing thumbnails)
1091             *     which we would like to avoid throwing away too soon.
1092             * </ul>
1093             * <p>
1094             * This holds a write lock on the cache while updating state.
1095             *
1096             * @param cacheDir  if not null
1097             *     then we force an update of a timestamp on disc
1098             *     as a backup in case the serialised metadata is lost;
1099             *     this causes extra disc traffic though not necessarily synchronous
1100             */
1101            CachedFile exhibitMarkAsAccessed(final ReentrantReadWriteLock rwl,
1102                                             final Name.ExhibitFull name,
1103                                             final SimpleLoggerIF logger,
1104                                             final File cacheDir)
1105                throws IOException
1106                {
1107                _getWriteLock(rwl, "exhibitMarkAsAccessed()", logger);
1108                try
1109                    {
1110                    final CachedFile cf = exhibitGetInfo(name);
1111                    if(cf == null) { return(null); }
1112    
1113                    // Ignore `touch' if read-only.
1114                    if(!isReadWrite()) { return(cf); }
1115    
1116                    // Make new touched entry.
1117                    final long now = System.currentTimeMillis();
1118                    final CachedFile touched = cf.touchedEntry(now);
1119    
1120                    // Update with touched entry.
1121                    _update(rwl, touched, logger);
1122    
1123                    // Force secondary timestamp backup to disc if requested.
1124                    if(cacheDir != null)
1125                        {
1126                        final File dataFile = new File(new File(cacheDir, CACHE_EXDATA_DIR), name.toString());
1127                        dataFile.setLastModified(now); // Enforce our clock's `now'.
1128                        }
1129    
1130                    // Return new entry.
1131                    return(touched);
1132                    }
1133                finally { rwl.writeLock().unlock(); }
1134                }
1135    
        /**If true, when creating a cache entry and data is already found on disc, use that data rather than refuse to build the meta-data entry.
         * This is useful for co-existing with the old cache mechanism.
         * <p>
         * Compile-time constant, currently enabled.
         */
        private static final boolean USE_EXTANT_DISC_ENTRY_ON_CREATE = true;
1140    
1141            /**Remove the least-recently-used exhibit cache item, if any.
1142             * We won't remove the given named item if non-null;
1143             * this may prevent us removing anything if it is the only cached item.
1144             * <p>
1145             * If the named item is the oldest then we'll remove the next oldest,
1146             * if there is one.
1147             *
1148             * @param justData  if true then we try to retain metadata/thumbnails
1149             *
1150             * @return true if we removed something
1151             */
1152            private boolean _exhibitRemoveLRUCacheEntry(final ReentrantReadWriteLock rwl,
1153                                                        final File cacheDir,
1154                                                        final Name.ExhibitFull dontRemoveName,
1155                                                        final boolean justData,
1156                                                        final SimpleLoggerIF logger,
1157                                                        final StatsLogger.StatsConfig statsIDSCGEN)
1158                throws IOException
1159                {
1160                // Grab both locks in correct order.
1161                _getWriteLock(rwl, "_exhibitRemoveLRUCacheEntry()", logger);
1162                try
1163                    {
1164                    synchronized(this)
1165                        {
1166                        final Iterator<CachedFile> it = exhibitsLRU.iterator();
1167    
1168                        // Get oldest entry.
1169                        if(!it.hasNext()) { return(false); } // Cache is empty.
1170                        final CachedFile cf = it.next();
1171                        if(!cf.name.equals(dontRemoveName))
1172                            {
1173                            // Good, we can try this one...
1174                            if(!exhibitRemoveCacheEntry(rwl, cacheDir, cf.name, justData, logger, statsIDSCGEN))
1175                                { return(false); }
1176                            }
1177                        else if(it.hasNext())
1178                            {
1179                            // Get second-oldest entry;
1180                            // this can't be the named exhibit,
1181                            // since we only get here if the oldest entry was
1182                            // the special named exhibit...
1183                            final CachedFile cf2 = it.next();
1184                            // Good, we can try this one...
1185                            if(!exhibitRemoveCacheEntry(rwl, cacheDir, cf2.name, justData, logger, statsIDSCGEN))
1186                                { return(false); }
1187                            }
1188                        else
1189                            { return(false); /* Nothing removed from the cache. */ }
1190                        }
1191                    }
1192                finally { rwl.writeLock().unlock(); }
1193    
1194                // Note eviction of old exhibit from cache...
1195                StatsLogger.captureDataPoint(statsIDSCGEN, SCGNAME_CACHEEVICTLRU);
1196                return(true);
1197                }
1198    
1199            /**Remove an exhibit entry from cache, possibly including all its data and metadata.
1200             * Returns true if successful, false if not.
1201             * <p>
1202             * This will fail if the metaData is read only,
1203             * or if an entry for the exhibit does not exist
1204             * in the metadata and on disc.
1205             * <p>
1206             * Since these failures will be by returning false
1207             * rather than by causing an exception then this
1208             * can be used as a gentle unconditional way to make sure that
1209             * an entry does not exist whether or not one did before.
1210             * <p>
1211             * This needs to access disc and so will need to
1212             * hold the main write lock while it does so, grabbing
1213             * the metaData instance lock inside the main lock
1214             * where both need to be held simultaneously.
1215             * <p>
1216             * Can optionally attempt to just remove the exhibit data,
1217             * leaving metadata and any thumbnails intact.
1218             *
1219             * @param rwl  the main cache lock object; must not be null
1220             * @param justData  if true then we try to retain metadata/thumbnails
1221             */
1222            boolean exhibitRemoveCacheEntry(final ReentrantReadWriteLock rwl,
1223                                            final File cacheDir,
1224                                            final Name.ExhibitFull name,
1225                                            final boolean justData,
1226                                            final SimpleLoggerIF logger,
1227                                            final StatsLogger.StatsConfig statsIDSCGEN)
1228                throws IOException
1229                {
1230                assert (rwl != null) && (name != null);
1231    
1232                // Fail gently if read-only.
1233                if(!isReadWrite()) { return(false); }
1234    
1235                _getWriteLock(rwl, "MetaData.exhibitRemoveCacheEntry()", logger);
1236                try
1237                    {
1238                    synchronized(this)
1239                        {
1240                        // Double-check that we haven't gone read-only...
1241                        if(!isReadWrite()) { return(false); }
1242    
1243                        // Does not exist in metaData; fail gently.
1244                        final CachedFile cf = exhibitGetInfo(name);
1245                        if(cf == null) { return(false); }
1246    
1247    /* if(ORG.hd.d.IsDebug.isDebug) */ { logger.log("[MetaData.exhibitRemoveCacheEntry(): INFO: request to remove cache "+(justData?"data":"data and metadata")+" (with extant meta-data): " + name + ", last access: "+(new Date(cf.getLastAccessed()))+", cached size: "+cf.cachedLength+".]"); /* Thread.dumpStack(); */ }
1248    
1249                        // Only attempt removal of any extant data components.
1250                        //
1251                        // Update the exhibit timestamp regardless
1252                        // to try to ensure progress during purge() etc...
1253                        //
1254                        // This is "clock daemon" working-set management.
1255                        if(justData)
1256                            {
1257                            // To avoid leaving this exhibit in the LRU slot,
1258                            // the timestamp is updated
1259                            // not quite to "now" (less good than a real access)
1260                            // but enough to avoid being seen again immediately.
1261                            final long newStamp = (cf.lastAccessed + System.currentTimeMillis())/2 + 1;
1262    
1263                            // Explicitly get the file deleted if we have some data cached
1264                            // or if the file seems to exist anyway (eg is zero length).
1265                            if((cf.cachedLength > 0) ||
1266                               (new File(new File(cacheDir, CACHE_EXDATA_DIR), name.toString()).exists()))
1267                                {
1268                                final CachedFile newCf = cf.zapData(cacheDir);
1269                                _update(rwl, newCf.touchedEntry(newStamp), logger);
1270                                return(true);
1271                                }
1272    
1273                            _update(rwl, cf.touchedEntry(newStamp), logger);
1274                            return(false); // No exhibit data to remove.
1275                            }
1276    
1277                        // Does exist: remove metadata...
1278                        _remove(rwl, name);
1279                        // Does exist: remove file(s)/data...
1280                        cf.zapMe(cacheDir);
1281                        }
1282                    }
1283                finally { rwl.writeLock().unlock(); }
1284    
1285                // Note removal of exhibit from cache...
1286                StatsLogger.captureDataPoint(statsIDSCGEN, SCGNAME_CACHEREM);
1287    
1288                return(true); // Done!
1289                }
1290    
1291            /**Create a new exhibit entry.
1292             * Returns true if successful, false if not.
1293             * <p>
1294             * This will fail if the metaData is read only,
1295             * or if an entry for the exhibit already exists
1296             * in the metaData or on disc.
1297             * <p>
1298             * Since these failures will be by returning false
1299             * rather than by causing an exception then this
1300             * can be used as a gentle unconditional way to make sure that
1301             * an entry does exist whether or not one did before.
1302             * <p>
1303             * This needs to access disc and so will need to
1304             * grab a write lock on the main cache lock while it does so, grabbing
1305             * the metaData instance lock inside the main lock
1306             * where both need to be held simultaneously.
1307             * <p>
1308             * Marks the meta-data as needing to be saved
1309             * if a new meta-data entry had to be created...
1310             *
1311             * @param rwl  the main lock object; must not be null
1312             * @param esa  the basic info on the exhibit to have an entry created
1313             *
1314             * @throws java.io.IOException  in case of serious problems
1315             *     accessing the on-disc cache state
1316             */
1317            boolean exhibitCreateNewCacheEntry(final ReentrantReadWriteLock rwl,
1318                                               final File cacheDir,
1319                                               final ExhibitStaticAttr esa,
1320                                               final SimpleLoggerIF logger,
1321                                               final StatsLogger.StatsConfig statsIDSCGEN)
1322                throws IOException
1323                {
1324                assert (rwl != null) && (esa != null);
1325    
1326                // Fail gently if read-only.
1327                if(!isReadWrite()) { return(false); }
1328    
1329    //if(ORG.hd.d.IsDebug.isDebug) { System.out.println("MetaData.exhibitCreateNewCacheEntry(): requested to create cache file: " + esa.filePath); }
1330    
1331                // Path to (possibly partially-) cached exhibit file.
1332                final File dataFile = new File(new File(cacheDir, CACHE_EXDATA_DIR), esa.getCharSequence().toString());
1333    
1334                // Grab both locks in correct order.
1335                _getWriteLock(rwl, "exhibitCreateNewCacheEntry()", logger);
1336                try
1337                    {
1338                    synchronized(this)
1339                        {
1340                        // Already exists in metaData; fail gently.
1341                        if(exhibitGetInfo(esa.getExhibitFullName()) != null) { return(false); }
1342    
1343                        CachedFile newCf; // = null;
1344                        if(dataFile.exists())
1345                            {
1346                            // Already exists on disc; fail gently.
1347                            if(!USE_EXTANT_DISC_ENTRY_ON_CREATE) { return(false); }
1348    
1349                            // MORE PERMISSIVE STYLE...
1350                            // Already exists on disc; try to use it!
1351                            try
1352                                {
1353                                newCf = CachedFile.recoverExtantCachedFileDetails(
1354                                    cacheDir, esa.getExhibitFullName());
1355                                }
1356                            catch(final IOException e)
1357                                {
1358                                newCf = CachedFile.makeNewDiscCacheFile(
1359                                    cacheDir, esa.getExhibitFullName(), esa.timestamp);
1360    
1361                                // Data on disc may be damaged,
1362                                // so ignore it and start again...
1363                                e.printStackTrace();
1364                                }
1365                            }
1366                        else
1367                            {
1368                            // OK, create new entry on disc ready for meta-data.
1369                            newCf = CachedFile.makeNewDiscCacheFile(
1370                                cacheDir, esa.getExhibitFullName(), esa.timestamp);
1371                            }
1372    
1373                        // Post updated entry to metaData.
1374                        assert(newCf != null);
1375                        _update(rwl, newCf, logger);
1376    
1377                        if(FORCE_IMMEDIATE_SAVE_ON_EXPANDED_METADATA)
1378                            { saveToDisc(cacheDir, logger, statsIDSCGEN); }
1379                        }
1380                    }
1381                finally { rwl.writeLock().unlock(); }
1382    
1383                // Note addition of exhibit to cache...
1384                StatsLogger.captureDataPoint(statsIDSCGEN, SCGNAME_CACHEADD);
1385    
1386                return(true); // Done!
1387                }
1388    
        /**Minimum percentage free in cache filesystem for us to expand the cache; non-negative, in range [0,100[.
         * Set to prevent accidental exhaustion of space in a shared filesystem
         * (eg inducing transient write errors for us or other storage users, etc)
         * due to incorrect usage estimates by us or unexpected use by others,
         * and to avoid very poor performance from a nearly-full filesystem.
         * <p>
         * Passed as the reserve percentage when estimating usable filesystem space
         * in computeFreeSpaceBelowHighWaterMark().
         * <p>
         * A value in the range 1-10 is probably good.
         */
        private static final int MIN_FS_PERCENT_FREE = 3;
1398    
1399            /**Computes the target high-water mark (target maximum cache size); strictly positive.
1400             * If we currently only have a default (zero-timestamp) genProps
1401             * then we use the initial size of the cache as our high-water mark,
1402             * ie do not allow the cache to expand.
1403             * <p>
1404             * We may further restict cache size to leave a given percentage free
1405             * in the underlying filesystem.
1406             */
1407            long computeTargetHighWaterMark()
1408                {
1409                final long lpWMECB = LocalProps.getWEBSVR_MAX_EX_CACHE_BYTES();
1410    
1411                // We don't necessarily trust the props value, eg if zero.
1412                final boolean weTrustProps = (lpWMECB > 0);
1413    
1414                // If we trust genProps then use its high-water mark
1415                // (which may have a local override),
1416                // else we use the actual high-water mark so far
1417                // (or the fallback if larger).
1418                final long highWaterMark = weTrustProps ? lpWMECB :
1419                    Math.max(FALLBACK_MIN_CACHE_SIZE, getTotalBytesHighWaterMark());
1420    
1421                return(highWaterMark);
1422                }
1423    
1424            /**Compute current definite free-space in cache.
1425             * This returns the amount of space that we have below the low-water mark;
1426             * if this returns a positive number then the cache is within
1427             * that low water mark and can be considered nowhere near full.
1428             * <p>
1429             * If we don't trust the GenProps value then the low-water mark
1430             * is the current actual high-water mark for the cache, ie
1431             * we try to maintain its size approximately.
1432             * <p>
1433             * Is synchronized to maintain consistency while working.
1434             */
1435            synchronized long computeFreeSpaceBelowLowWaterMark(final File cacheDir)
1436                {
1437                final long highWaterMark = computeTargetHighWaterMark();
1438    //            final long lowWaterMark = Math.max(0, (long) (highWaterMark * LOW_WATER_FRACTION));
1439                // How much below the high-water mark is the low-water mark,
1440                // ie how much more space has to be available to be below the low-water mark?
1441                final long lhWaterGap = Math.max(0, (long) (highWaterMark * (1-LOW_WATER_FRACTION)));
1442                assert(lhWaterGap >= 0);
1443                return(computeFreeSpaceBelowHighWaterMark(cacheDir) - lhWaterGap);
1444                }
1445    
1446            /**Returns true if we have lots of disc space free.
1447             * This means that usage is well below the low-water mark.
1448             * <p>
1449             * If we don't trust GenProps this always returns false.
1450             */
1451            boolean lotsFree(final File cacheDir)
1452                {
1453                final long max_cache_bytes = LocalProps.getWEBSVR_MAX_EX_CACHE_BYTES();
1454                // We don't necessarily trust the props value, eg if zero.
1455                final boolean weTrustProps = (max_cache_bytes > 0);
1456    
1457                // Conservative fall-back; maintain cache size for now.
1458                if(!weTrustProps) { return(false); } // Can't be sure.
1459    
1460                // Lots of empty room below the low-water mark,
1461                // eg enough to run lots of concurrent cache-expanding threads
1462                // without then finding ourselves to be out of space.
1463                // (We leave a small %age fraction free too.)
1464                return(computeFreeSpaceBelowLowWaterMark(cacheDir) > Math.max(16*MAX_EXTD_TRANSFER_CHUNK_SIZE,
1465                            (max_cache_bytes >>> 8) /* ~1% */ ));
1466                }
1467    
1468            /**Returns true if we have some (not lots of) disc space free.
1469             * This means that usage is below the low-water mark.
1470             * <p>
1471             * If we don't trust GenProps this always returns false.
1472             */
1473            boolean someFree(final File cacheDir)
1474                {
1475                // We don't necessarily trust the props value, eg if zero.
1476                final boolean weTrustProps = (LocalProps.getWEBSVR_MAX_EX_CACHE_BYTES() > 0);
1477    
1478                // Conservative fall-back; maintain cache size for now.
1479                if(!weTrustProps) { return(false); } // Can't be sure.
1480    
1481                return(computeFreeSpaceBelowLowWaterMark(cacheDir) >= MAX_TRANSFER_CHUNK_SIZE);
1482                }
1483    
        /**Cache of estimated usable free space in the underlying cache filesystem; initially null.
         * Notes, in order, the cacheDir, the time (ms) and the usable free space
         * of the last request, as stored by computeFreeSpaceBelowHighWaterMark().
         * <p>
         * It is in fact assumed that in general:
         * <ul>
         * <li>The presented cacheDir will be the same each time.</li>
         * <li>There will be relatively little thread-racing in any case.</li>
         * <li>That filesystem usable status can reasonably be cached for a reasonable time.</li>
         * </ul>
         * <p>
         * Marked volatile for thread-safe unlocked access;
         * a new tuple is stored whole each time rather than an existing one being altered.
         */
        private transient volatile Tuple.Triple<File, Long, Long> _cFSBHWM_cache;
1497    
        /**Maximum time in ms that we may retain a cached 'usable filespace' estimate; strictly positive.
         * If the file system in question doesn't contain much activity/data other than our cache,
         * and we are checking mainly to deal gracefully with internal usage-estimation errors
         * and with slowly-accumulated crud from our (and third-party) logs etc,
         * then we could probably cache the value for hours at a time without great loss,
         * but probably cacheing for tens of seconds would in practice eliminate much of the cost.
         * <p>
         * The current value is a little over one minute.
         * <p>
         * Pick a prime-ish value to minimise clashes with other activity...
         */
        private static final int MAX_FS_USABLE_SPACE_CACHE_MS = 61001;
1508    
1509            /**Compute available free space in cache (bytes); zero or negative if none.
1510             * This ignores the purge state of the system and simply reports what
1511             * space is available before the cache overflows.
1512             * <p>
1513             * This is thread-safe though may be expensive.
1514             */
1515            long computeFreeSpaceBelowHighWaterMark(final File cacheDir)
1516                {
1517                // Compute current free space.
1518                final long freeSpace =
1519                    (computeTargetHighWaterMark() - getTotalBytesCurrentlyUsedByCache());
1520                if(freeSpace <= 0) { return(freeSpace); }
1521    
1522                // Cap estimate of remaining space if filesystem is (getting) full.
1523                final File dataBaseDir = new File(cacheDir, CACHE_EXDATA_DIR);
1524                final long fsSpaceUsable;
1525                // See if we have a suitable non-stale cached usable-free-space estimate...
1526                final Tuple.Triple<File, Long, Long> cached = _cFSBHWM_cache;
1527                if((null != cached) &&
1528                   (cacheDir.equals(cached.first)) &&
1529                   (cached.second + MAX_FS_USABLE_SPACE_CACHE_MS >= System.currentTimeMillis()))
1530                    { fsSpaceUsable = cached.third; }
1531                else
1532                    {
1533                    fsSpaceUsable = FileTools.estimatedFreeSpaceBelowReserve(dataBaseDir, MIN_FS_PERCENT_FREE);
1534                    // Cache the new usable-space estimate.
1535                    _cFSBHWM_cache = new Tuple.Triple<File, Long, Long>(cacheDir, System.currentTimeMillis(), fsSpaceUsable);
1536                    }
1537                // If we *cannot* estimate the space free in the filesystem (-ve result)
1538                // then just use our internally-computed value.
1539                // We *do* want to use a zero result to suppress further data cacheing.
1540                if(fsSpaceUsable < 0) { return(freeSpace); }
1541    
1542                return(Math.min(freeSpace, fsSpaceUsable));
1543                }
1544    
1545            /**If true, we can pre-cache data blocks for (new or existing) exhibits.
1546             * POLICY: only true if we have (lots of) free space
1547             * and the cache is read/write,
1548             * and the server-slowdown factor is no greater than 2 (ie moderate or absent).
1549             */
1550            boolean canPrecacheExhibitData(final File cacheDir)
1551                {
1552                if(!isReadWrite()) { return(false); }
1553                if(LocalProps.getServerSlowdownFactor() > 2) { return(false); }
1554                return(lotsFree(cacheDir));
1555                }
1556    
1557            /**If true, we can pre-cache thumbnails for (existing) exhibits.
1558             * POLICY: only true if
1559             *         the free space is enough to store another thumbnail
1560             *         and fetch the data to do so first if necessary
1561             *         and the cache is read/write.
1562             * <p>
1563             * Basically we're prepared to go right up to the wire to generate
1564             * thumbnails because they are so useful to the user.
1565             * <p>
1566             * We leave a little bit of wiggle room on space.
1567             */
1568            boolean canPrecacheThumbnails(final File cacheDir)
1569                { return(isReadWrite() &&
1570                         (computeFreeSpaceBelowHighWaterMark(cacheDir) >
1571                          2 * (MAX_REMOTE_FETCH_TO_MAKE_THUMBNAIL + MAX_TRANSFER_CHUNK_SIZE))); }
1572    
1573            /**If true, some precaching may be possible, going by our generally-least-strict limit.
1574             * We will only allow precaching if we have a valid LocalProps value.
1575             */
1576            boolean canPrecache(final File cacheDir)
1577                {
1578                // We don't necessarily trust the props value, eg if zero.
1579                final boolean weTrustProps = (LocalProps.getWEBSVR_MAX_EX_CACHE_BYTES() > 0);
1580                if(!weTrustProps) { return(false); }
1581                return(canPrecacheThumbnails(cacheDir));
1582                }
1583    
1584    
        /**If true then try to retain metadata and thumbnails of exhibits we purge.
         * Thumbnails in particular may be expensive to regenerate/fetch,
         * and do not take much space.
         * <p>
         * In purge() this only applies until the purge becomes "desperate",
         * after which whole entries are removed regardless.
         */
        private static final boolean TRY_TO_RETAIN_TN_IN_PURGE = true;
1590    
1591            /**Purges the cache of old entries to make space for new, if needed.
1592             * This will do nothing unless the space used by the cache is
1593             * above the low-water mark.  We compute the low-water mark
1594             * from GenProps, or from the actual high-water mark in
1595             * the current use of the cache if the passed GenProps is
1596             * the default (has a zero timestamp).  (We attempt to
1597             * roughly maintain the cache or possibly run it down slightly
1598             * while waiting for GenProps to arrive.)
1599             * <p>
1600             * This will avoid purging the cache of the item passed to it
1601             * by name in case it was a candidate for removal.
1602             * <p>
1603             * If the argument is zero this tries to ensure that space
1604             * for at least one new (empty) exhibit cache entry to be created.
1605             * <p>
1606             * If greater than zero this tries to ensure that an existing cache
1607             * entry can be extended by at least the given number of bytes
1608             * (either for the exhibit data or for thumbnail data).
1609             * <p>
1610             * This grabs the cache write lock and instance lock.
1611             * <p>
1612             * This will fail if the cache is marked read-only.
1613             *
1614             * @param howMuch  howMuch space we need to ensure is free; positive
1615             */
1616            void purge(final ReentrantReadWriteLock rwl,
1617                         final GenProps gp,
1618                         final File cacheDir,
1619                         long howMuch,
1620                         final Name.ExhibitFull dontPurgeName,
1621                         final SimpleLoggerIF logger,
1622                         final StatsLogger.StatsConfig statsIDSCGEN)
1623                throws IOException
1624                {
1625                assert (rwl != null) && (gp != null) && (cacheDir != null);
1626    
1627                if(howMuch < 0)
1628                    { throw new IllegalArgumentException(); }
1629    
1630                // If lots of space free then return immediately.
1631                if(lotsFree(cacheDir)) { return; }
1632    
1633                // We convert a zero-length request
1634                // (which is a request to make space for a new empty entry)
1635                // into the estimated space required for such.
1636                if(howMuch == 0)
1637                    { howMuch = CachedFile.MAX_EMPTY_ENTRY_BYTES_ON_DISC; }
1638    
1639                assert howMuch > 0;
1640    
1641                // We round up the (positive) request
1642                // to a disc block multiple.
1643                howMuch = FileTools.roundUpToFSBlockSize(howMuch);
1644    
1645                // Grab both locks in correct order.
1646                _getWriteLock(rwl, "MetaData.purge()", logger);
1647                try
1648                    {
1649                    synchronized(this)
1650                        {
1651                        if(!isReadWrite())
1652                            { throw new IllegalStateException("Cannot purge() read-only cache"); }
1653    
1654                        // We don't want to spend very long on this
1655                        // as we are probably blocking a user.
1656                        // However, as it probably involved real disc I/O,
1657                        // then we should not make this too short.
1658                        final long stopBy = System.currentTimeMillis() + 1001 +
1659                                Math.max(Rnd.fastRnd.nextInt(3000),
1660                                         3*CoreConsts.MAX_INTERACTIVE_DELAY_MS);
1661    
1662                        // When this gets set true
1663                        // we're getting desperate, eg due to lack of time.
1664                        boolean gettingDesperate = false;
1665    
1666                        // Zap files until we have made enough space
1667                        // (or we can't make any more because there are no files).
1668                        // We'll allow that the last entry might be the
1669                        // exhibit that we are trying to make (more) space for
1670                        // so we would have to exit the loop with (possibly us)
1671                        // left and rely on other mechanisms to zap this last item.
1672                        for(int itemsExamined = 0; size() > 1; ++itemsExamined)
1673                            {
1674                            // Recompute each time to see how far we've gotten...
1675                            final long freeSpace = computeFreeSpaceBelowHighWaterMark(cacheDir);
1676    
1677                            // Quit immediately if we have enough space...
1678                            if(freeSpace >= howMuch)
1679                                { break; }
1680    
1681                            // We may attempt to retain thumbnail/metadata
1682                            // unless running out of time.
1683                            final boolean justPurgeData =
1684                                TRY_TO_RETAIN_TN_IN_PURGE &&
1685                                !gettingDesperate;
1686    
1687                            // Purge the oldest extant cache entry if we can...
1688                            if(!_exhibitRemoveLRUCacheEntry(rwl,
1689                                              cacheDir,
1690                                              dontPurgeName,
1691                                              justPurgeData,
1692                                              logger, statsIDSCGEN))
1693                                { logger.log("INFO: ExhibitDataSimpleCache.purge() failed to remove exhibit entry on iteration #"+(1+itemsExamined)); }
1694    
1695                            // We were not desperate, but maybe we are now, due to:
1696                            //   * Having examined all the exhibits at least once.
1697                            //   * Having taken too long searching for a victim.
1698                            //   * Other threads queueing for the cache.
1699                            if(!gettingDesperate)
1700                                {
1701                                final int cacheSize = size();
1702                                if((itemsExamined > cacheSize) ||
1703                                   (System.currentTimeMillis() > stopBy) ||
1704                                   ((itemsExamined > cacheSize/8) && rwl.hasQueuedThreads()))
1705                                    {
1706                                    // We've spent quite long enough
1707                                    // trying to gently free up space,
1708                                    // so it's time to put our big boots on...
1709                                    gettingDesperate = true;
1710                                    }
1711                                }
1712                            }
1713                        }
1714                    }
1715                finally { rwl.writeLock().unlock(); }
1716                }
1717    
        /**If true, we NEVER ask upstream to cache for us.
         * If true, we try to avoid churning the master's cache
         * even if it has lots of space,
         * and we are assuming that our local caching is as good as it gets.
         * <p>
         * If false, we just ask upstream not to cache our precache requests
         * (which do not reflect explicit user requests),
         * and assume that there will still be shared locality
         * between different slaves or that it might be expensive
         * for the master to fetch data.
         * <p>
         * Note that in either case, if data can be satisfied from the
         * local cache then it will be, and if we have lots of space we might
         * cache locally regardless.
         */
        private static final boolean NEVER_REQUEST_UPSTREAM_CACHEING = false;
1734    
        /**Approximate maximum time to wait for another thread to extend an exhibit cache entry (ms); strictly positive.
         * Fixed once, when this static field is initialised,
         * as 25001ms plus a pseudo-random offset from Rnd.fastRnd.nextInt(10101)
         * (presumably in the range [0,10101[ as for java.util.Random -- TODO confirm).
         */
        private static final int MAX_CONC_EXT_WAIT_TIME_MS = 25001 + Rnd.fastRnd.nextInt(10101);
1737    
1738            /**Reads data for an exhibit into the given buffer.
1739             * This will complain with an IOException if an
1740             * attempt is made to read beyond the bounds of the
1741             * exhibit, or if data is unavailable for any reason,
1742             * or if the read is far too long to be sensibly handled,
1743             * else the requested chunk of data is read into the
1744             * caller's buffer.
1745             * <p>
1746             * This will extend the underlying cache entry if necessary
1747             * (and possible) to satisfy the read, and may also
1748             * apply read-ahead to maximise user-perceived performance.
1749             * <p>
1750             * If the metaData is read-only, then any (portion of the) read that
1751             * cannot be satisfied from the cache is passed back up the pipeline.
1752             * <p>
1753             * This needs to access disc and so will need to
1754             * hold the rwl while it does so, grabbing
1755             * the metaData instance lock inside the rwl
1756             * where both need to be held simultaneously.
1757             * <p>
1758             * Note that we control caching to suit our precaching and any
1759             * downstream precache/random activity,
1760             * so at this level we have a 3-way choice.
1761             * <p>
1762             * This may only mark a cache entry as accessed
1763             * if dontCache is FALSE
1764             * and we actually read from or add data to the cache,
1765             * though we may mark it as updated at other times too.
1766             * <p>
1767             * Whenever we (easily) know that we can satisfy the request from cache,
1768             * we only take a read lock to improve concurrency.
1769             * <p>
1770             * We return as soon as we have satisfied some part of the request
1771             * so as minimise internal copying of data, etc.
1772             *
1773             * @param rwl  the main cache lock object; must not be null
1774             * @param fetchFromPeer  if non-null and we need to extend the cache
1775             *     then we will try to do so from the mirror with the given tag
1776             * @param dontCache  if FALSE, then cache here if possible and
1777             *     ask upstream to cache;
1778             *     if null then cache locally but not upstream
1779             *     (this is our precaching activity);
1780             *     if TRUE then don't cache here or upstream
1781             *     (this is downstream precaching or random activity);
1782             *     note therefore that we ask upstream to cache
1783             *     unless dontCache is FALSE
1784             *
1785             * @throws java.io.IOException  in case of serious problems
1786             *     accessing the on-disc cache state
1787             *     or if request is out of bounds of underlying exhibit
1788             *     or too big to handle
1789             */
1790            void exhibitRead(final ReentrantReadWriteLock rwl,
1791                             final String fetchFromPeer,
1792                             final File cacheDir,
1793                             final Name.ExhibitFull name,
1794                             final SimpleExhibitPipelineIF upstream,
1795                             final AllExhibitImmutableData aeid,
1796                             final GenProps gp,
1797                             final int dataStart,
1798                             final ByteBuffer buf,
1799                             final Boolean dontCache,
1800                             final SimpleLoggerIF logger,
1801                             final StatsLogger.StatsConfig statsIDSCGEN)
1802                throws IOException
1803                {
1804                assert (rwl != null) && (cacheDir != null) && (name != null);
1805                assert (upstream != null) && (aeid != null) && (gp != null);
1806                assert (buf != null);
1807    
1808                // Veto out-of-bounds request.
1809                if(dataStart < 0) // Current practical limit.
1810                    { throw new IllegalArgumentException(); }
1811    
1812                final ExhibitStaticAttr esa = aeid.getStaticAttr(name);
1813                // Immediately deflect requests for bogus exhibits.
1814                if(esa == null)
1815                    { throw new FileNotFoundException("exhibit " + name + " does not exist"); }
1816    
1817                // Maximum length of request implied by buffer size.
1818                final int maxLen = buf.remaining();
1819                assert(maxLen >= 0);
1820    
1821                // Compute actual implicit (max) request length; non-negative.
1822                final int len = Math.min(maxLen,
1823                                    (int) Math.min(MAX_USER_READ_SIZE,
1824                                                   esa.length - dataStart));
1825                assert(dataStart + len <= esa.length);
1826    
1827                // Immediately deal with trivial zero-length reads.
1828                // They don't even mark the cache entry as used.
1829                if(len == 0) { return; }
1830    
1831                // Decide whether we will ask upstream to cache for us.
1832                final boolean cacheEverywhere = Boolean.FALSE.equals(dontCache);
1833                final boolean dontCacheUpstream = NEVER_REQUEST_UPSTREAM_CACHEING ||
1834                    !cacheEverywhere;
1835                final boolean dontCacheLocally = Boolean.TRUE.equals(dontCache);
1836    
1837                // Is this a read of the last block/byte of the exhibit?
1838                final boolean finalByteRead = (dataStart + len == esa.length);
1839    
1840    
1841                // FAST PATH... (read-lock only)
1842                // Optimistically try for a read lock only to achieve good concurrency,
1843                // hoping that we don't have to go upstream.
1844                // Note that we'll have to take a write lock
1845                // for at least one read/access of the data if we want
1846                // to update timestamps to maintain the LRU cache;
1847                // we use the read of the last byte(s) to do this.
1848                //
1849                // We can use this read-only if ANY of the following holds:
1850                //   * we are not reading the last block/byte of the exhibit
1851                //   * we don't care about cacheing the file locally anyway
1852                //   * the metadata is currently read-only
1853                //   * enough of the exhibit is loaded to satisfy the request
1854                //   * the cache looks busy so it is worth taking this short-cut
1855                //   * this same exhibit is being extended by another thread
1856                //
1857                // If any "fast-path" condition is not met,
1858                // then simply fall through to the usual handler.
1859                //
1860                // Note: if the cache looks to be busy
1861                // (ie it might take us a long time to acquire a write lock)
1862                // then we'll not bother trying to update timestamps.
1863                // Our goal is to give fast responses to users.
1864                // This should particularly help performance for small exhibits
1865                // that may be used as their own thumbnails.
1866                do
1867                    {
1868                    if(!finalByteRead ||
1869                       dontCacheLocally ||
1870                       !isReadWrite() ||
1871                       rwl.hasQueuedThreads() || (rwl.getReadLockCount() > 0) || // Cache seems busy...
1872                       (_beingExtended.get(esa.getExhibitFullName()) != null)) // Being extended by another thread.
1873                        {
1874                        // Satisfy as much of the read from the cache as possible.
1875                        final int n = _readRawDataStartFromCache(rwl, name, dataStart, cacheDir, buf, logger, statsIDSCGEN);
1876                        // If we read anything then return it immediately.
1877                        if(n > 0) { return; }
1878                        }
1879                    else { break; /* No conditions met so drop out of "fast path"... */ }
1880    
1881    //                // Return immediately if request now completely satisfied...
1882    //                if(len == 0) { return; }
1883    
1884                    // If another thread is (now) extending this exhibit
1885                    // then we read/sleep again until they are done,
1886                    // or until we have been waiting a long time.
1887                    final Long extendedSince = _beingExtended.get(esa.getExhibitFullName());
1888                    if(extendedSince == null) { break; }
1889    
1890                    final long waitTime = Math.max(0,
1891                        System.currentTimeMillis() - extendedSince.longValue());
1892                    if(waitTime > MAX_CONC_EXT_WAIT_TIME_MS/2) { logger.log("[ExhibitDataSimpleCache: waiting (@"+dataStart+", for "+len+"bytes) while another thread extends ("+waitTime+"ms so far) exhibit "+esa+".]"); }
1893                    // Ensure that we don't block indefinitely...
1894                    if(waitTime > MAX_CONC_EXT_WAIT_TIME_MS)
1895                        {
1896                        logger.log("[ExhibitDataSimpleCache: WARNING: waited too long for another thread to finish, so dropping though to extend data concurrently...]");
1897                        break;
1898                        }
1899                    synchronized(extendedSince)
1900                        {
1901                        // Wait a little while (bearable for interactive users)
1902                        // or until we are signalled that the extending is done.
1903                        // Use a random wait to avoid multiple threads colliding.
1904                        // Wait in approximate proportion to time already waited
1905                        // to avoid wasting too much time spinning in this loop.
1906                        try { extendedSince.wait(CoreConsts.MAX_INTERACTIVE_DELAY_MS/2 + Rnd.fastRnd.nextInt(((int)(waitTime/4)) + CoreConsts.MAX_INTERACTIVE_DELAY_MS)); }
1907                        catch(final InterruptedException e) { throw new InterruptedIOException(e.getMessage()); }
1908                        }
1909                    } while(true);
1910    
1911    //            // Return if request now completely satisfied...
1912    //            if(len == 0) { return; }
1913    
1914                // Create the cache entry or update the timestamp on the extant one,
1915                // and purge enough stale data to make room for the new data.
1916                _getWriteLock(rwl, "exhibitRead()", logger); // Grab both locks in correct order.
1917                try
1918                    {
1919                    synchronized(this)
1920                        {
1921                        do
1922                            {
1923                            // Do we know about this exhibit?
1924                            // If so mark then it as accessed too,
1925                            // providing that this was not a "dontCache" request,
1926                            // ie probably not from a local client.
1927                            CachedFile cf = dontCacheLocally ? exhibitGetInfo(name) :
1928                                                    exhibitMarkAsAccessed(rwl, name, logger, finalByteRead ? cacheDir : null);
1929                            // If we don't, then we want to create it if possible.
1930                            if(cf == null)
1931                                {
1932                                // If the cache is not read/write,
1933                                // then prepare to duck through to upstream source.
1934                                if(!isReadWrite()) // Can't alter local cache.
1935                                    { break; }
1936    
1937                                // Make space for new cache entry if necessary.
1938                                purge(rwl, gp, cacheDir, 0, name, logger, statsIDSCGEN);
1939    
1940                                // Cache is read-write, so create a new entry.
1941                                if(!exhibitCreateNewCacheEntry(rwl, cacheDir, esa, logger, statsIDSCGEN))
1942                                    { throw new IOException("cannot create cache entry"); }
1943                                cf = exhibitGetInfo(name);
1944                                assert cf != null;
1945                                }
1946    
1947                            // Verify that the in-memory record of the cached length is correct.
1948                            // Fix-up the in-memory copy if wrong.
1949                            final long actualLength = CachedFile.getCachedDataLength(
1950                                cacheDir, name);
1951                            if(actualLength != cf.cachedLength)
1952                                {
1953                                final CachedFile rec =
1954                                    CachedFile.recoverExtantCachedFileDetails(
1955                                        cacheDir, name);
1956                                final CachedFile fixedUp = cf.fixup(rec, logger);
1957    
1958                                // If we actually had to fix-up,
1959                                // then replace the in-memory copy.
1960                                if(fixedUp != cf)
1961                                    {
1962                                    cf = fixedUp;
1963                                    _update(rwl, cf, logger);
1964                                    }
1965                                }
1966    
1967                            } while(false);
1968                        } // Drop out of metaData lock...
1969                    } // Drop cache write lock.
1970                finally { rwl.writeLock().unlock(); }
1971    
1972    //            // Return if request completely satisfied...
1973    //            if(len == 0) { return; }
1974    
1975                // Can we (now) satisfy any initial portion of the
1976                // request with data that we have in our cache?
1977                // This might be the case now because we just did a fix-up
1978                // or because the cache was extended while we were waiting
1979                // to acquire a write lock.
1980                // (This could happen with the cache read-only.)
1981                final int canDo = _readRawDataStartFromCache(rwl,
1982                                                         name,
1983                                                         dataStart,
1984                                                         cacheDir,
1985                                                         buf,
1986                                                         logger,
1987                                                         statsIDSCGEN);
1988                // If we managed to read from the "found" data
1989                // then we can return what we have immediately!
1990                // This avoids an upstream fetch for now
1991                // and might end up being all that is required.
1992                if(canDo > 0) { return; }
1993    
1994                // As long as:
1995                //   * the cache is not read-only,
1996                //   * we have an entry for this file in the cache,
1997                //   * the current request follows on from the cached data
1998                //     (or we can extend the request backwards a little to follow on from cached data),
1999                //   * we can cache locally (or there is lots of space free),
2000                // then we can fetch the next portion of the data from upstream
2001                // (NOT within the scope of any lock, ie NOT blocking other access)
2002                // and then write it into our cache (and return it to the caller too).
2003                CachedFile cf;
2004                if(isReadWrite() &&
2005                   ((cf = exhibitGetInfo(name)) != null) &&
2006                   (dataStart <= cf.cachedLength+(MAX_EXTD_TRANSFER_CHUNK_SIZE-MAX_TRANSFER_CHUNK_SIZE)) &&
2007                   (!dontCacheLocally || lotsFree(cacheDir)))
2008                    {
2009                    final long fetchStart = cf.cachedLength;
2010                    assert(fetchStart <= Integer.MAX_VALUE);
2011    
2012                    // Compute what initial portion we are prepared to cache of any one exhibit.
2013                    // Limited to a smallish fraction of total cache size,
2014                    // but a decent minimum chunk if no explicit local cache size has been set.
2015                    final int max_cacheable_bytes_per_exhibit = _getMaximumCacheableBytesForOneExhibit(gp);
2016    
2017                    // We will extend the cache entry
2018                    // by at most MAX_USER_READ_SIZE bytes at a time
2019                    // and we will round up small residues to
2020                    // MAX_TRANSFER_CHUNK_SIZE for efficiency if possible.
2021    
2022                    // We allow a small hole just before the current request
2023                    // to be backfilled so that we can cache the data fetched.
2024    
2025                    // Now impose the relevant limits...
2026                    // We impose the lowest of several potential limits.
2027                    final long maxExtendTo =
2028                        // No more than larger of a max-transfer-size block or the request length,
2029                        // but capped by the maximum user transfer size,
2030                        // starting from the caller's requested start.
2031                        Math.min(dataStart + Math.min(Math.max(len, MAX_TRANSFER_CHUNK_SIZE),
2032                                                      SimpleExhibitPipelineIF.MAX_USER_READ_SIZE),
2033    
2034                        // No more than the max-extended transfer from the current cache start.
2035                        Math.min(fetchStart + MAX_EXTD_TRANSFER_CHUNK_SIZE,
2036    
2037                        // Limit to a valid +ve int size.
2038                        Math.min(Integer.MAX_VALUE,
2039    
2040                        // Limit to no more than the actual exhibit length
2041                        // or the maximum initial portion of any exhibit that can be cached.
2042                        Math.min(esa.length,
2043                                 max_cacheable_bytes_per_exhibit))));
2044                    // How much would we extend the cache entry by?
2045                    final int extendBy = (int) (maxExtendTo - fetchStart);
2046                    assert(extendBy >= 0);
2047    
2048                    // Fetch data from upstream to cache locally
2049                    // if we have enough space for the data...
2050                    // Indicate to other threads that we are extending this exhibit.
2051                    final Object exhibitExtendKey = new String(esa.getCharSequence().toString()); // Create new String instance as a unique key.
2052                    if(_beingExtended.put(exhibitExtendKey, new Long(System.currentTimeMillis())) != null)
2053                        {
2054                        logger.log("[ExhibitDataSimpleCache: WARNING: exhibit already being extended by another thread: "+esa+".]"); // Probably should not happen often.
2055                        }
2056                    try
2057                        {
2058                        // Briefly grab a cache lock and purge some old data
2059                        // if we don't seem to have enough space to grow the exhibit.
2060                        if((extendBy > 0) && (computeFreeSpaceBelowHighWaterMark(cacheDir) < extendBy))
2061                            {
2062                            _getWriteLock(rwl, "exhibitRead()-purge", logger); // Grab both locks in right order...
2063                            try
2064                                {
2065                                synchronized(this)
2066                                    { purge(rwl, gp, cacheDir, extendBy, esa.getExhibitFullName(), logger, statsIDSCGEN); }
2067                                }
2068                            finally { rwl.writeLock().unlock(); }
2069                            }
2070    
2071                        if((extendBy > 0) && (computeFreeSpaceBelowHighWaterMark(cacheDir) >= extendBy))
2072                            {
2073                            // Note raw data cache miss...
2074                            StatsLogger.captureDataPoint(statsIDSCGEN, SCGNAME_CACHERAWDATAMISS);
2075    
2076                            // Get the data from upstream
2077                            // WITHOUT ANY CACHE LOCK HELD.
2078    
2079                            // Create a short-lived buffer (as non-direct).
2080                            final ByteBuffer tmpbuf = ByteBuffer.allocate(extendBy);
2081    
2082                            // We may allow a direct fetch from our peers
2083                            // rather than from upstream
2084                            // so as to lighten the load on the master.
2085                            if(ALLOW_DATA_FETCH_FROM_PEERS &&
2086                               (fetchFromPeer != null))
2087                                {
2088                                // Create temporary tunnel to the selected peer.
2089                                final String url = "http://" +
2090                                    LoadBalancingUtils.makeMirrorNameFromTag(fetchFromPeer) +
2091                                    CoreConsts.TUNNEL_URI;
2092                                // Suppress logging by these short-lived tunnels...
2093                                final SimpleLoggerIF fakeLogger = GenUtils.nullLogger;
2094                                final ExhibitDataHTTPTunnelSource tempTunnel =
2095                                    new ExhibitDataHTTPTunnelSource(url, fetchFromPeer, fakeLogger);
2096                                // Get the data or abort with an IOException...
2097                                try
2098                                    {
2099                                    tempTunnel.getRawFile(tmpbuf,
2100                                                          esa.getExhibitFullName(),
2101                                                          (int) fetchStart,
2102                                                          dontCacheUpstream);
2103                                    // Log the (non-aborted) fetch.
2104                                    StatsLogger.captureDataPoint(statsIDSCGEN, SCGNAME_DATAFETCHFROMPEER_PREFIX+fetchFromPeer);
2105                                    }
2106                                finally
2107                                    { tempTunnel.destroy(); }
2108                                }
2109                            else // Fetch the data from our upstream source...
2110                                { upstream.getRawFile(tmpbuf, esa.getExhibitFullName(), (int) fetchStart, dontCacheUpstream); }
2111    
2112                            if(tmpbuf.position() == 0)
2113                                {
2114                                // We failed to fetch anything...
2115                                return;
2116                                }
2117    
2118                            // Prepare to read the data fetched from upstream.
2119                            tmpbuf.flip();
2120    
2121                            // Grab a write lock and cache the data fetched from upstream.
2122                            // Re-acquire both locks in the right order.
2123                            _getWriteLock(rwl, "exhibitRead()-data-save", logger);
2124                            try
2125                                {
2126                                synchronized(this)
2127                                    {
2128                                    // Check that the cache has not changed
2129                                    // too radically for this fetched data to be usable.
2130                                    final CachedFile newEntry = exhibitGetInfo(name);
2131                                    if((newEntry != null) &&
2132                                       (newEntry.cachedLength >= fetchStart) &&
2133                                       (newEntry.cachedLength < fetchStart + tmpbuf.remaining()) &&
2134                                       isReadWrite())
2135                                        {
2136                                        cf = newEntry.extendCacheFile(cacheDir,
2137                                                    fetchStart,
2138                                                    tmpbuf);
2139                                        // Update our records.
2140                                        _update(rwl, cf, logger);
2141                                        }
2142                                    }
2143                                }
2144                            finally { rwl.writeLock().unlock(); }
2145    
2146                            // Now read what we can from the extended on-disc cache.
2147                            // We do this under the read lock.
2148                            _readRawDataStartFromCache(rwl,
2149                                                         name,
2150                                                         dataStart,
2151                                                         cacheDir,
2152                                                         buf,
2153                                                         logger,
2154                                                         statsIDSCGEN);
2155    
2156                            // Return with whatever we got (if anything).
2157                            return;
2158                            }
2159                        }
2160                    finally
2161                        {
2162                        synchronized(exhibitExtendKey)
2163                            {
2164                            // Wake up at any thread waiting for us to finish
2165                            // extending this exhibit.
2166                            // If this notification is lost then any waiting threads
2167                            // will soon wake/continue anyway.
2168                            exhibitExtendKey.notifyAll();
2169    
2170                            // Allow someone else to extend this exhibit...
2171                            _beingExtended.remove(exhibitExtendKey);
2172                            }
2173                        }
2174                    }
2175    
2176    //            // Return if request completely satisfied...
2177    //            if(len == 0) { return; }
2178    
2179                // Fall-through catch-all case:
2180                // If we have not managed to satisfy the request,
2181                // then note a raw-data cache miss,
2182                // and out of the scope of any lock,
2183                // directly satisfy the request from upstream.
2184                //
2185                // This should only happen for the tail of huge exhibits,
2186                // or if the cache is read-only and we can't extend/expand it,
2187                // or if requests have been made in a non-sequential order from 0,
2188                // or when a client needs to (re)fetch the tail of something we now don't have cached.
2189                // This is more likely to happen on the master than on slaves.
2190                StatsLogger.captureDataPoint(statsIDSCGEN, SCGNAME_CACHERAWDATAMISS);
2191    //if(cacheEverywhere && isReadWrite()) { logger.log("[ExhibitDataCache: WARNING: having to go upstream to complete request: dataStart/len/name=" + dataStart+"/"+len+"/"+name+"]"); }
2192                assert(buf.remaining() == maxLen); // No data read yet...
2193                upstream.getRawFile(buf, name, dataStart, dontCacheUpstream);
2194                assert(buf.remaining() <= maxLen); // Maybe we have fetched some data...
2195                }
2196    
2197            /**Reads whatever leading part of the request is already in the disc cache; returns the number of bytes read (never negative).
2198             * Only a read lock on the entire cache is taken, and no exclusive lock,
2199             * so that multiple reads can run concurrently,
2200             * ie so that they can be queued together at the OS/disc level for maximum throughput.
2201             * <p>
2202             * Returns zero if there is no cache entry for the exhibit or its cached data does not extend beyond dataStart.
2203             * Likely to be heavily used, so efficiency is important here.
2204             */
2205            private int _readRawDataStartFromCache(final ReentrantReadWriteLock rwl,
2206                                                   final Name.ExhibitFull name,
2207                                                   final int dataStart,
2208                                                   final File cacheDir,
2209                                                   final ByteBuffer buf,
2210                                                   final SimpleLoggerIF logger,
2211                                                   final StatsLogger.StatsConfig statsIDSCGEN)
2212                throws IOException
2213                {
2214                assert(dataStart >= 0);
2215    
2216                _getReadLock(rwl, "_readRawDataStartFromCache()", logger);
2217                try
2218                    {
2219                    // The main rwl is held (read-only) from here on,
2220                    // so the MetaData instance lock is not needed,
2221                    // and by not taking it we allow concurrent disc reads.
2222                    final CachedFile entry = exhibitGetInfo(name);
2223    
2224                    // Nothing cached at the requested start position?
2225                    // (Note that usable cached data may exist even with the cache read-only.)
2226                    if((null == entry) || (entry.cachedLength <= dataStart))
2227                        { return(0); } // Couldn't read anything.
2228    
2229                    // Note raw data cache hit...
2230                    StatsLogger.captureDataPoint(statsIDSCGEN, SCGNAME_CACHERAWDATAHIT);
2231    
2232                    // Do the read as fast as possible (no timestamps), measuring progress by the buffer position.
2233                    final int startPosition = buf.position();
2234                    entry.getCachedData(cacheDir, dataStart, buf, true);
2235                    final int bytesRead = buf.position() - startPosition;
2236                    return(bytesRead);
2237                    }
2238                finally { rwl.readLock().unlock(); }
2239                }
2240    
2241            /**Thread-safe Map of which exhibits we are currently extending and when we started extending.
2242             * Used to help reduce the probability of redundant concurrent fetches of the same data by multiple threads.
2243             * <p>
2244             * The key is a String of the full exhibit name and the value is the System.currentTimeMillis()
2245             * at which extending started; exhibitRead() adds an entry before fetching and removes it when done.
2246             * Threads in exhibitRead() that find an entry wait() (with a timeout) on that value (Long) instance,
2247             * so only a notifyAll() on that same Long instance can wake them before their timeout expires.
2248             */
2249            private transient /*final*/ Map<Object, Long> _beingExtended =
2250                new Hashtable<Object, Long>();
2251    
2252    
2253            /**Fetches the cached thumbnails for an exhibit; returns null if none are cached.
2254             * This needs to access disc and so holds the main cache lock while it does so;
2255             * this method does not itself take the metaData instance lock, which allows concurrent disc reads.
2256             * <p>
2257             * If a problem is encountered deserialising thumbnails,
2258             * the cache is not read-only, and AUTO_REPAIR_DURING_IO is true,
2259             * we may zap the offending serialised file to try to recreate it later.
2260             * <p>
2261             * A write lock is needed if access timestamps are updated on thumbnail access
2262             * (ie THUMBNAIL_ACCESS_UPDATES_ACCESS_TIMESTAMP is true); this may greatly reduce concurrency.
2263             * Otherwise only a read lock is taken.
2264             *
2265             * @param rwl  the main cache lock object; must not be null
2266             * @param cacheDir  the cache directory from which the thumbnails are read
2267             * @param name  the full name of the exhibit whose thumbnails are wanted
2268             * @param logger  passed on to the lock-acquisition and mark-as-accessed helpers
2269             *
2270             * @throws java.io.IOException  in case of serious problems accessing the on-disc cache state
2271             */
2272            ExhibitThumbnails exhibitGetThumbnails(final ReentrantReadWriteLock rwl,
2273                                                   final File cacheDir,
2274                                                   final Name.ExhibitFull name,
2275                                                   final SimpleLoggerIF logger)
2276                throws IOException
2277                {
2278                // Take the flavour of main cache lock that our timestamp policy requires;
2279                // no lock on this metadata instance is needed, and not taking one allows concurrent disc reads.
2280                if(!THUMBNAIL_ACCESS_UPDATES_ACCESS_TIMESTAMP)
2281                    { _getReadLock(rwl, "exhibitGetThumbnails()", logger); }
2282                else
2283                    { _getWriteLock(rwl, "exhibitGetThumbnails()", logger); }
2284                try
2285                    {
2286                    // Look up the cache entry for this exhibit,
2287                    // marking it as accessed on the way if that is our policy.
2288                    final CachedFile entry;
2289                    if(THUMBNAIL_ACCESS_UPDATES_ACCESS_TIMESTAMP) { entry = exhibitMarkAsAccessed(rwl, name, logger, cacheDir); }
2290                    else { entry = exhibitGetInfo(name); }
2291    
2292                    // Unknown exhibit, or no thumbnail bytes recorded: none cached.
2293                    if(entry == null) { return(null); }
2294                    if(entry.tnBytes == 0) { return(null); }
2295    
2296                    // OK, try to get extant thumbnail.
2297                    return(entry.getThumbnails(cacheDir));
2298                    }
2299                finally
2300                    {
2301                    if(!THUMBNAIL_ACCESS_UPDATES_ACCESS_TIMESTAMP) { rwl.readLock().unlock(); }
2302                    else { rwl.writeLock().unlock(); }
2303                    }
2304                }
2305    
2306            /**Saves the thumbnails for an exhibit.
2307             * Returns true if successful, false if not.
2308             * <p>
2309             * This will fail if the metaData is read only.
2310             * <p>
2311             * If the thumbnails argument is null, this will purge
2312             * any extant thumbnails for this exhibit.
2313             * <p>
2314             * This will create a new zero-length exhibit entry if
2315             * necessary.
2316             * <p>
2317             * Since these failures will be by returning false
2318             * rather than by causing an exception then this
2319             * can be used as a gentle unconditional way to make sure that
2320             * an entry does exist whether or not one did before.
2321             * <p>
2322             * This needs to access disc and so will need to
2323             * hold the main lock while it does so, grabbing
2324             * the metaData instance lock inside the main lock
2325             * where both need to be held simultaneously.
2326             * <p>
2327             * Note that this will have to grab a write lock.
2328             *
2329             * @param rwl  the main lock object; must not be null
2330             * @param esa  the basic info on the exhibit to have an entry created
2331             *
2332             * @throws java.io.IOException  in case of serious problems
2333             *     accessing the on-disc cache state
2334             */
2335            boolean exhibitSaveThumbnails(final ReentrantReadWriteLock rwl,
2336                                          final GenProps gp,
2337                                          final File cacheDir,
2338                                          final ExhibitStaticAttr esa,
2339                                          final ExhibitThumbnails tns,
2340                                          final SimpleLoggerIF logger,
2341                                          final StatsLogger.StatsConfig statsIDSCGEN)
2342                throws IOException
2343                {
2344                assert (rwl != null) && (gp != null) && (cacheDir != null) && (esa != null);
2345    
2346                // Fail gently if read-only.
2347                if(!isReadWrite()) { return(false); }
2348    
2349    if(TRACE_THUMBNAIL_ACTIVITY && ExhibitThumbnails.NO_THUMBNAILS.equals(tns)) { (new Throwable("asked to save NO_THUMBNAILS")).printStackTrace(); }
2350    if(TRACE_THUMBNAIL_ACTIVITY_ALL) { logger.log("MetaData.exhibitSaveThumbnails(): requested to "+(tns==null?"remove":(ExhibitThumbnails.NO_THUMBNAILS.equals(tns)?"save NO_THUMBNAILS":"save"))+" thumbnails for: " + esa.getCharSequence() + ": "+tns); }
2351    
2352                // Grab both locks in correct order.
2353                _getWriteLock(rwl, "exhibitSaveThumbnails()", logger);
2354                try
2355                    {
2356                    synchronized(this)
2357                        {
2358                        // Do we know about this exhibit?
2359                        CachedFile cf = exhibitGetInfo(esa.getExhibitFullName());
2360    
2361                        if(cf == null)
2362                            {
2363                            // Apparently would be a new exhibit...
2364    
2365                            // If asked to purge thumbnails, nothing to do!
2366                            if(tns == null) { return(true); } // Easy!
2367    
2368                            // Make space for new cache entry...
2369                            purge(rwl,
2370                                 gp,
2371                                 cacheDir,
2372                                 0,
2373                                 esa.getExhibitFullName(),
2374                                 logger, statsIDSCGEN);
2375    
2376                            // If asked to save thumbnails
2377                            // then we have to make a new cache entry.
2378                            if(!exhibitCreateNewCacheEntry(rwl,
2379                                                           cacheDir,
2380                                                           esa,
2381                                                           logger,
2382                                                           statsIDSCGEN))
2383                                { return(false); } // Could not make new entry.
2384                            }
2385    
2386                        // Mark the cache entry as accessed
2387                        // (since we want to avoid discarding what we just saved!)
2388                        cf = exhibitMarkAsAccessed(rwl, esa.getExhibitFullName(), logger, THUMBNAIL_ACCESS_UPDATES_ACCESS_TIMESTAMP ? cacheDir : null);
2389                        assert(cf != null);
2390    
2391                        // If zapping thumbnails,
2392                        // or if they already exist,
2393                        // zap them now.
2394                        if((tns == null) || cf.hasThumbnails())
2395                            { cf = cf.zapThumbnails(cacheDir); }
2396    
2397                        // If saving new thumbnails...
2398                        if(tns != null)
2399                            {
2400                            // Make space for new thumbnails,
2401                            // using worst-case size estimate.
2402                            purge(rwl,
2403                                 gp,
2404                                 cacheDir,
2405                                 Math.max(1, ExhibitThumbnails.MAX_BYTES_EST),
2406                                 esa.getExhibitFullName(),
2407                                 logger, statsIDSCGEN);
2408    
2409                            // Now actually do the save.
2410                            cf = cf.saveThumbnails(cacheDir, tns);
2411                            }
2412    
2413                        // Save updated metadata...
2414                        _update(rwl, cf, logger);
2415    
2416                        return(true); // Done!
2417                        }
2418                    }
2419                finally { rwl.writeLock().unlock(); }
2420                }
2421    
2422    
2423    
2424    
        /**My serial ID.
         * Fixed explicitly so that the serialised form
         * (see writeObject()/readObject()) is matched against this value
         * rather than against a compiler-computed default.
         */
        private static final long serialVersionUID = -7596195262594968694L;
2427    
2428            /**Write out a less-redundant form of our internal information.
2429             * Prevent (lazy-evaluation-caused) state changes while serialising
2430             * by being synchronized.
2431             * <p>
2432             * The more-efficient on-the-wire format also makes defensive
2433             * reading easier.
2434             */
2435            private synchronized void writeObject(final ObjectOutputStream oos)
2436                throws IOException
2437                {
2438                // Write the fields that we are not trying to optimise.
2439                // Note that this includes our length field.
2440                oos.defaultWriteObject();
2441    
2442                // Send the primary map as a count and then the CachedFile objects
2443                // in name-sorted order to attempt to enhance compression
2444                // (by improving locality, with similar items close to one another).
2445                final int size = cachedExhibits.size();
2446                final CachedFile outValues[] = new CachedFile[size];
2447                cachedExhibits.values().toArray(outValues);
2448                Arrays.sort(outValues, (new Comparator<CachedFile>(){
2449                    public final int compare(final CachedFile o1, final CachedFile o2)
2450                        { return(TextUtils.compare(o1.name, o2.name)); }
2451                    }));
2452                oos.writeInt(size);
2453                for(int i = size; --i >= 0; )
2454                    { oos.writeObject(outValues[i]); }
2455                }
2456    
2457            /**Deserialise. */
2458            private synchronized void readObject(final ObjectInputStream in)
2459                throws IOException, ClassNotFoundException
2460                {
2461                in.defaultReadObject();
2462    
2463                // Defensively (and efficiently) read our primary map.
2464                final int size = in.readInt();
2465                if(size < 0)
2466                    { throw new InvalidObjectException("bad stream: cachedExhibits.size() < 0"); }
2467                // Make suitably-sized new map.
2468                cachedExhibits = new Hashtable<Name.ExhibitFull, CachedFile>(size * 2 + 1);
2469                // Read in entries from stream.
2470                for(int i = size; --i >= 0; )
2471                    {
2472                    final CachedFile cf = (CachedFile) in.readObject();
2473                    cachedExhibits.put(cf.name, cf);
2474                    }
2475                assert(size == cachedExhibits.size()); // Should be no duplicates.
2476    
2477                // Update derived data.
2478                _recomputeDerivedValues();
2479    
2480                // Nothing currently being extended.
2481                _beingExtended = new Hashtable<Object, Long>();
2482    
2483                validateObject(); // Validate state immediately.
2484                }
2485    
2486            /**Validate fields/state.
2487             * Called in the constructor and possibly after de-serialising.
2488             * <p>
2489             * Barf if something bad is found.
2490             * (Maybe allow some extra info in debug version.)
2491             * <p>
2492             * Assumes _recomputeDerivedValues() has been called
2493             * before this is called since deserialisation or construction.
2494             */
2495            public synchronized void validateObject()
2496                throws InvalidObjectException
2497                {
2498                // Check that all components are sane and safe.
2499    
2500                if(_beingExtended == null)
2501                    { throw new InvalidObjectException("bad object: _beingExtended == null"); }
2502    
2503                // cachedExhibits must be a Map from String to CachedFile.
2504                if(cachedExhibits == null)
2505                    { throw new InvalidObjectException("bad object: cachedExhibits == null"); }
2506                // Check that the Map entries are correct and consistent.
2507                for(final Name.ExhibitFull name : cachedExhibits.keySet())
2508                    {
2509    //                if(!ExhibitName.validNameSyntax(name))
2510    //                    { throw new InvalidObjectException("bad object: cachedExhibits has invalid name as key"); }
2511                    final CachedFile cf = cachedExhibits.get(name);
2512                    if((cf == null) || !name.equals(cf.name))
2513                        { throw new InvalidObjectException("bad object: cachedExhibits has invalid value"); }
2514                    }
2515    
2516                // Do some simple validation of derived (transient) values.
2517                if(totalBytes < 0)
2518                    { throw new InvalidObjectException("bad object: totalBytes < 0"); }
2519                if(totalBytesHighWaterMark < 0)
2520                    { throw new InvalidObjectException("bad object: totalBytesHighWaterMark < 0"); }
2521                if((exhibitsLRU == null) || (exhibitsLRU.size() != cachedExhibits.size()))
2522                    { throw new InvalidObjectException("bad object: exhibitsLRU is invalid"); }
2523                }
2524    
        /**Cache of estimate of fully-cached exhibits; non-negative.
         * Private to getFullyCachedCount(),
         * which reads it once per call and overwrites it after each recomputation.
         * <p>
         * Marked volatile to allow thread-safe lock-free access.
         * <p>
         * Initially zero to force recomputation.
         * <p>
         * Not serialised (transient), so it is zero again after deserialisation.
         */
        private transient volatile int _fullyCachedCount;
2535    
2536            /**Return estimated count of known-fully-cached exhibits; non-negative.
2537             * It would be wise to force recomputation when a new AEP is loaded,
2538             * or after a precacheing round is complete,
2539             * or after any other major change in cache status.
2540             * <p>
2541             * Note that since the cached count is not serialised with the metadata
2542             * then it will need to be recomputed when metadata is loaded/deserialised.
2543             * <p>
2544             * Even without a recomputation being forced,
2545             * we may decide to do one if it seems that the value might be stale.
2546             * <p>
2547             * This will take time O(n) for an n-exhibit AEP to (re)compute its result,
2548             * but does minimal or no locking, and none for the duration of the method,
2549             * so is safe to launch in a background thread for example.
2550             * <p>
2551             * May be expensive (continually recomputing) if/while no exhibit
2552             * is fully locally cached, but this is assumed to be unlikely in practice.
2553             *
2554             * @param aep  if non-null, allow recomputation against this AEP
2555             * @param force  if true, force immediate recomputation against this AEP if non-null
2556             *
2557             * @return the estimate of exhibits whose data is entirely cached locally;
2558             *     the count does not take account of locally-recreatable data
2559             *     such as thumbnails,
2560             *     and may be too low or too high
2561             */
2562            int getFullyCachedCount(final AllExhibitProperties aep,
2563                                    final boolean force)
2564                {
2565                final int fCC = _fullyCachedCount;
2566    
2567                // Recompute if an AEP is available (non-null) and:
2568                //   * the 'force' parameter is true, OR
2569                //   * the cached value is zero or clearly invalid/stale.
2570                if(aep != null)
2571                    {
2572                    if(force || (fCC < 1) || (fCC > aep.aeid.length))
2573                        {
2574                        int newFCC = 0;
2575                        for(final Name.ExhibitFull name : aep.aeid.getAllExhibitNamesSorted())
2576                            {
2577                            final CachedFile info = this.exhibitGetInfo(name);
2578                            if(info == null) { continue; }
2579                            final ExhibitStaticAttr staticAttr = aep.aeid.getStaticAttr(name);
2580                            if(staticAttr == null) { continue; }
2581                            if(info.cachedLength != staticAttr.length) { continue; }
2582                            ++newFCC;
2583                            }
2584                        // Save the newly-computed value.
2585                        _fullyCachedCount = newFCC;
2586                        // Return the new value.
2587    if(IsDebug.isDebug) { System.out.println("INFO: fully-cached exhibits: " + newFCC); }
2588                        return(newFCC);
2589                        }
2590                    }
2591    
2592                // Return the cached answer.
2593                return(fCC);
2594                }
2595            }
2596    
    /**In-memory copy of whole-cache meta-data; never null.
     * Note that the read/write status may change at any time.
     * <p>
     * The instance is never replaced; the state is replaced
     * in-situ if need be to ensure that we never have two
     * instances of this that believe they control the disc cache.
     * <p>
     * Assigned once in the constructor: the copy deserialised from the
     * persisted metadata file if that loads, else a fresh new MetaData().
     */
    private final MetaData metaData;
2605    
2606        /**Return directly the number of partly- or fully- cached exhibits; never negative.
2607         * This may be more than the number of exhibits,
2608         * for example before deleted/renamed exhibits are removed.
2609         */
2610        public int getLiveCachedExhibitCount()
2611            {
2612            return(metaData.size());
2613            }
2614    
2615    
2616        /**Get the an instance copy of this class; may be a singleton.
2617         * If operating as a singleton then this
2618         * creates an instance on the first call; all subsequent requests/calls
2619         * are vetoed (at least in this servlet context and thus namespace)
2620         * unless the cacheDir matches that for the extant instance
2621         * in which case the new dataSource is ignored and the extant cache
2622         * instance is returned.
2623         * <p>
2624         * If the upstream source is an ExhibitDataFileSource
2625         * then this instance may assume that data access from the ExhibitDataFileSource
2626         * is only slightly more expensive than accessing its own local cache
2627         * (accessing the file source may involve powering-up bulk storage).
2628         * This will typically be the case on the master for example.
2629         *
2630         * @throws java.lang.IllegalStateException  if this is a singleton,
2631         *     and a request to create with a different cache dir to an
2632         *     extant instance is made
2633         *
2634         * @throws IOException  if cache directory does not exist
2635         *     and/or cannot be created
2636         *     (the containing directory passed in must always exist)
2637         */
2638        public static ExhibitDataSimpleCache cacheFactory(
2639                                        final SimpleExhibitPipelineIF dataSource,
2640                                        final File cacheDir,
2641                                        final SimpleLoggerIF logger)
2642            throws IOException,
2643                   IllegalStateException
2644            { return(new ExhibitDataSimpleCache(dataSource, cacheDir, logger)); }
2645    
2646    
2647        /**Remove a persistent cache.
2648         * Pass in the same _cacheDir as for a call to cacheFactory(),
2649         * but this must not be called if a cache instance is using the cache.
2650         * <p>
2651         * Will not remove entries in cacheDir unrelated to the cache.
2652         * <p>
2653         * Do not use this lightly; it may discard gigabytes of useful state.
2654         *
2655         * @throws IOException in case of I/O error
2656         */
2657        public static void rmCache(final File cacheDir)
2658            throws IOException
2659            {
2660            if(cacheDir == null)
2661                { throw new IllegalArgumentException(); }
2662    
2663            // Potentially very expensive/significant event,
2664            // so always report it.
2665            System.out.println("ExhibitDataSimpleCache: removing cache below: " + cacheDir);
2666    
2667            final File baseDir = new File(cacheDir, CACHE_BASE_DIR);
2668            FileTools.rmRecursively(baseDir);
2669            }
2670    
2671    
2672    
    /**The upstream data source; never null.
     * Set in the constructor from its dataSource argument, which is rejected if null.
     */
    private final SimpleExhibitPipelineIF source;
2675    
    /**The cache dir; never null in a successfully-constructed instance.
     * The constructor rejects a null argument (IllegalArgumentException)
     * and one that is not an existing directory (IOException)
     * before assigning this field.
     */
    private final File cacheDir;
2681    
    /**The read/write lock for the whole cache except system variables; never null.
     * Any access that may update the cache state in memory or on disc
     * must be protected by a write lock.
     * <p>
     * Any access that may just read the cache need only have a read lock.
     * <p>
     * Most accesses may have to start by taking a write lock
     * (for example because they may have to fetch data from upstream
     * and insert it into the cache)
     * but can downgrade it to a read lock as soon as they know that they
     * will not be altering the cache at all or any further.
     * <p>
     * The main exception is any state component held in volatile fields.
     * <p>
     * Note that the variable store is internally thread-safe and does
     * not require protection by this lock.
     * <p>
     * Ideally we want performance (ie best throughput) rather than fairness,
     * but starvation of some users is not good,
     * so the lock is constructed in fair mode.
     * <p>
     * We generally do not want lock attempts to block forever,
     * which means that we give up attempting to obtain a lock after
     * a given number of attempts (with a maximum time per attempt).
     * While this is intended to limit delays in the face of I/O problems,
     * this <em>may</em> rescue us from logic errors in extremis.
     */
    private final ReentrantReadWriteLock rwl = new ReentrantReadWriteLock(true);
2709    
    /**Maximum number of consecutive attempts to obtain lock for read or write; strictly positive.
     * Used by _getWriteLock() and _getReadLock() to bound their timed tryLock() retries
     * before they give up with an InterruptedIOException.
     */
    private static final int MAX_LOCK_ATTEMPTS = 10;
2712    
    /**Stack offset for _getXXXXLock() to find caller's stack frame.
     * Used as an index into Thread.currentThread().getStackTrace()
     * taken inside _getXXXXLock(); if the trace is too short
     * the helpers report "[no trace]" instead.
     * NOTE(review): assumes the usual layout where element 0 is getStackTrace() itself
     * and element 1 is the _getXXXXLock() method -- confirm on the target JVM.
     */
    private static final int _gXL_offset = 3;
2715    
2716        /**Get the cache write lock, complaining/aborting if we have to wait for a long time.
2717         * @param rwl  cache lock; never null
2718         * @throws InterruptedIOException  if the thread is interrupted or locking is aborted
2719         */
2720        private static void _getWriteLock(final ReentrantReadWriteLock rwl,
2721                                          final String detail,
2722                                          final SimpleLoggerIF logger)
2723            throws InterruptedIOException
2724            {
2725            int lockAttemptsLeft = MAX_LOCK_ATTEMPTS;
2726            try
2727                {
2728                while(!rwl.writeLock().tryLock(30, TimeUnit.SECONDS))
2729                    {
2730                    final StackTraceElement[] stackTrace = Thread.currentThread().getStackTrace();
2731                    final String caller =
2732                        ((stackTrace == null) || (stackTrace.length <= _gXL_offset) || (stackTrace[_gXL_offset] == null)) ? "[no trace]" :
2733                        (stackTrace[_gXL_offset].toString());
2734                    logger.log("[ExhibitDataSimpleCache: waiting to obtain write lock: "+rwl.writeLock()+" "+detail+": "+caller+" isWriteLocked()="+rwl.isWriteLocked()+" getReadLockCount()="+rwl.getReadLockCount()+"]");
2735                    if(--lockAttemptsLeft < 0) { throw new InterruptedIOException("too write many lock attempts"); }
2736                    }
2737                }
2738            catch(final InterruptedException x)
2739                {
2740                x.printStackTrace();
2741                throw new InterruptedIOException("ExhibitDataSimpleCache: interrupted while waiting to obtain write lock..." +detail+": " + x.getMessage());
2742                }
2743            }
2744    
2745        /**Get a cache read lock, complaining/aborting if we have to wait for a long time.
2746         * We complain sooner waiting for a read lock rather than the write lock,
2747         * since read locks are expected to be easier/quicker to obtain.
2748         *
2749         * @param rwl  cache lock; never null
2750         * @throws InterruptedIOException  if the thread is interrupted or locking is aborted
2751         */
2752        private static void _getReadLock(final ReentrantReadWriteLock rwl,
2753                                         final String detail,
2754                                         final SimpleLoggerIF logger)
2755            throws InterruptedIOException
2756            {
2757            int lockAttemptsLeft = MAX_LOCK_ATTEMPTS;
2758            try
2759                {
2760                while(!rwl.readLock().tryLock(11, TimeUnit.SECONDS))
2761                    {
2762                    final StackTraceElement[] stackTrace = Thread.currentThread().getStackTrace();
2763                    final String caller =
2764                        ((stackTrace == null) || (stackTrace.length <= _gXL_offset) || (stackTrace[_gXL_offset] == null)) ? "[no trace]" :
2765                        (stackTrace[_gXL_offset].toString());
2766                    logger.log("[ExhibitDataSimpleCache: waiting to obtain read lock: "+rwl.writeLock()+" "+detail+": "+caller+"]");
2767                    if(--lockAttemptsLeft < 0) { throw new InterruptedIOException("too many read lock attempts"); }
2768                    }
2769                }
2770            catch(final InterruptedException x)
2771                {
2772                x.printStackTrace();
2773                throw new InterruptedIOException("ExhibitDataSimpleCache: interrupted while waiting to obtain read lock..." +detail+": " + x.getMessage());
2774                }
2775            }
2776    
2777    
2778        /**Wrap a new cache instance around a data source.
2779         * This is private so that we can enforce a singleton pattern
2780         * and avoid multiple simultaneous users of the underlying
2781         * file-based cache.
2782         * <p>
2783         * We try to load the cache meta-data and exhibit properties
2784         * from persisted copies.  We can survive without the exhibit
2785         * properties, but if we can't load our meta data the default
2786         * value we use is read-only so that we don't trust it until
2787         * it's been checked against disc, presumably in the background.
2788         *
2789         * @throws IOException  if cache directory does not exist
2790         *     and/or cannot be created
2791         *     (the containing directory passed in must always exist)
2792         */
2793        private ExhibitDataSimpleCache(final SimpleExhibitPipelineIF dataSource,
2794                                       final File cacheDir,
2795                                       final SimpleLoggerIF logger)
2796            throws IOException
2797            {
2798            if(dataSource == null)
2799                { throw new IllegalArgumentException("null source"); }
2800            if(cacheDir == null)
2801                { throw new IllegalArgumentException("null cacheDir"); }
2802            if(logger == null)
2803                { throw new IllegalArgumentException("null logger"); }
2804    
2805            final long startTime = System.currentTimeMillis();
2806    
2807            source = dataSource;
2808    
2809            this.logger = logger;
2810            statsIDSCGEN =
2811                new StatsLogger.StatsConfig("SIMPLECACHE-GENERAL",
2812                                            logger,
2813                                            false, // Only dump summaries...
2814                                            8 * 3600, // About every 8 hours.
2815                                            true); // Adaptive.
2816    
2817            // Set up our variable manager.
2818            varMgr = new PipelineVarMgr(dataSource, false);
2819    if(IsDebug.isDebug) { System.out.println("[ExhibitDataSimpleCache: cons [1] "+(System.currentTimeMillis()-startTime)+".]"); }
2820    
2821            // Use the cache dir only if it looks reasonably plausible.
2822            final String message1 = "[ExhibitDataSimpleCache: cache dir = " + cacheDir + ", max cache size = " + TextUtils.sizeAsText(LocalProps.getWEBSVR_MAX_EX_CACHE_BYTES(), true) + "]";
2823            logger.log(message1);
2824    if(IsDebug.isDebug) { System.out.println(message1); }
2825            if(!cacheDir.isDirectory())
2826                { throw new IOException("missing cache directory " + cacheDir); }
2827            this.cacheDir = cacheDir;
2828    
2829    if(IsDebug.isDebug) { System.out.println("[ExhibitDataSimpleCache: cons [2] "+(System.currentTimeMillis()-startTime)+".]"); }
2830    
2831            // If things are still looking rosy then try to load the
2832            // exhibit properties and cache meta-data immediately...
2833            // We quietly give up if we have any problems with loading,
2834            // eg due to a corrupt file or a changed class definition.
2835    
2836            // Attempt to make our base cache directory if necessary.
2837            final File baseDir = new File(this.cacheDir, CACHE_BASE_DIR);
2838            if(!baseDir.isDirectory()) { baseDir.mkdirs(); }
2839            if(!baseDir.isDirectory())
2840                { throw new IOException("cannot make cache data store directory: " + baseDir); }
2841    
2842    
2843            // Load any persisted/cached event history data that we can find
2844            // *iff* our upstream source is not already 'local' and thus fast to access
2845            // which would make any cacheing here redundant.
2846            // Any such cached state may be too stale to be usable,
2847            // but if so then that will be dealt with automatically.
2848            // Absorb but report any errors encountered...
2849            // We may have to make the history dir, eg if this is the first run.
2850            if(!upstreamSourceIsLocal())
2851                {
2852                final File evhd = new File(cacheDir, EVENT_HISTORY_DIR);
2853                if(!evhd.isDirectory()) { evhd.mkdirs(); }
2854                // We need this done before we can do much else...
2855                varMgr.loadEventHistories(evhd, true);
2856                }
2857    if(IsDebug.isDebug) { System.out.println("[ExhibitDataSimpleCache: cons [3] "+(System.currentTimeMillis()-startTime)+".]"); }
2858    
2859    
2860            // Initialise cache metadata state.
2861            MetaData md = new MetaData(); // Read-only empty cache info.
2862            // Try to load cached meta-data...
2863            try {
2864                final File f = new File(this.cacheDir, CACHE_METADATA_FILENAME);
2865                if(f.isFile() && f.canRead())
2866                    {
2867                    final MetaData tmpMd = (MetaData) FileTools.deserialiseFromFile(f, STORE_EXPROPS_GZIPED);
2868                    if(tmpMd != null)
2869                        {
2870                        md = tmpMd;
2871                        logger.log("[ExhibitDataSimpleCache: loaded metaData, exhibit count: "+tmpMd.size()+".]");
2872                        }
2873                    }
2874                }
2875            catch(final Exception e) { e.printStackTrace(); } // Complain!
2876            metaData = md; // Capture cache meta-data.
2877            if(IsDebug.isDebug) { System.out.println("[ExhibitDataSimpleCache: cons [4] "+(System.currentTimeMillis()-startTime)+".]"); }
2878    
2879    
2880            // Try to load cached exhibit properties ASAP
2881            // (after everything else is set up and ready).
2882            // This may take significant time so is worth doing asynchronously.
2883            // We allow this to continue asynchronously after the constructor returns
2884            // to overlap with other work,
2885            // and we attempt to exclude polling upstream for a new AEP until it is finished.
2886            // We assume that this is significantly CPU-bound.
2887            // FIXME : exposes this not-completely-constructed instance to the pool thread, so move out of the thread to the factory method.
2888            ThreadUtils.computeIntensiveThreadPool.submit(new Runnable(){
2889                public final void run()
2890                    {
2891                    try {
2892                        final File f = new File(cacheDir, CACHE_EXPROPS_FILENAME);
2893                        if(f.isFile() && f.canRead())
2894                            {
2895                            logger.log("[ExhibitDataSimpleCache: reloading cached AEP from disc...]");
2896                            _gAEP_lock.lock();
2897                            try
2898                                {
2899                                final AllExhibitProperties aep =
2900                                    (AllExhibitProperties) FileTools.deserialiseFromFile(f, STORE_EXPROPS_GZIPED);
2901                                if(aep != null)
2902                                    {
2903                                    // Now cache in memory.
2904                                    _getAllExhibitProperties_postUpdate(aep, true);
2905                                    logger.log("[ExhibitDataSimpleCache: loaded AEP, exhibit count: "+aep.aeid.size()+".]");
2906                                    }
2907                                }
2908                            finally { _gAEP_lock.unlock(); }
2909                            }
2910                        }
2911                    catch(final Exception e) { e.printStackTrace(); } // Complain!
2912                    }
2913                });
2914    if(IsDebug.isDebug) { System.out.println("[ExhibitDataSimpleCache: cons [5] "+(System.currentTimeMillis()-startTime)+".]"); }
2915    
2916    
2917            // If we loaded some exhibit properties but no metadata,
2918            // then this may indicate a nasty problem...
2919            // We may not be able to tell if the AEP has not yet completed loading,
2920            // so we do this test as late as possible.
2921            if(md.isEmpty() && (_AEP.aeid.size() > 0))
2922                {
2923                logger.log("WARNING: ExhibitDataSimpleCache: NOT loaded metaData.");
2924                // Leave metadata read-only, and check it ASAP...
2925                }
        // Else, if we are being optimistic, mark meta-data read/write immediately.
2927            else if(ASSUME_LOADED_METADATA_OK)
2928                {
2929                md.setReadWrite(true);
2930                // Put off first metadata check until after warm-up as all is probably OK,
2931                // and there's no point doing this for a short-lived process anyway.
2932                _checkMetaData_notBefore = System.currentTimeMillis() +
2933                    (15 * 60 * 1000) + Rnd.fastRnd.nextInt(30 * 60 * 1000);
2934                }
2935    
2936            final long endTime = System.currentTimeMillis();
2937    if(IsDebug.isDebug) { System.out.println("[ExhibitDataSimpleCache: cons time "+(endTime-startTime)+".]"); }
2938    
2939            // A little logging of cache/filesystem state.
2940            final long fsSpaceUsable = FileTools.estimatedFreeSpaceBelowReserve(cacheDir, MetaData.MIN_FS_PERCENT_FREE);
2941            logger.log("INFO: ExhibitDataSimpleCache: cache filesystem ["+cacheDir+"] usable space (bytes): " + fsSpaceUsable);
2942    if(IsDebug.isDebug) { System.out.println("ExhibitDataSimpleCache: cache filesystem ["+cacheDir+"] usable space (bytes): " + fsSpaceUsable); }
2943            final long freeSpace = md.computeFreeSpaceBelowHighWaterMark(cacheDir);
2944            if(freeSpace < 0) { logger.log("WARNING: ExhibitDataSimpleCache: cache (over)full (bytes): " + (-freeSpace)); }
2945            else { logger.log("INFO: ExhibitDataSimpleCache: free space in cache (bytes): " + freeSpace); }
2946    if(freeSpace <= 0) { System.err.println("WARNING: ExhibitDataSimpleCache: cache full: " + freeSpace); }
2947    if(IsDebug.isDebug) { System.out.println("ExhibitDataSimpleCache: cache space free (bytes): " + freeSpace); }
2948            logger.log("INFO: ExhibitDataSimpleCache: estimated cache size (bytes): " + md.getTotalBytesCurrentlyUsedByCache());
2949    if(IsDebug.isDebug) { System.out.println("ExhibitDataSimpleCache: estimated cache size (bytes): " + md.getTotalBytesCurrentlyUsedByCache()); }
2950            }
2951    
2952        /**An AllExhibitProperties.ExhibitDataSource wrapping ourselves; never null. */
2953        private final AllExhibitProperties.ExhibitDataSource exhibitDataSource =
2954            (new AllExhibitProperties.ExhibitDataSource(){
2955                    @Override public final void getRawFile(final ByteBuffer buf, final ExhibitFull exhibitName, final int position)
2956                        throws IOException
2957                        { ExhibitDataSimpleCache.this.getRawFile(buf, exhibitName, position, false); }
2958                    /**Fully loaded if all in cache.
2959                     * False if exhibit not present or not full length.
2960                     */
2961                    @Override public final boolean isExhibitFullyLoaded(final ExhibitStaticAttr esa)
2962                        //throws IOException
2963                        {
2964                        assert(ExhibitDataSimpleCache.this != null);
2965                        assert(metaData != null);
2966                        return(metaData.exhibitIsFullyLoaded(esa));
2967                        }
2968                    });
2969    
2970    
    /**Maximum number of threads that may run in _asyncTNFetch() and other local discardable data read-ahead tasks; strictly positive.
     * Used as the maximum pool size (and as an upper bound on the core pool size)
     * of discardableReadAheadTaskThreadPool.
     * <p>
     * We limit the amount of threading by:
     * <ul>
     * <li>The number of available CPUs (as sometimes this work may be CPU intensive).
     * <li>The likely limit on back-end connectivity (eg HTTP-connection-count limited)
     *     (given that this cache will often be upstream of an HTTP tunnel).
     * <li>Likely limits on local and upstream disc throughput.
     * <li>The likely strain on other resources, such as memory.
     * </ul>
     * This limit/count/cap should generally be &gt;1 since the work is mainly I/O bound
     * and may be subject to significant latency,
     * but should generally be not much more than (say)
     * half the maximum simultaneous outbound tunnel HTTP connection count
     * since overuse of concurrency for such connections may be vetoed anyway.
     * <p>
     * The value is currently fixed;
     * the trailing comment on the declaration records a disabled adaptive formula
     * based on tunnel connection count and available processors.
     */
    private static final int MAX_THREADS_aTWQ = 2; // Math.max(2, Math.min((ExhibitDataHTTPTunnelSource.MAX_CONCURRENT_CONNECTIONS/2), ThreadUtils.AVAILABLE_PROCESSORS));
2987    
    /**Maximum number of async thumbnail fetches to queue; strictly positive.
     * Enough to allow all of (say) one of the 'new' or 'best' pages' thumbnails to be queued.
     * <p>
     * Used as the capacity of the bounded work queue of discardableReadAheadTaskThreadPool.
     */
    private static final int MAX_QUEUED_TN_FETCHES = 128;
2992    
    /**Shared thread pool for I/O-bound activities (for thumbnail fetching).
     * Suitable for mainly-I/O-bound threads, thus we have a fixed thread limit.
     * This ceiling also protects upstream servers from excess load.
     * <p>
     * Note that this is an instance field, so each cache instance has its own pool;
     * the pool is shared only between the tasks submitted by that instance.
     * <p>
     * A limited amount of work can be queued (up to MAX_QUEUED_TN_FETCHES tasks),
     * but excess is handled by discarding the oldest queued items silently
     * (ThreadPoolExecutor.DiscardOldestPolicy).
     * <p>
     * The threads in the pool are daemon threads,
     * so will not prevent the JVM from exiting.
     * <p>
     * All threads can time out (and thus release resources) when idle:
     * the instance initialiser following the declaration enables time-out of core threads too.
     */
    private final ThreadPoolExecutor discardableReadAheadTaskThreadPool =
        // Core size is at most 2 and never more than the maximum, MAX_THREADS_aTWQ.
        new ThreadPoolExecutor(Math.min(2, MAX_THREADS_aTWQ), MAX_THREADS_aTWQ,
            120L, TimeUnit.SECONDS, // Keep idle worker threads alive for 2 minutes...
            new ArrayBlockingQueue<Runnable>(MAX_QUEUED_TN_FETCHES), // Allow some work to be queued.
            // NOTE(review): meaning of the second (false) argument is not visible here; confirm against ThreadUtils.DaemonThreadFactory.
            new ThreadUtils.DaemonThreadFactory("ExhibitDataSimpleCache.discardableReadAheadTaskThreadPool", false),
            new ThreadPoolExecutor.DiscardOldestPolicy());
    // Instance initialiser: let even the core threads exit after the keep-alive time when idle.
    { discardableReadAheadTaskThreadPool.allowCoreThreadTimeOut(true); }
3012    
3013    
3014        /**Get a chunk of the raw exhibit binary.
3015         * The call may return less than the the buffer capacity,
3016         * though will block until it has read at least one byte unless at EOF or for a zero-byte request;
3017         * this will be clear from the state of the buffer.
3018         * <p>
3019         * The name, start byte offset/position and a buffer to fill are supplied.
3020         *
3021         * @param position  must be non-negative and less than the exhibit size in bytes
3022         * @param dontCache  if true do not cache locally, unless we have lots of free space
3023         */
3024        public void getRawFile(final ByteBuffer buf,
3025                               final Name.ExhibitFull exhibitName, final int position,
3026                               final boolean dontCache)
3027            throws IOException
3028            {
3029            // Note inbound request for raw exhibit data.
3030            StatsLogger.captureDataPoint(statsIDSCGEN, SCGNAME_EXDATAREQIN);
3031            if(dontCache)
3032                { StatsLogger.captureDataPoint(statsIDSCGEN, SCGNAME_EXDATAREQINDC); }
3033    
3034    //        try
3035    //            {
3036                final AllExhibitImmutableData aeid = _AEP.aeid;
3037                boolean succeeded = false;
3038                try
3039                    {
3040                    metaData.exhibitRead(rwl,
3041                        null, // From upstream/master.
3042                        cacheDir,
3043                        exhibitName,
3044                        source,
3045                        aeid,
3046                        genProps,
3047                        position,
3048                        buf,
3049                        dontCache ? Boolean.TRUE : Boolean.FALSE,
3050                        logger, statsIDSCGEN);
3051                    succeeded = true;
3052    
3053                    // Any (successful) activity against this cache instance spurs us into life.
3054                    if(!_userRequestedDataFromCache)
3055                        {
3056                        _userRequestedDataFromCache = true;
3057                        logger.log("INFO: user requested data from cache: precacheing may start");
3058                        }
3059                    }
3060                finally
3061                    {
3062                    // Be prepared to spin off a read-ahead thread for any uncached tail portion.
3063                    // Only do this if precacheing is currently allowed,
3064                    // and if this appears to be a normal data request from an end user,
3065                    // eg via the exhibit servlet.
3066                    // This prevents precache requests (ie not directly human-requested)
3067                    // from being wastefully routed through many intermediate servers
3068                    // and/or causing a huge avalanche of unnecessary activity.
3069                    //
3070                    // IF THE NORMAL FETCH FAILED (eg disc trouble, or master down)
3071                    // THEN RETRY THIS FETCH FROM A PEER TO ATTEMPT RECOVERY OF THE DATA.
3072                    // (This recovery attempt happens in the background, ie asynchronously.)
3073                    if(ALLOW_DATA_FETCH_FROM_PEERS && (!dontCache) &&
3074                       ((!succeeded) || metaData.canPrecacheExhibitData(cacheDir)))
3075                        {
3076                        final boolean peerFetchForRecovery = !succeeded;
3077    
3078                        final ExhibitStaticAttr esa = aeid.getStaticAttr(exhibitName);
3079                        final CachedFile cf = metaData.exhibitGetInfo(exhibitName);
3080    
3081                        // Only proceed if it looks like there is something useful to be done.
3082                        if((esa != null) && (cf != null))
3083                            {
3084                            // Compute maximum number of tail bytes of exhibit we could logically cache.
3085                            final int tailBytesStillCacheable =
3086                                (int) Math.max(0, Math.min(esa.length, _getMaximumCacheableBytesForOneExhibit(genProps)) - cf.cachedLength);
3087                            // Only actually do the precacheing if the end of user's request
3088                            // (and thus where we anticipate that the next one may start)
3089                            // is close to (or beyond) the end of what we have already cached.
3090                            // (Or if we are recovering from an error.)
3091                            if((tailBytesStillCacheable > 0) &&
3092                               (!succeeded || (position + 4*MAX_TRANSFER_CHUNK_SIZE >= cf.cachedLength)))
3093                                {
3094                                // Never block.
3095                                // Don't do this speculative work at all if the pool is full.
3096                                ThreadUtils.nonCPUThreadPoolDiscardable.submit(new Runnable(){
3097                                    public final void run()
3098                                        {
3099                                        try
3100                                            {
3101                                            // Recheck just before running that this is still appropriate.
3102                                            if(!peerFetchForRecovery && !metaData.canPrecacheExhibitData(cacheDir)) { return; /* Abort. */ }
3103                                            final CachedFile cf2 = metaData.exhibitGetInfo(exhibitName);
3104                                            if((cf2 == null) || (cf2.cachedLength != cf.cachedLength)) { return; /* Abort. */ }
3105    
3106                                            // Cache from the end of what we have already cached,
3107                                            // regardless of the caller's start position.
3108                                            _getExhibitDataFromUpstreamToPrecache(esa,
3109                                                        aeid,
3110                                                        genProps,
3111                                                        cf.cachedLength,
3112                                                        Math.min(tailBytesStillCacheable, MAX_TRANSFER_CHUNK_SIZE),
3113                                                        peerFetchForRecovery); // Is this an error-recovery attempt?
3114                                            }
3115                                        catch(final IOException e)
3116                                            { /* Silently discard errors from this speculative work. */ }
3117                                       }
3118                                    });
3119                                }
3120                            }
3121                        }
3122                    }
3123    //            }
3124    //        finally
3125    //            {
3126    //            // We must not be holding a (write) lock on exit.
3127    //            // We log any held lock and break it and propagate an Error.
3128    //            // Of course, this should NEVER happen,
3129    //            // but the cost of checking for it is low, so we'll always check.
3130    //            if(rwl.isWriteLockedByCurrentThread())
3131    //                {
3132    //                rwl.writeLock().unlock();
3133    //                System.err.println("SEVERE ERROR: getRawFile(): holding write lock on exit from cache");
3134    //                throw new Error("SEVERE ERROR: getRawFile(): holding write lock on exit from cache");
3135    //                }
3136    //            }
3137            }
3138    
3139        /**Computes the maximum number of bytes to cache from (the start of) any one exhibit; strictly positive.
3140         * Ensures that no one exhibit can monopolise the entire cache,
3141         * but also that at least a small chunk of the start of any exhibit
3142         * <em>is</em> logically permitted,
3143         * <p>
3144         * No one exhibit is allowed to grow to more than a few percent of the cache space,
3145         * though this limit may only be checked at each point that an exhibit might be extended in cache.
3146         */
3147        static int _getMaximumCacheableBytesForOneExhibit(final GenProps gp)
3148            {
3149            assert(gp != null);
3150            return(Math.min(gp.getWEBSVR_MAX_CACHEABLE_EX_BYTES(), Math.max(MAX_EXTD_TRANSFER_CHUNK_SIZE,
3151                    (int) Math.min(LocalProps.getWEBSVR_MAX_EX_CACHE_BYTES() >> 5, Integer.MAX_VALUE))));
3152            }
3153    
3154        /**Get the static attributes for a given exhibit; null if no such exhibit.
3155         * We get this from our cache of the immutable data rather than
3156         * going to the source directly.
3157         * We don't block or hold any locks to fetch this.
3158         * <p>
3159         * Returns null if the named exhibit does not exist.
3160         */
3161        public final ExhibitStaticAttr getStaticAttr(final ExhibitFull name)
3162            //throws IOException
3163            {
3164            // Return value from cache without blocking...
3165            return(_AEP.aeid.getStaticAttr(name));
3166            }
3167    
3168        /**Gets all static exhibit data if its timestamp is not that specified.
3169         * If the time specified is negative the object will be returned unconditionally.
3170         * <p>
3171         * If no exhibits are currently installed then a default set with a zero
3172         * timestamp is returned.
3173         * <p>
3174         * If the caller's copy appears to be up-to-date (eg the oldStamp
3175         * matches that that would have been returned) null is returned.
3176         * <p>
3177         * We get this from our cache of the immutable data rather than
3178         * going to the source directly.
3179         * We don't block or hold any locks to fetch this.
3180         */
3181        public final AllExhibitImmutableData getAllExhibitImmutableData(final long oldStamp)
3182            //throws IOException
3183            {
3184            final AllExhibitImmutableData cached = _AEP.aeid;
3185    
3186            if((oldStamp < 0) || (oldStamp != cached.timestamp))
3187                { return(cached); }
3188    
3189            // Caller seems to have up-to-date copy already.
3190            return(null);
3191            }
3192    
3193        /**Gets set of all exhibit properties if its hash is not that specified.
3194         * If the hash specified is negative the object will be returned unconditionally.
3195         * <p>
3196         * If no exhibits are currently installed a default set with a zero
3197         * timestamp is returned.
3198         * <p>
3199         * If the caller's copy appears to be up-to-date (eg the oldHash
3200         * matches that that would have been returned) null is returned.
3201         * <p>
3202         * We get this from our cache rather than
3203         * going to the source directly.
3204         * We don't block or hold any locks to fetch this.
3205         */
3206        public AllExhibitProperties getAllExhibitProperties(final long oldHash)
3207            //throws IOException
3208            {
3209            final AllExhibitProperties cached = _AEP;
3210    
3211            if((oldHash < 0) || (oldHash != cached.longHash))
3212                { return(cached); }
3213    
3214            // Caller seems to have up-to-date copy already.
3215            return(null);
3216            }
3217    
    /**Private lock for _getAllExhibitProperties()/constructor to prevent re-entry and multiple concurrent AEP fetches.
     * The constructor's asynchronous reload of the cached AEP from disc holds this lock while it loads and posts the AEP.
     * _getAllExhibitProperties() returns early while the lock is held (isLocked()),
     * and its background poll task only proceeds if it can take the lock without waiting (tryLock()).
     */
    private final ReentrantLock _gAEP_lock = new ReentrantLock();
3220    
    /**Minimum time before attempting to poll again for AEP while we don't have a real one loaded (ms).
     * Shorter (9991ms) when LocalProps.fastStartMode() is true, else 41291ms.
     * Used by _getAllExhibitProperties() in place of the normal maximum AEP age
     * while the current AEP's exhibit count is zero.
     */
    private static final int MIN_AEP_POLL_TIME_UNTIL_LOADED_MS = LocalProps.fastStartMode() ? 9991 : 41291;
3223    
    /**Attempts to get all exhibit properties if our cached copy may be stale.
     * Because fetching/computing this value can take a very long time
     * (upwards of several tens of minutes)
     * we attempt to split the activity into two parts, and
     * have the actual computation/fetch done in the background,
     * and then an atomic post of the results back to the cache proper.
     * <p>
     * We also adaptively attempt to use an AEP diff fetch if one is available
     * (ie if the underlying connection is a tunnel).
     * <p>
     * This method itself returns quickly:
     * it returns at once if a fetch is already holding _gAEP_lock
     * or if the last poll was too recent,
     * and otherwise just submits the poll task to a low-priority discardable pool.
     * Errors in the background task are logged and otherwise absorbed.
     */
    private void _getAllExhibitProperties()
        {
        // Quit immediately if we're actually busy doing a fetch...
        // (This is only a cheap early-out; the background task re-checks with tryLock().)
        if(_gAEP_lock.isLocked())
            { return; } // Still busy...

        // Normally we try polling for the AEP much more frequently
        // while we don't yet have a non-empty instance.
        final boolean aepNotYetLoaded = (_AEP.aeid.length == 0);

        // NOTE(review): this product is computed in int; confirm it cannot overflow for large slowdown factors.
        final int targetMaxAEPAge =
            LocalProps.getServerSlowdownFactor() * genProps.getWEBSVR_MIN_EX_IMATTR_RECHECK_MS();
        // Absolute max time we are prepared to postpone recheck/poll.
        // Should be plenty of time to warm up (many minutes).
        // NOTE(review): the first operand is in ms (21 minutes) but the second looks like seconds
        // going by its _S suffix; confirm that the units here are as intended.
        final int absMaxAge = Math.max(21*60*1000, CoreConsts.DEFAULT_TEMPORAL_SLACKNESS_S/2);
        // Postpone somewhat if not wallowing in free memory or if (temporarily) conserving power.
        // TODO: postpone also if CPU is busy.
        final int maxAge = (MemoryTools.lotsFree() && !upstreamStratum.isUpstreamConserving() && !GenUtils.mustConservePower()) ?
                targetMaxAEPAge : Math.max(targetMaxAEPAge, absMaxAge);
        // If we haven't yet loaded any AEP, poll quickly until we get one...
        // Otherwise do nothing until the last poll is older than maxAge.
        if(System.currentTimeMillis() - _lastPollAEP <=
            (aepNotYetLoaded ? MIN_AEP_POLL_TIME_UNTIL_LOADED_MS : maxAge))
            { return; }

        // Use low-priority background thread to get AEP...
        // Use 'discardable' pool to avoid starting poll if we're busy with other work.
        // The poll is memory-intensive, and CPU-intensive at least in parts.
        // NOTE(review): _lastPollAEP is only updated once the task actually runs,
        // so several calls made before then may each submit a task;
        // concurrent runs are excluded by tryLock() below, but back-to-back runs are not. Confirm acceptable.
        ThreadUtils.lowPriorityThreadPoolDiscardable.submit(new Runnable(){
            public final void run()
                {
                final long startTime = System.currentTimeMillis();

                // If we can't get the lock immediately
                // then give up immediately to try again later.
                if(!_gAEP_lock.tryLock()) { return; }
                try
                    {
                    // Postpone next fetch now just in case we fail to start new thread
                    // or the old one is still running, ie very slow.
                    _lastPollAEP = System.currentTimeMillis();

                    // Capture current AEP.
                    final AllExhibitProperties curAEP = _AEP;

                    try
                        {
                        final AllExhibitProperties new_AEP;
                        // Try to fetch new value; this might take a while or even fail...
                        logger.log("Polling for new AEP: current hash " + curAEP.longHash);
//System.out.println("*** EDSC: POLLING FOR NEW AEP: current hash " + curAEP.longHash);
                        if(source instanceof ExhibitDataTunnelSource)
                            {
                            // New, potentially faster way to fetch AEP:
                            // the tunnel source is handed our current AEP itself rather than just its hash.
                            new_AEP = ((ExhibitDataTunnelSource) source).getAllExhibitProperties(curAEP, true);
                            }
                        else // Use generic mechanism.
                            {
                            new_AEP = source.getAllExhibitProperties(curAEP.longHash);
                            }
                        // Post the update back to the cache...
                        // even if the value is null
                        // so as to let us save the current values to disc.
                         _getAllExhibitProperties_postUpdate(new_AEP, false);
                        }
                    // If a transient error occurs then we won't post an update
                    // of the AEP value and will have to try again later.
                    catch(final InterruptedIOException e) // We'll note that an error occurred, but otherwise ignore it.
                        {
                        logger.log("[ExhibitDataSimpleCache: transient error trying to fetch and update AEP in background: " + e.getMessage() + ".]");
                        }
                    // If an error occurs then we won't post an update
                    // of the AEP value and will have to try again later.
                    catch(final Throwable e) // We'll note that an error occurred, with full trace, but otherwise ignore it.
                        {
                        logger.log("[ExhibitDataSimpleCache: error trying to fetch and update AEP in background: " + e.getMessage() + ".]");
                        e.printStackTrace();
                        }
                    finally
                        {
                        // Runs whether the fetch succeeded or failed:
                        // the next poll is timed from the end of this attempt.
                        final long endTime = System.currentTimeMillis();
                        _lastPollAEP = endTime; // Schedule next poll.

                        // For the record note how long we took to get AEP
                        // unless it took very little time at all (under about 10 seconds).
                        final long ms = endTime - startTime;
                        if(ms >= 10001) { logger.log("[ExhibitDataSimpleCache: fetch/update of AEP in background took "+ms+"ms.]"); }
                        }

                    }
                // Always release the lock so that later polls can proceed.
                finally { _gAEP_lock.unlock(); }
                }
            });
        }
3327    
3328        /**Accepts a (new) AEP value posted from a background thread.
3329         * May be called at initialisation to reload cached state,
3330         * and when a poll of the upstream source returns a new AEP.
3331         * <p>
3332         * Must grab a write lock to (potentially) update/change the cache.
3333         * <p>
3334         * If the static exhibit data was stale then we also clear our in-memory
3335         * raw-exhibit data cache entirely, to be refilled by slower means.
3336         *
3337         * @param inCons  if true then this was called from the constructor
3338         *     so we don't save the AEP nor do some other expensive things
3339         *     that may rely on external mechanisms not yet set up
3340         */
3341        private void _getAllExhibitProperties_postUpdate(final AllExhibitProperties new_AEP,
3342                                                         final boolean inCons)
3343            throws IOException
3344            {
3345            // If the size of the new exhibit list is zero
3346            // then discard it on the grounds that it is almost certainly an error.
3347            if((new_AEP != null) && (new_AEP.aeid.size() == 0))
3348                {
3349                logger.log("[ExhibitDataSimpleCache: rejecting null/zero-size AEP update at "+(new Date())+".]");
3350                return;
3351                }
3352    
3353            // We only need to consider doing an update if we got
3354            // a non-null response.
3355            if(new_AEP != null)
3356                {
3357                // OK, we got an updated value.
3358    
3359                // Log the receipt of a new exhibit set.
3360                final String message = "[ExhibitDataSimpleCache: new AEP: " +
3361                    "old/new timestamp|count|hash: " +
3362                    (new Date(_AEP.aeid.timestamp)) + "|" + _AEP.aeid.length + "|" + _AEP.longHash +
3363                    " / " +
3364                    (new Date(new_AEP.aeid.timestamp)) + "|" + new_AEP.aeid.length + "|" + new_AEP.longHash +
3365                    " at " +(new Date()) + ".]";
3366                logger.log(message);
3367    
3368                // Just in case we've somehow been sent a duplicate of what we already have,
3369                // whinge loudly and discard it to avoid lots of unnecessary work downstream.
3370                if(new_AEP.equals(_AEP))
3371                    {
3372                    logger.log("ERROR: duplicate AEP received and discarding...");
3373                    System.err.println("ERROR: duplicate AEP received and discarded: " + message);
3374                    return;
3375                    }
3376    
3377                // We attempt to recover some expensive-to-recompute data...
3378                // This has to be done before any other AEP activity
3379                // else it may be blocked for security reasons
3380                // and any extant expensive-to-compute old EPCM may be lost.
3381                new_AEP.recoverOldExhibitPropsComputableMutableData(_AEP, logger);
3382    
3383                // Register the new AEP to be automatically compacted when memory is stressed.
3384                MemoryTools.registerCompactable(new_AEP);
3385    
3386                // OK, post the new cache-visible AEP value (atomically)...
3387                // This should cause some AEP-linked downstream caches to be cleared soon.
3388                _AEP = new_AEP;
3389                logger.log("[ExhibitDataSimpleCache: posted new AEP.]");
3390    
3391                // Compact the AEP now to try to free some memory before saving
3392                // and other work that will consume resources...
3393                new_AEP.compact();
3394                logger.log("[ExhibitDataSimpleCache: compacted AEP.]");
3395    
3396                // Do some speculative warm-up work if we have spare resources...
3397                // Always do this warm-up work if in fast-start mode
3398                // since this may be an especially-important customer-facing site
3399                // for which we want to minimise time to see the first page
3400                // after any AEP update.
3401                if(LocalProps.fastStartMode() || !GenUtils.mustConservePower())
3402                    {
3403                    // Try to bring the vote cache up-to-date
3404                    // using a pool thread to allow other activities to proceed.
3405                    // This is likely to be I/O bound and may take seconds-->minutes.
3406                    // If this doesn't run it's not fatal, just annoying to whoever waits for it.
3407                    ThreadUtils.nonCPUThreadPoolDiscardable.submit(new Runnable(){
3408                        public final void run()
3409                            { try { new_AEP.updateVoteCache(varMgr, true); } catch(final Throwable t) { t.printStackTrace(); } }
3410                        });
3411    
3412                    // In a pool thread precompute/cache some often-used values.
3413                    // This should result in better performance seen by users.
3414                    // But if this doesn't run it's not fatal, just annoying to whoever waits for it.
3415                    ThreadUtils.lowPriorityThreadPoolDiscardable.submit(new Runnable(){
3416                        public final void run()
3417                            {
3418                            new_AEP.getTotalExhibitBytes(); // Used on all HTML catalogue pages: high priority.
3419                            new_AEP.getCategoryExhibitCounts(); // Used on the front page: lower priority.
3420                            // Force recompute of 'fully-loaded' measure for this new AEP.
3421                            metaData.getFullyCachedCount(new_AEP, true);
3422                            }
3423                        });
3424    
3425                    logger.log("[ExhibitDataSimpleCache: warming up AEP...]");
3426                    }
3427    
3428                // Save new AEP to disc after posting to memory
3429                // so that even if the save causes an OOM we still see the new AEP!
3430                if(!inCons)
3431                    {
3432                    _save_AEP();
3433                    logger.log("[ExhibitDataSimpleCache: saved new AEP to disc, hash "+new_AEP.longHash+".]");
3434                    }
3435                }
3436            }
3437    
3438        /**Save AllExhibitProperties to disc.
3439         * We should do this when we receive a new set from downstream
3440         * so that we can restart with the appropriate set,
3441         * and periodically to save any cached state that we have
3442         * accumulated.
3443         * <p>
3444         * In principle this needs a write lock to alter state on disc.
3445         * In practice at most one thread at once will ever try to call this,
3446         * and the serialiseToFile() routine attempts to atomically replace the file,
3447         * and this can take a long time and thus needlessly block cache activity,
3448         * so we do not take a cache lock here.
             * <p>
             * The (volatile) _AEP field is read exactly once into a local,
             * so the instance that is tested is the same instance that is written.
             * <p>
             * This silently does nothing if the AEP is null or has no exhibits,
             * if cacheDir is null or is not a writable directory,
             * or if the cache instance has been marked destroyed.
             * Otherwise the AEP is serialised to CACHE_EXPROPS_FILENAME within cacheDir.
             *
             * @throws IOException  as propagated from FileTools.serialiseToFile()
             *     (presumably on failure to write the file -- TODO confirm)
3449         */
3450        private void _save_AEP()
3451            throws IOException
3452            {
3453    //        _getWriteLock(rwl, "_save_AEP()");
3454    //        try
                    // (Bare block retained from the commented-out try/finally lock scaffolding.)
3455                {
3456                // Replace any extant file atomically if possible.
                    // Snapshot the volatile field once so that all tests and the save use one instance.
3457                final AllExhibitProperties aep = _AEP;
                    // Only attempt a save for a non-empty AEP and a usable (writable directory) cacheDir.
3458                if((aep != null) && (aep.aeid.length > 0) &&
3459                   (cacheDir != null) &&
3460                   (cacheDir.isDirectory()) &&
3461                   (cacheDir.canWrite()))
3462                    {
3463                    // Abort at last moment if cache instance shut down.
3464                    if(destroyed) { return; }
3465    
3466                    final File f = new File(cacheDir, CACHE_EXPROPS_FILENAME);
                        // Debug builds log wall-clock times either side of the (possibly slow) save.
3467    if(ORG.hd.d.IsDebug.isDebug) { logger.log("[_save_AEP() start: "+ System.currentTimeMillis() +".]"); }
3468                    FileTools.serialiseToFile(aep, f, STORE_EXPROPS_GZIPED, true);
3469    if(ORG.hd.d.IsDebug.isDebug) { logger.log("[_save_AEP() end:   "+ System.currentTimeMillis() +".]"); }
3470                    }
3471                }
3472    //        finally { rwl.writeLock().unlock(); }
3473            }
3474    
3475        /**Cached AllExhibitProperties; never null.
3476         * Volatile so that it can be safely accessed without a lock.
             * <p>
             * A new value is posted by assigning a whole new instance to this field,
             * so code needing a consistent view should read the field once into a local
             * (as _save_AEP() does) rather than re-reading it.
3477         */
3478        private volatile AllExhibitProperties _AEP = new AllExhibitProperties();
3479    
3480        /**Last time we polled for AllExhibitImmutableData; initially 'now' to postpone first poll.
             * In milliseconds on the System.currentTimeMillis() clock.
3481         * Private to _getAllExhibitProperties().
3482         */
3483        private volatile transient long _lastPollAEP = System.currentTimeMillis();
3484    
3485    
3486        /**Time before which next _checkMetaData() call should not be initiated.
             * In milliseconds on the System.currentTimeMillis() clock;
             * _checkMetaData() returns at once while the current time is before this value.
3487         * The initial check is usually put off a few minutes
3488         * since the system is often very busy on start-up,
3489         * and we don't expect significant problems anyway most of the time.
3490         * <p>
3491         * Volatile for thread-safe access without a lock.
             * (It is written by the background check task and read by the caller of _checkMetaData().)
3492         * <p>
3493         * Private to _checkMetaData() and _checkMetaData_postResults().
3494         */
3495        private volatile transient long _checkMetaData_notBefore;
3496    
3497        /**Private lock for _checkMetaData() to avoid starting more than one thread; non-null.
             * The background check task takes this with tryLock() and holds it until it finishes;
             * _checkMetaData() additionally tests isLocked() first as a cheap advisory check.
             */
3498        private final ReentrantLock _cMD_lock_ = new ReentrantLock();
3499    
3500        /**Initiates, in the background, a check of the in-memory cache meta data against disc.
3501         * Grabs a write lock on the cache and makes the cache read-only
3502         * while it works.
3503         * <p>
3504         * Refuses to do the check if it is too soon since the last one
3505         * or if the cache seems busy.
3506         * <p>
3507         * May postpone a check if the system is short of power or otherwise stressed.
             * <p>
             * The work itself is submitted as a discardable task to
             * ThreadUtils.nonCPUThreadPoolDiscardable so as not to block the caller.
             * The task, in order:
             * <ol>
             * <li>rebuilds a MetaData instance from what is on disc (no cache lock held),
             * <li>under the cache write lock and the metaData monitor, with metaData set
             *     read-only, merges the rebuilt metadata in if the in-memory copy is empty
             *     or is not equivalent to it,
             * <li>after releasing the write lock, spends a bounded time checking cached
             *     exhibit data via _incrCheckMRUExhibitEntries(),
             * <li>removes old temporary files and old empty directories from the
             *     exhibit data cache directory,
             * <li>sets _checkMetaData_notBefore to schedule the next check.
             * </ol>
             * NOTE(review): _checkMetaData_notBefore is only advanced when the task
             * runs to completion; after an IOException or an early return on 'destroyed'
             * it is left unchanged, so a later call may start another check at once -- confirm intended.
3508         */
3509        private void _checkMetaData()
3510            {
3511            // Don't try to check the cache if there isn't one!
3512            if(cacheDir == null) { return; }
3513    
3514            // Check that it is not too soon for another check;
3515            // if so, quit for another try later.
3516            // Compute how many ms too early this call is...
3517            // This value will be positive when it is too early for another check
3518            // and then progressively more negative as the check is more and more overdue.
3519            final long tooEarlyMs = _checkMetaData_notBefore - System.currentTimeMillis();
3520            if(tooEarlyMs > 0)
3521                { return; } // Too soon; try again later!
3522    
3523            // We can postpone the check a long time (maybe several days)
3524            // if the system is (temporarily) short of power,
3525            // eg if running on solar PV then we may have to wait for the sun to come out...
3526            // This gives us a chance to run as soon as power is available.
                // (-tooEarlyMs is how overdue the check is; give in once overdue by the larger bound.)
3527            if(GenUtils.mustConservePower() &&
3528               (-tooEarlyMs < Math.max(CoreConsts.MAX_EXPECTED_LOW_POWER_RUN_MS, 2*DISC_RECHECK_INTERVAL_MS)))
3529                { return; } // Try again later...
3530    
3531            // Don't try to start the check now if the cache seems busy.
3532            if((rwl.getReadLockCount() > 0) ||
3533               (rwl.isWriteLocked()))
3534                { return; }
3535    
3536            // If a run already seems to be in progress
3537            // then don't try to start a new one...
                // (Advisory only: the tryLock() at the start of the task is the real guard.)
3538            if(_cMD_lock_.isLocked())
3539                { return; }
3540    
3541            // Start (I/O-bound) background thread for immediate execution if possible,
3542            // discardable so as never to block the calling poll() thread.
3543            // This starts unencumbered by locks.
3544            ThreadUtils.nonCPUThreadPoolDiscardable.submit(new Runnable(){
3545                public final void run()
3546                    {
3547                    if(!_cMD_lock_.tryLock()) { return; } // Quit immediately if already in progress.
3548                    try
3549                        {
3550                        // Note when we start...
3551                        final long startTime = System.currentTimeMillis();
3552                        logger.log("[ExhibitDataSimpleCache: cache metadata check starting at "+(new Date(startTime))+".]");
3553    
3554                        try
3555                            {
3556                            // Compute the base dir of the exhibit data cache.
3557                            final File dataBaseDir = new File(cacheDir, CACHE_EXDATA_DIR);
3558                            // Try to make sure that the cache directory actually exists
3559                            // and is a directory and is writable;
3560                            // else zap it and try to remake it.
                                // (The results of delete() and mkdirs() are not checked here.)
3561                            if(!dataBaseDir.isDirectory() || !dataBaseDir.canWrite())
3562                                {
3563                                dataBaseDir.delete();
3564                                dataBaseDir.mkdirs();
3565                                }
3566    
3567                            // Go and compute meta-data from what is on disc
3568                            // (and knowing what current exhibits to look for),
3569                            final MetaData newMd = new MetaData(_AEP, cacheDir, logger);
3570    
3571                            // Abort if cache is being shut down.
3572                            if(destroyed) { return; }
3573    
3574                            // Lock out other cache activity while we make sure
3575                            // that the meta-data is correct,
3576                            // or fix it if not.
3577                            // Grab both locks in correct order...
                                // (That is: the cache write lock first, then the metaData monitor.)
3578                            _getWriteLock(rwl, "_checkMetaData() thread", logger);
3579                            try
3580                                {
3581                                synchronized(metaData)
3582                                    {
3583                                    try
3584                                        {
3585                                        // Set working meta-data read-only just before
3586                                        // we cross-check in-memory and reconstructed versions.
3587                                        metaData.setReadWrite(false);
3588    
3589                                        // If the in-memory meta-data is empty,
3590                                        // mergeWithNewMetaData the new one if it is not empty.
3591                                        if(metaData.isEmpty())
3592                                            {
3593                                            if(!newMd.isEmpty())
3594                                                {
3595                                                logger.log("[ExhibitDataSimpleCache: loading in-memory cache metadata: exhibits/bytes = "+newMd.size()+"/"+newMd.getTotalBytesCurrentlyUsedByCache()+".]");
3596                                                metaData.mergeWithNewMetaData(newMd, logger);
3597                                                }
3598                                            }
3599                                        // ELSE if there is a serious inconsistency,
3600                                        // (and it's not just that the in-memory copy is empty)
3601                                        // complain and try to fix the in-memory copy.
3602                                        else if(!newMd.isEquivalent(metaData))
3603                                            {
3604                                            logger.log("[ExhibitDataSimpleCache: WARNING: on-disc and in-memory cache metadata inconsistent: attempting to fix.]");
3605                                            metaData.mergeWithNewMetaData(newMd, logger);
3606                                            }
3607                                        }
3608                                    finally
3609                                        {
3610                                        // OK, the cache should be sane now,
3611                                        // so make it read/write,
3612                                        // and allow normal cache operations to resume.
3613                                        metaData.setReadWrite(true);
3614                                        }
3615                                    }
3616                                }
3617                            finally { rwl.writeLock().unlock(); }
3618    
3619                            // Note when we finish main work...
3620                            final long endTimeMainWork = System.currentTimeMillis();
3621                            final long mainWorkTime = endTimeMainWork - startTime;
3622    
3623                            logger.log("[ExhibitDataSimpleCache: cache metadata check ended at "+(new Date(endTimeMainWork))+".]");
3624    
3625    // NOW NORMAL CACHE OPERATION CAN RESUME WHILE WE DO SOME OTHER TIDY-UP...
3626    
3627                            // Spend a proportion of the time that we spent checking the metadata
3628                            // now actually checking the data (if the AEP is not empty).
3629                            // Give up if we find that the cache has become write locked,
3630                            // ie if we may not be able to get in.
3631                            // Reduce this time considerably if (temporarily) conserving power,
3632                            // but don't entirely eliminate it to ensure that it gets done.
                                // (Budget: half the main work time if conserving power, else three times it.)
3633                            final long finishDataCheckBy = System.currentTimeMillis() + 1 +
3634                                (GenUtils.mustConservePower() ? (mainWorkTime/2) : (mainWorkTime*3));
                                // Names of exhibits already examined in this pass, passed to each incremental check.
3635                            final Set<Name.ExhibitFull> done = new HashSet<Name.ExhibitFull>();
3636                            if(!metaData.isEmpty())
3637                                {
                                        // do/while: at least one incremental check is always attempted.
3638                                    do
3639                                        {
3640                                        // Check one more cache entry for data corruption.
3641                                        _incrCheckMRUExhibitEntries(done);
3642                                        } while((System.currentTimeMillis() < finishDataCheckBy) &&
3643                                                !rwl.isWriteLocked() &&
3644                                                (done.size() < _AEP.aeid.length) /* Give up when no more to check. */ );
3645                                    }
3646                            logger.log("[ExhibitDataSimpleCache: cached exhibit files fully/partially validated: "+done.size()+"]");
3647    
3648                            // Remove orphaned temporary files
3649                            // and empty directories.
3650                            // Anything not modified for over a month is fair game.
3651                            // TODO: Zap any other crud that builds up.
                                // (Cut-off is 32 days ago; the trailing L makes the final multiply a long one.)
3652                            final long old = System.currentTimeMillis() - (32 * 24 * 3600 * 1000L);
3653                            FileTools.rmRecursively(new File(cacheDir, CACHE_EXDATA_DIR),
3654                                new FileFilter(){
3655                                /**Tests whether or not the file with the specified abstract pathname should be removed.
3656                                 * If a directory then everything within it
3657                                 * is dealt with (eg possibly removed) first.
3658                                 * <p>
3659                                 * Only remove plain files that appear to be old
3660                                 * temporary files (starting with the temp prefix),
3661                                 * and empty dirs (can be recreated on demand).
3662                                 *
3663                                 * @param pathname The abstract pathname to be tested
3664                                 * @return true iff <code>pathname</code>
3665                                 *         should be deleted
3666                                 */
3667                                public final boolean accept(final File pathname)
3668                                    {
3669                                    // Must be "old" (not modified for ~1M).
3670                                    // This avoids races with other components
3671                                    // actively creating/using temporary files
3672                                    // and allows some grace time to allow for
3673                                    // glitches in the exhibit set, etc.
3674                                    if(pathname.lastModified() >= old)
3675                                        { return(false); }
3676    
3677                                    // Empty directories can be zapped.
3678                                    if(pathname.isDirectory())
3679                                        {
                                            // list() may return null (eg on error), which is treated as "not empty".
3680                                        final String[] files = pathname.list();
3681                                        if((files != null) && (files.length == 0))
3682                                            {
3683                                            logger.log("INFO: ExhibitDataSimpleCache: found empty cache directory for removal: " + pathname);
3684                                            return(true);
3685                                            }
3686                                        return(false); // Cannot zap this dir.
3687                                        }
3688    
3689                                    // Anything not a dir and not a plain file
3690                                    // is skipped/rejected.
3691                                    if(!pathname.isFile())
3692                                        { return(false); }
3693    
3694                                    // Temporary files start with a known prefix.
3695                                    if(pathname.getName().startsWith(FileTools.F_tmpPrefix))
3696                                        {
3697                                        logger.log("WARNING: ExhibitDataSimpleCache: found orphaned/foreign cached temporary file for removal: " + pathname);
3698                                        return(true);
3699                                        }
3700    
3701                                    // Whatever this is, we can't zap it...
3702                                    return(false);
3703                                    }
3704                                });
3705    
3706                            // Note when we finish all the work...
3707                            final long endTime = System.currentTimeMillis();
3708    
3709    if(IsDebug.isDebug) { logger.log("[ExhibitDataSimpleCache._checkMetaData(): took "+(endTime - startTime)+"ms.]"); }
3710    
3711                            // Compute a postponement long enough
3712                            // that we don't spend more than about
3713                            // 1% of system (CPU/IO) time in the scan,
3714                            // though bounded from above and below
3715                            // in case of unusual events.
                                // The delay is 100x the elapsed time, clamped to between a third of
                                // and three times DISC_RECHECK_INTERVAL_MS, plus random jitter;
                                // the OR with 0xfff keeps the nextInt() bound at least 4095
                                // (assuming a non-negative interval constant).
3716                            final long nextDiscCacheRecheckDue =
3717                                endTime +
3718                                Math.min(DISC_RECHECK_INTERVAL_MS * 3L,
3719                                    Math.max(((endTime - startTime) * 100), // Max 1% of wall-clock time.
3720                                             DISC_RECHECK_INTERVAL_MS / 3L)) +
3721                                Rnd.fastRnd.nextInt(0xfff | (DISC_RECHECK_INTERVAL_MS / 8));
3722    
3723                            // Set the (volatile) 'do-not-try-again-before' value...
3724                            _checkMetaData_notBefore = nextDiscCacheRecheckDue;
3725    
3726                            logger.log("[ExhibitDataSimpleCache: next cache metadata check due not before "+(new Date(nextDiscCacheRecheckDue))+".]");
3727                            }
3728                        catch(final IOException e)
3729                            {
3730                            // Complain lots if we could not even reload the cache.
                                // (The next-check time is not advanced on this path.)
3731                            e.printStackTrace();
3732                            }
3733                        }
3734    //                catch(final IllegalStateException e)
3735    //                    {
3736    //                    e.printStackTrace();
3737    //                    }
                        // Always release the single-run lock, including on the early 'destroyed' return.
3738                    finally { _cMD_lock_.unlock(); }
3739                    }
3740                });
3741            }
3742    
3743    
3744        /**Time before which not to save metaData again; private to _cleanAndSaveMetaData().
             * In milliseconds on the System.currentTimeMillis() clock.
             * <p>
             * Volatile for thread-safe access without a lock:
             * it is written by the save task (which may run on a pool thread)
             * and read by the caller of _cleanAndSaveMetaData() with no lock held,
             * so a plain long could be seen stale or (on some JVMs) torn.
             */
3745        private volatile transient long _saveMetaData_notBefore; // Initially zero for immediate save.
3746    
3747        /**Lock to prevent concurrent attempts to save metadata; non-null.
             * Taken with tryLock() by the save task for the duration of a clean-and-save pass.
             */
3748        private final ReentrantLock _metadataSave_lock_ = new ReentrantLock();
3749    
3750        /**Saves the cache metadata if needed.
3751         * Grabs a write lock to update disc (and memory) state.
3752         * <p>
3753         * Aims to avoid saving the metaData more than once every
3754         * METADATA_MIN_SAVE_INTERVAL_MS, though if no save has taken
3755         * place for a while then the next save will happen on the
3756         * next call.
3757         * <p>
3758         * If saving the meta-data is taking a long time this aims
3759         * to postpone the next save at least a reasonable multiple of that
3760         * to avoid wasting too much system/CPU/disc bandwidth,
3761         * though we do put a cap on the maximum delay in case of weirdness...
3762         * <p>
3763         * This may also incrementally purge stale meta-data and data
3764         * just before the save to avoid the need for an extra meta-data save
3765         * to account for the purge-induced changes themselves.
3766         * <p>
3767         * This may also pick one or more exhibits at random to spot-check
3768         * for consistency with the master copy (eg looking for data corruption).
             * <p>
             * Returns without doing anything if the metadata does not need saving,
             * if (when not forced) the current time has not yet passed _saveMetaData_notBefore,
             * or if a save already appears to be in progress (even when forced).
             * <p>
             * Exceptions from the clean-up step and IOExceptions from the save itself
             * are printed and otherwise ignored.
3769         *
3770         * @param force  if true, force an immediate save to disc before return,
3771         *     else run asynchronously (if possible, else discard) and never block
3772         */
3773        private void _cleanAndSaveMetaData(final boolean force)
3774            {
3775            // If no save is needed then return immediately.
3776            if(!metaData.getNeedsSave()) { return; }
3777    
3778            // Too soon to consider another save, so return.
3779            if(!force && (_saveMetaData_notBefore >= System.currentTimeMillis())) { return; }
3780    
3781            // Abort if apparently already in progress even if forcing...
                // (Advisory only: the tryLock() at the start of the task is the real guard.)
3782            if(_metadataSave_lock_.isLocked()) { return; }
3783    
3784            // Attempt to run this (I/O-bound thread) immediately
3785            // or if not forcing a synchronous immediate save
3786            // then if need be discard the attempt so as never to block the poll() thread.
3787            final Runnable r = (new Runnable() { public void run() {
3788                if(!_metadataSave_lock_.tryLock()) { return; } // Save already in progress.
3789                try
3790                    {
3791    logger.log("Starting ExhibitDataSimpleCache metadata cleanup and save...");
3792    
3793                    // Attempt to clean the metadata before saving.
3794                    // If this produces an error,
3795                    // don't let it stop us doing the save anyway.
                        // ('start' is taken here so that 'took' below covers clean-up as well as the save.)
3796                    final long start = System.currentTimeMillis();
3797                    try
3798                        {
3799                        // Incrementally purge any orphaned entries...
3800                        _incrPurgeOrphanedExhibits();
3801                        // Incrementally test integrity of most-recently-used entries...
3802                        _incrCheckMRUExhibitEntries(null);
3803                        }
3804                    catch(final Exception e)
3805                        { e.printStackTrace(); } // Unexpected error, so log, but don't abort the save.
3806    
3807                    // Abort at last moment if cache instance shut down.
                        // (This leaves _saveMetaData_notBefore unchanged.)
3808                    if(destroyed) { return; }
3809    
3810                    // Now actually save the cleaned-up metadata to disc.
3811                    try { metaData.saveToDisc(cacheDir, logger, statsIDSCGEN); }
3812                    catch(final IOException e) { e.printStackTrace(); } // Log error but don't abort.
3813    
3814                    // Since the metadata needed saving,
3815                    // then the fully-cached count may be stale.
3816                    metaData.getFullyCachedCount(_AEP, true);
3817    
3818                    // Postpone the next metadata save.
3819                    // Limit from above and below the time before the next save.
3820                    //
3821                    // Push the next save back a little longer if (temporarily) economising on power,
3822                    // though since this is semi-critical data
3823                    // (we can only partially reconstruct it if absent/corrupt)
3824                    // we save it more often than non-critical info.
3825                    final long end = System.currentTimeMillis();
3826                    final long took = end - start;
                        // Next save time = now + minimum interval + random jitter (up to a third of the interval)
                        // + extra delay when conserving power
                        // + back-off of 79x the time this pass took, capped at DISC_RECHECK_INTERVAL_MS/11.
                        // NOTE(review): the base is a fresh clock read rather than 'end'.
3827                    final long notBefore = System.currentTimeMillis() +
3828                        METADATA_MIN_SAVE_INTERVAL_MS +
3829                        Rnd.fastRnd.nextInt(1 + METADATA_MIN_SAVE_INTERVAL_MS/3) +
3830                        (GenUtils.mustConservePower() ? Math.max(CoreConsts.ASYNC_MIN_POWER_SAVE_NON_CRITICAL_DATA_FLUSH_MS/4, METADATA_MIN_SAVE_INTERVAL_MS) : 0) +
3831                        Math.min(79*took, DISC_RECHECK_INTERVAL_MS/11);
3832                    _saveMetaData_notBefore = notBefore;
3833    
3834    logger.log("Finished ExhibitDataSimpleCache metadata cleanup and save in "+took+"ms; next not before: "+(new Date(notBefore)));
3835                    }
3836                finally { _metadataSave_lock_.unlock(); }
3837                } });
3838    
3839            // Do the save blocking (when forced) or non-blocking.
                // When forced the task runs directly on the calling thread.
3840            if(force) { r.run(); } // Blocking.
3841            else { ThreadUtils.nonCPUThreadPoolDiscardable.submit(r); } // Non-blocking.
3842            }
3843    
3844        /**Private iterator over all cached full exhibit names for _incrCheckMRUExhibitEntries().
3845         * Must only be accessed under a lock on the metadata object
3846         * to prevent concurrent/unsafe access to the iterator object.
3847         * The underlying data being iterated over is guaranteed not to change,
3848         * though may become stale wrt the metadata and cache,
3849         * so some items returned by the iterator may no longer be relevant.
3850         * <p>
3851         * May be null.
             * When it is null or exhausted, _incrCheckMRUExhibitEntries() replaces it
             * with a fresh iterator over getAllExhibitNamesSorted() of the AEP
             * that it is working with at the time.
3852         * <p>
3853         * Marked transient to avoid being serialised.
3854         */
3855        private transient Iterator<Name.ExhibitFull> _iCMEE_iterator;
3856    
3857        /**Incrementally check cached exhibits for integrity.
3858         * This will attempt to remove any entry it finds that is corrupt.
3859         * <p>
3860         * This concentrates on the most-recently-used cache entries
3861         * as data corruption in these would probably be the most serious,
3862         * though may also attempt to systematically scan all cache entries.
3863         * <p>
3864         * This may examine any cached entry.
3865         * <p>
3866         * This may not examine any entry at all if the cache seems to be busy.
3867         * <p>
3868         * This will stop after removing at most one corrupt entry.
3869         *
3870         * @param done  if not null then this routine adds to this Set
3871         *     the full name of any exhibit that it checks (just before checking)
3872         *     and avoids checking any exhibit in this Set;
3873         *     this need not be thread-safe for one unshared instance
3874         *
3875         * @throws IOException
3876         */
3877        private void _incrCheckMRUExhibitEntries(final Set<Name.ExhibitFull> done)
3878            throws IOException
3879            {
3880            final AllExhibitProperties aep = _AEP;
3881            final ExhibitStaticAttr esa;
3882    
3883            // We need only a cache read lock to find an exhibit to test.
3884            // We may then later need to grab a different lock to test data
3885            // or to remove corrupt data.
3886    
3887            // Grab both locks in correct order.
3888            // But give up if we can't get a cache lock immediately...
3889            if(!rwl.readLock().tryLock()) { return; }
3890            try
3891                {
3892                synchronized(metaData)
3893                    {
3894                    // We directly access the LRU list to get the most-recently-used
3895                    // (and thus, probably, the most-frequently-used)
3896                    // cached exhibits.
3897                    //
3898                    // We must hold the metaData instance lock
3899                    // while reading this LRU data,
3900                    // but we do *not* directly update it.
3901                    //
3902                    // The LRU data may change if we update/remove a corrupt entry.
3903                    final SortedSet<CachedFile> lru = metaData.exhibitsLRU;
3904                    int size = lru.size();
3905                    // If nothing to do then return immediately.
3906                    if(size < 1) { return; }
3907    
3908                    // Half the time get the next exhibit name from a comprehensive list.
3909                    if(Rnd.fastRnd.nextBoolean())
3910                        {
3911                        Iterator<Name.ExhibitFull> sit = _iCMEE_iterator;
3912                        // If the iterator is not live then attempt to create a new one.
3913                        if((sit == null) || !sit.hasNext())
3914                            {
3915                            // If the AEP is empty (no work to do) then return immediately.
3916                            if(aep.aeid.isEmpty()) { return; }
3917                            // Set iterator up with ALL the (full) exhibit names
3918                            // as a working cache should have some data for ALL exhibits.
3919                            // The iterator is over an immutable view to avoid accidents.
3920                            // The ordering is likely to be semi-random
3921                            // which should help avoid systematic failures to check thoroughly.
3922                            _iCMEE_iterator = sit = aep.aeid.getAllExhibitNamesSorted().iterator();
3923                            }
3924    
3925                        assert(sit.hasNext());
3926                        // If exhibit has gone away since iterator was created,
3927                        // then esa will be null and we'll skip any test attempt below...
3928                        esa = aep.aeid.getStaticAttr(sit.next());
3929                        }
3930                    // Pick amongst all exhibits, weighted towards MRU.
3931                    else
3932                        {
3933                        // Get as list of all cached exhibits, MRU last.
3934                        final List<CachedFile> cfLRU = new ArrayList<CachedFile>(lru);
3935                        // Remove any already inspected.
3936                        if((done != null) && !done.isEmpty())
3937                            {
3938                            cfLRU.removeAll(done);
3939                            // Recompute the size...
3940                            size = lru.size();
3941                            // If nothing to do then return immediately.
3942                            if(size < 1) { return; }
3943                            }
3944    
3945                        // Pick an entry, heavily weighted towards the MRU/popular,
3946                        // since the most recently/frequently used entries
3947                        // are assumed to be the most important to keep healthy.
3948                        //
3949                        // We work hard to distribute these checks well.
3950                        final int entry = size - 1 -
3951                            Rnd.goodRnd.nextInt(
3952                                Rnd.goodRnd.nextInt(
3953                                    Rnd.goodRnd.nextInt(size) + 1) + 1);
3954                        final CachedFile cf = cfLRU.get(entry);
3955                        esa = aep.aeid.getStaticAttr(cf.name);
3956                        }
3957                    }
3958                }
3959            finally
3960                { rwl.readLock().unlock(); }
3961    
3962            // If we have been handed a legitimate exhibit to inspect
3963            // then test it for validity
3964            // outside any locks as far as possible.
3965            if(esa != null)
3966                {
3967                if(done != null)
3968                    {
3969                    // Note that we are about to check this exhibit,
3970                    // and abort (return immediately) if it is already checked.
3971                    if(!done.add(esa.getExhibitFullName())) { return; }
3972                    }
3973    
3974                // Test validity of entry.
3975                _doCacheDataValidityTest(aep, esa);
3976                }
3977            }
3978    
3979        /**Do incremental purge of orphaned cache entries conditions are right.
3980         * Tries to grab a write lock to do its work;
3981         * if it can't get one immediately (ie the cache is busy)
3982         * then it returns immediately.
3983         * <p>
3984         * Doesn't attempt any purging if there is an empty exhibit set
3985         * or if there is no cache size currently set
3986         * since this cache may not even be properly initialised yet...
3987         * <p>
3988         * We clear at most one orphaned entry on each call.
3989         */
3990        private void _incrPurgeOrphanedExhibits()
3991            throws IOException
3992            {
3993            if(ORPHANED_EXHIBIT_EXPIRY_ALLOWED &&
3994                (_AEP.aeid.length > 0) &&
3995                (LocalProps.getWEBSVR_MAX_EX_CACHE_BYTES() > 0))
3996                {
3997                if(!rwl.writeLock().tryLock()) { return; }
3998                try
3999                    {
4000                    // We have run out of space to freely precache (new) exhibits,
4001                    // so a preemptive orphaned-exhibit purge may well be worthwhile.
4002                    if(!metaData.someFree(cacheDir))
4003                        {
4004                        // Now look for exhibits in the metadata not in the current exhibit set.
4005                        final Set<Name.ExhibitFull> orphaned = metaData.getKnownExhibits();
4006                        orphaned.removeAll(_AEP.aeid.getAllExhibitNamesSorted());
4007    
4008                        // There are some candidate orphaned exhibits.
4009                        if(orphaned.size() > 0)
4010                            {
4011    logger.log("[ExhibitDataSimpleCache: WARNING: orphaned exhibits in cache: " + orphaned.size() + ".]");
4012    
4013                            // Work out minimum time since last use that we will purge.
4014                            // Note that for a small cache with a fast turnover of exhibits,
4015                            // we may have to purge sooner than we'd consider ideal.
4016                            final long thresholdAccessTime;
4017                            synchronized(metaData)
4018                                {
4019                                final CachedFile leastRecentlyUsed = metaData.exhibitsLRU.first();
4020                                final CachedFile mostRecentlyUsed = metaData.exhibitsLRU.last();
4021                                // Make threshold for purge much closer to now than to LRU exhibit.
4022                                thresholdAccessTime = (leastRecentlyUsed.timestamp + 3*mostRecentlyUsed.timestamp) / 4;
4023                                }
4024                            final long purgeThreshold = Math.max(thresholdAccessTime,
4025                                System.currentTimeMillis() - ORPHANED_EXHIBIT_MIN_UNUSED_TIME_MS);
4026    
4027                            // Only consider removing any that have not been used/updated in a while.
4028                            for(final Name.ExhibitFull name : orphaned)
4029                                {
4030                                // Get info on cache entry, if any.
4031                                final CachedFile info = metaData.exhibitGetInfo(name);
4032    
4033                                // Found unloved orphan, so purge it.
4034                                if((info != null) && (info.getLastAccessed() < purgeThreshold) &&
4035                                    (info.getLastAccessed() != 0)) // Ensure that this is not broken/partial meta-data.
4036                                    {
4037                                    // Only remove one orphaned exhibit each time
4038                                    // so as to avoid taking too long,
4039                                    // thus stop if successful in removing this one.
4040                                    if(metaData.exhibitRemoveCacheEntry(rwl, cacheDir, name, false, logger, statsIDSCGEN))
4041                                        {
4042    logger.log("[ExhibitDataSimpleCache: completely removed orphaned exhibit from cache: " + name + ".]");
4043                                        break;
4044                                        }
4045                                    }
4046                                }
4047                            }
4048                        }
4049                    }
4050                finally { rwl.writeLock().unlock(); }
4051                }
4052            }
4053    
4054    
4055        /**Returns the cached general properties unless the caller already holds them.
4056         * The cached object is returned if oldStamp is negative (unconditional fetch)
4057         * or if oldStamp differs from the cached object's timestamp;
4058         * otherwise the caller's copy appears to be up-to-date and null is returned.
4059         * <p>
4060         * If no fresh props have yet been fetched
4061         * then a default set with a zero timestamp is returned.
4062         * <p>
4063         * The value comes from our cache of the immutable data rather than
4064         * from the source directly,
4065         * so this neither blocks nor takes any lock.
4066         * <p>
4067         * This data is not persisted since carrying old GenProps values
4068         * across a restart may be a very poor idea.
4069         *
4070         * @param oldStamp  timestamp of the caller's copy, or negative to force a return
4071         * @return the cached GenProps, or null if the caller's copy matches it
4072         */
4073        public org.hd.d.pg2k.svrCore.props.GenProps getGenProps(final long oldStamp)
4074            /* throws IOException */
4075            {
4076            final GenProps current = genProps;
4077    
4078            // Non-negative stamp matching ours: caller seems to have an up-to-date copy already.
4079            final boolean callerUpToDate = (oldStamp >= 0) && (oldStamp == current.timestamp);
4080            return(callerUpToDate ? null : current);
4081            }
4082    
4083        /**Time (ms, as from System.currentTimeMillis()) at which we last polled for genProps.
4084         * Private to _getGenProps(); volatile so that it can be read and written without locking.
4085         */
4086        private transient volatile long _lastPollGp;
4087    
4088        /**Polls the upstream source for new GenProps if our cached copy may be stale.
4089         * Slightly strange is that the cached GenProps value itself sets the poll interval
4090         * (scaled by the server slowdown factor); the default interval is short
4091         * so we should initially poll quickly until we get a kosher value.
4092         * <p>
4093         * The fetch runs as a task submitted to a thread pool so as not to delay the caller;
4094         * a failed fetch is reported on stderr and otherwise ignored.
4095         * <p>
4096         * A special case here: if we hold a GenProps object with a non-zero
4097         * timestamp (presumably pulled over from a running master) and are then
4098         * offered one with a zero timestamp, we ignore the new, zero, instance
4099         * since it probably means that the master has just been restarted
4100         * and has not yet loaded new GenProps.
4101         * <p>
4102         * This does not need to hold any locks since all the values it touches
4103         * are volatile.
4104         */
4105        private void _getGenProps()
4106            throws IOException
4107            {
4108            final GenProps heldGenProps = genProps;
4109    
4110            // Nothing to do if the last poll was recent enough.
4111            final long sinceLastPoll = System.currentTimeMillis() - _lastPollGp;
4112            if(sinceLastPoll <=
4113               (heldGenProps.getWEBSVR_SYSPROPS_RECHECK_MS() * LocalProps.getServerSlowdownFactor()))
4114                { return; }
4115    
4116            // Postpone the next poll, whether or not this one succeeds.
4117            _lastPollGp = System.currentTimeMillis();
4118    
4119            // The (possibly slow) fetch, run off this thread to avoid delaying poll() unduly.
4120            final Runnable fetchTask = new Runnable(){
4121                public final void run()
4122                    {
4123                    // Timestamp of the props that we held when this poll was scheduled.
4124                    final long heldTimestamp = heldGenProps.timestamp;
4125                    // Try to fetch a new value; this might take a while or even fail...
4126                    final GenProps fetched;
4127                    try { fetched = source.getGenProps(heldTimestamp); }
4128                    catch(final IOException e) { e.printStackTrace(); return; }
4129    
4130                    // ...but don't let an uninitialised (zero-timestamp) master GenProps
4131                    // displace an initialised one that we already hold.
4132                    final boolean accept = (fetched != null) &&
4133                        ((heldTimestamp == 0) || (fetched.timestamp != 0));
4134                    if(accept)
4135                        {
4136                        // No lock is required to update this volatile value.
4137                        genProps = fetched;
4138                        // Log the receipt of a changed props set.
4139                        logger.log("[ExhibitDataSimpleCache: new GenProps: " + "old/new timestamp: " +
4140                            (new Date(heldTimestamp)) + " / " + (new Date(fetched.timestamp)) +
4141                            " at " +(new Date()) + ".]");
4142                        }
4143                    _lastPollGp = System.currentTimeMillis(); // Schedule next poll.
4144                    }
4145                };
4146            ThreadUtils.nonCPUThreadPool.submit(fetchTask);
4147            }
4148    
4149    
4150    
4151    
4152    
4153    
4154    
4155    
4156    
4157    
4158    
4159        /**Gets the security properties as a Properties object if its timestamp is not that specified.
4160         * If the time specified is negative the object will be returned unconditionally.
4161         * <p>
4162         * If no props are currently installed/available a default set with a zero
4163         * timestamp is returned.
4164         * <p>
4165         * If the caller's copy appears to be up-to-date (eg the oldStamp
4166         * matches that that would have been returned) null is returned.
4167         * <p>
4168         * We get this from our cache of the immutable data rather than
4169         * going to the source directly.
4170         * We don't block or grab any locks to fetch this.
4171         * <p>
4172         * We do not attempt to persist this data since carrying old values
4173         * across a restart may be a very poor idea.
4174         * <p>
4175         * We wrap this as the defaults to a new Properties object
4176         * to protect our copy against accidental alteration.
4177         */
4178        public java.util.Properties getGenSecProps(final long oldStamp)
4179            /* throws IOException */
4180            {
4181            final Properties cached = genSecProps;
4182    
4183            if((oldStamp < 0) || (oldStamp != Long.parseLong(cached.getProperty(SecurityProps.PNAME_GENSECPROPS_TIMESTAMP))))
4184                { return(new Properties(cached)); }
4185    
4186            // Caller seems to have up-to-date copy already.
4187            return(null);
4188            }
4189    
4190        /**Time (ms, as from System.currentTimeMillis()) at which we last polled for genSecProps.
4191         * Private to _getGenSecProps(); is volatile to avoid the need for locking.
4192         */
4193        private volatile transient long _lastPollGSp;
4194    
4195        /**Our record of the current GenSecProps; never null.
4196         * Replaced by the task that _getGenSecProps() submits; is volatile to avoid the need for locking.
4197         */
4198        private volatile Properties genSecProps = (new SecurityProps()).getGenSecProps(); // Start with defaults.
4199    
4200        /**Attempts to get gensecprops if our cached copy may be stale.
4201         * This does not need any locks since the state is mainatined
4202         * in volatile values.
4203         */
4204        private void _getGenSecProps()
4205            throws IOException
4206            {
4207            // We return immediately if no need to poll,
4208            // because last poll was very recent.
4209            if(System.currentTimeMillis() - _lastPollGSp <=
4210                   (genProps.getWEBSVR_SYSPROPS_RECHECK_MS() * LocalProps.getServerSlowdownFactor()))
4211                { return; }
4212    
4213            // Put off next poll for a while, even if this one fails...
4214            _lastPollGSp = System.currentTimeMillis(); // Schedule/postpone next poll.
4215    
4216            ThreadUtils.nonCPUThreadPool.submit(new Runnable(){
4217                public final void run()
4218                    {
4219                    try
4220                        {
4221                        // Capture old props timestamp.
4222                        final Properties currentGSP = genSecProps;
4223                        final String timestamp = currentGSP.getProperty(SecurityProps.PNAME_GENSECPROPS_TIMESTAMP, "0");
4224                        final long currentGSPTimestamp = Long.parseLong(timestamp);
4225    
4226                        // Try to fetch new value; this might take a while or even fail...
4227                        final Properties newGSp = source.getGenSecProps(currentGSPTimestamp);
4228                        // ...but don't let an uninitialised master GenSecProps displace
4229                        // any initialised one that we already hold.
4230                        if((newGSp != null) &&
4231                           ((currentGSPTimestamp == 0) || (Long.parseLong(newGSp.getProperty(SecurityProps.PNAME_GENSECPROPS_TIMESTAMP, "0")) != 0)))
4232                            {
4233                            // OK, we got an updated value.
4234                            // No lock is needed to update this volatile value.
4235                            genSecProps = newGSp;
4236                            }
4237                        _lastPollGSp = System.currentTimeMillis(); // Schedule/postpone next poll.
4238                        }
4239                    catch(final Exception e)
4240                        {
4241                        e.printStackTrace(); // Whinge and stop in case of error.
4242                        }
4243                    }
4244                });
4245            }
4246    
4247        /**Maximum wait time between attempts to fetch or generate thumbnails (ms) by long-running cache; strictly positive.
4248         * We have this in order to avoid pestering a master server unnecessarily
4249         * or wasting CPU cycles attempting to build a thumbnail.
4250         * <p>
4251         * A value of several times the allowed system latency/skew
4252         * up to of the order of a day in the expectation of a daily
4253         * exhibit-accession and thumbnail-build cycle on the server
4254         * is probably reasonable.
4255         * <p>
4256         * We randomise the value, once when this constant is initialised,
4257         * so that different clients will not conflict with one another unduly.
4258         * <p>
4259         * We may wait longer than this when resource-constrained.
4260         * <p>
4261         * We may wait less than this when the cache is relatively young.
4262         */
4263        private static final long MAX_WAIT_BETWEEN_THUMBNAIL_REPEAT_FETCHES_MS =
4264            Math.min(25 * 3600 * 1000L, // Retries no more than ~1 day (25 hours) apart...
4265                1013L * (3*CoreConsts.DEFAULT_TEMPORAL_SLACKNESS_S + // NOTE(review): 1013 is presumably ~ms per second, deliberately not round -- confirm.
4266                    Rnd.fastRnd.nextInt(4*CoreConsts.DEFAULT_TEMPORAL_SLACKNESS_S))); // ...else 3x up to (presumably just under) 7x the default temporal slackness.
4267    
4268        /**Normal wait time between attempts to fetch or generate thumbnails (ms) by long-running cache; strictly positive.
4269         * We have this in order to avoid pestering a master server unnecessarily
4270         * or wasting CPU cycles attempting to build a thumbnail;
4271         * this is used where the upstream server doesn't seem too busy
4272         * or in case of apparent transient network error.
4273         * <p>
4274         * A value of a few minutes is good for this purpose.
4275         * <p>
4276         * We randomise the value, once when this constant is initialised,
4277         * so that different clients will not conflict with one another unduly.
4278         * <p>
4279         * We may wait longer than this when resource-constrained.
4280         * <p>
4281         * We may wait less than this when the cache is relatively young.
4282         */
4283        private static final long NORMAL_WAIT_BETWEEN_THUMBNAIL_REPEAT_FETCHES_MS =
4284            (5 * 60 * 1000L) + Rnd.fastRnd.nextInt(1 * 60 * 1000); // 5 minutes plus a random extra of (presumably) under 1 minute.
4285    
4286    //    /**Enqueueable SoftReference for cached-in-memory thumbnails. */
4287    //    private final static class TIMSoftReference extends SoftReference<ExhibitThumbnails>
4288    //        {
4289    //        /**Factory method that also enqueues automatically... */
4290    //        static TIMSoftReference create(final Name.ExhibitFull name, final ExhibitThumbnails tns, final ReferenceQueue<ExhibitThumbnails> refQ)
4291    //            {
4292    //            final TIMSoftReference result = new TIMSoftReference(name, tns, refQ);
4293    //            return(result);
4294    //            }
4295    //        /**Create an instance with the thumbnail name and thumbnails (all non-null); only call from factory method. */
4296    //        private TIMSoftReference(final Name.ExhibitFull name, final ExhibitThumbnails tns, final ReferenceQueue<ExhibitThumbnails> refQ)
4297    //            {
4298    //            super(tns, refQ);
4299    //            this.name = name;
4300    //            assert(name != null);
4301    //            assert(tns != null);
4302    //            assert(refQ != null);
4303    //            }
4304    //        /**Valid full exhibit name; never null. */
4305    //        final Name.ExhibitFull name;
4306    //        }
4307    
4308        /**Private in-memory cache of deserialised thumbnails; never null.
4309         * This Map is guaranteed thread-safe and highly-concurrent.
4310         * <p>
4311         * Holding a lock on this object <em>will not</em> prevent updates to it.
4312         * <p>
4313         * This is a mapping:
4314         * <ul>
4315         * <li>from full exhibit name to that exhibit's thumbnails (presumably an ExhibitThumbnails instance -- TODO confirm), or
4316         * <li>from full exhibit name to a Long time of our last fetch/generate attempt, used to postpone retries.
4317         * </ul>
4318         * If we run very low on space then discard the thumbnails and just keep the do-not-retry Long timestamps.
4319         */
4320        private final MemoryTools.SoftReferenceMap<Name.ExhibitFull,Object> _thumbnailsInMemory =
4321            MemoryTools.SoftReferenceMap.create(Math.max(256, MAX_QUEUED_TN_FETCHES), true, "_thumbnailsInMemory");
4322    //    ConcurrentMap<Name.ExhibitFull,Object> _thumbnailsInMemory =
4323    //        new ConcurrentHashMap<Name.ExhibitFull, Object>(Math.max(256, MAX_QUEUED_TN_FETCHES));
4324        /**Emergency-free handler: discards the cached thumbnails, keeping only the do-not-retry Long timestamps.
4325         * This will free up the bulk of the memory but may continue to save lots of nugatory effort.
4326         * <p>
4327         * We have to hold a reference to the handle to prevent it expiring.
4328         */
4329        private final RecurrentEmergencyFreeHandle _timREFH = (new RecurrentEmergencyFreeHandle(){
4330            public void run()
4331                {
4332                int discarded = 0;
4333                for(final Name.ExhibitFull exhibit : _thumbnailsInMemory.keySet())
4334                    {
4335                    // Only non-Long values are treated as thumbnails; absent values and Long timestamps stay put.
4336                    final Object value = _thumbnailsInMemory.get(exhibit);
4337                    if((value == null) || (value.getClass() == Long.class)) { continue; }
4338                    _thumbnailsInMemory.remove(exhibit);
4339                    ++discarded;
4340                    }
4341    System.err.println("WARNING: emergency-freed thumbnails: " + discarded);
4342                }
4343            });
4344        { MemoryTools.registerRecurrentEmergencyFreeHandle(_timREFH); }
4345    
4346        /**If true then allow missing thumbnails to be fetched synchronously at the risk of blocking for extended periods. */
4347        private static final boolean ALLOW_SYNC_TN_FETCH = false;
4348    
4349        /**Gets the thumbnails for an exhibit.
4350         * A data source is at liberty to refuse to compute thumbnails
4351         * in which case it may return null, else it returns a
4352         * non-null value which may include the `could-not-compute'
4353         * value to indicate that a thumbnail/sample can never be made
4354         * for this exhibit and no attempt need be made again.
4355         * <p>
4356         * This retains a private in-memory cache of
4357         * deserialised thumbnails held by SoftReference,
4358         * and it tries first to recover them from there.
4359         * <p>
4360         * This tries to retrieve thumbnails from the cache,
4361         * and returns them if they are there.
4362         * <p>
4363         * Else, if create is true, this tries to create the thumbnails,
4364         * cache them, and return the value.  But we won't bother unless
4365         * the main image is fully loaded.
4366         * <p>
4367         * Note that only the read and write of cache is done under lock;
4368         * the thumbnail generation is unlocked and concurrency is restricted,
4369         * if at all, by the handler routine(s).
4370         * <p>
4371         * Partly because this routine is called by our own
4372         * precache routines, we do not regard reading a thumbnail
4373         * as proving user access to the cache
4374         * (exhibit data has to be read for that).
4375         *
4376         * @param create  if true, and no thumbnail yet exists, try to
4377         *     create one if possible; else if create is false
4378         *     only return an existing one and return null if none is to hand
4379         *     (or possibly allow fetch of pre-built remote one)
4380         */
4381        public ExhibitThumbnails getThumbnails(final ExhibitFull name,
4382                                               final boolean create)
4383            {
4384            // Note inbound request for exhibit thumbnails.
4385            StatsLogger.captureDataPoint(statsIDSCGEN, SCGNAME_EXTHUREQIN);
4386            if(!create)
4387                { StatsLogger.captureDataPoint(statsIDSCGEN, SCGNAME_EXTHUREQINDC); }
4388    
4389    if(TRACE_THUMBNAIL_ACTIVITY_ALL) { logger.log("getThumbnails("+name+", "+create+")..."); }
4390    
4391    //        try
4392    //            {
4393                // Call our internal fetcher/generator.
4394                final ExhibitThumbnails exhibitThumbnails = _getThumbnails(name,
4395                        create, ALLOW_SYNC_TN_FETCH);
4396    
4397                // If we could not get the requested thumbnails
4398                // (or any firm indication that they do not exist)
4399                // then try again in the background as they may still be useful.
4400                if(exhibitThumbnails == null)
4401                    { _asyncTNFetch(name); }
4402    
4403    if(TRACE_THUMBNAIL_ACTIVITY_ALL) { logger.log("getThumbnails("+name+", "+create+") = "+exhibitThumbnails); }
4404    
4405                return(exhibitThumbnails);
4406    //            }
4407    //        finally
4408    //            {
4409    //            // We must not be holding a (write) lock on exit.
4410    //            // We log any held lock and break it and propagate an Error.
4411    //            // Of course, this should NEVER happen,
4412    //            // but the cost of checking for it is low, so we'll always check.
4413    //            if(rwl.isWriteLockedByCurrentThread())
4414    //                {
4415    //                rwl.writeLock().unlock();
4416    //                System.err.println("SEVERE ERROR: getThumbnails(): holding write lock on exit from cache");
4417    //                throw new Error("SEVERE ERROR: getThumbnails(): holding write lock on exit from cache");
4418    //                }
4419    //            }
4420            }
4421    
4422    
4423        /**Get the thumbnails for an exhibit; null if not available.
4424         * A data source is at liberty to refuse to compute thumbnails
4425         * in which case it may return null, else it returns a
4426         * non-null value which may include the `could-not-compute'
4427         * value to indicate that a thumbnail/sample cannot be made
4428         * for this exhibit and no attempt need be made in future.
4429         * <p>
4430         * This retains a private in-memory cache of
4431         * deserialised thumbnails held by SoftReference,
4432         * and it tries first to recover them from there.
4433         * This is very important to fast delivery of thumbnails
4434         * for building pages referencing many thumbnails.
4435         * <p>
4436         * This tries to retrieve thumbnails from the cache,
4437         * and returns them if they are there.
4438         * <p>
4439         * Else, if create is true, this tries to create the thumbnails,
4440         * cache them, and return the value.  But we won't bother unless
4441         * the main image is fully loaded.
4442         * <p>
4443         * Note that only the read and write of our tn cache is done under lock;
4444         * the thumbnail generation is unlocked and concurrency is restricted,
4445         * if at all, by the handler routine(s).
4446         * <p>
4447         * Partly because this routine is called by our own
4448         * precache routines, we do not regard reading a thumbnail
4449         * as proving user access to the cache
4450         * (exhibit data has to be read for that).
4451         *
4452         * @param create  if true, and no thumbnail yet exists, try to
4453         *     create one if possible; else if create is false
4454         *     only return an existing one and return null if none is to hand
4455         *     (or possibly allow fetch of pre-built remote one)
4456         * @param allowSyncFetch  if true then allow a synchronous fetch from upstream
4457         *
4458         * @return null if no such exhibit or a transient problem,
4459         *         NO_THUMBNAILS if this exhibit type can never have thumbnails
4460         *         or it appears impossible for this particular exhibit,
4461         *         or else a non-null non-NO_THUMBNAILS value
4462         */
4463        private ExhibitThumbnails _getThumbnails(final Name.ExhibitFull name,
4464                                                 final boolean create,
4465                                                 final boolean allowSyncFetch)
4466            {
4467            // In the simple case where we know that thumbnails
4468            // definitely can't be created (from the file type)
4469            // return the NO_THUMBNAILS value immediately,
4470            // ignoring the create parameter.
4471            // We never cache this (negative) value.
4472            //
4473            // NOTE: in the special case of an empty AEP then return null
4474            // for a non-existent AEP since the empty AEP may be transient.
4475            final ExhibitMIME.ExhibitTypeParameters type = (ExhibitMIME.getInputFileType(name));
4476            if((type == null) || (type.handler == null) ||
4477               !type.canPossiblyCreateThumbnailOfSameMIMEType())
4478                { return(ExhibitThumbnails.NO_THUMBNAILS); }
4479            final AllExhibitProperties aep = _AEP;
4480            final ExhibitStaticAttr esa = _AEP.aeid.getStaticAttr(name);
4481            if(esa == null)
4482                { return(null); } // Can't make thumbnail yet...
4483    
4484    if(TRACE_THUMBNAIL_ACTIVITY_ALL) { logger.log("_getThumbnails("+name+", "+create+", "+allowSyncFetch+") ... past arg checks"); }
4485    
4486            // Get some of our important system state for later.
4487            final GenProps gp = getGenProps(-1L);
4488    
4489            // Get a handle on the private in-memory thumbnail cache.
4490            // Read access to this can in practice be highly concurrent.
4491            final SoftReferenceMap<ExhibitFull, Object> _tIM = _thumbnailsInMemory;
4492    //        final ConcurrentMap<Name.ExhibitFull, Object> _tIM = _thumbnailsInMemory;
4493    
4494            // Check the memory cache to see if it is still valid
4495            // and see if we can find our thumbnails in there.
4496            // Do with a lock held on the table for atomicity.
4497            //
4498            // There are two sorts of things in this table:
4499            //   * A mapping from an exhibit name to ExhibitThumbnails
4500            //   * A Long with the time that we last tried to fetch the thumbnail
4501            //     from the datasource (or generated it locally);
4502            //     we should postpone trying again until reasonable time elapses
4503            //     to avoid wasting bandwidth and/or CPU cycles.
4504            //
4505            // Don't attempt to fetch from upstream if:
4506            //   * We were told not to fetch it.
4507            //   * The caller said not to bother creating the thumbnail.
4508            //   * It isn't long enough since we last tried to create/fetch it.
4509            // This last case is also used as a crude 'lock'
        // to try to avoid concurrent fetch/generation of the same thumbnail.
4511            boolean dontFetchFromSource = !allowSyncFetch || !create;
4512    
4513            // OK, try from in-memory cache.
4514            final Object memCachedValue = _tIM.get(name);
4515    
4516            if(memCachedValue instanceof Long)
4517                {
4518                // This was the time we last tried to do a fetch/generate.
4519                final long lastTime = ((Long) memCachedValue).longValue();
4520    
4521                // If we have not yet waited long enough
4522                // before another fetch/generate attempt,
4523                // then veto any fetch attempt at least.
4524                if(lastTime +
4525                    Math.min(1+(_instanceLifems()>>>1), // A new cache is desperate to populate itself...
4526                             upstreamStratum.isUpstreamConserving() ? MAX_WAIT_BETWEEN_THUMBNAIL_REPEAT_FETCHES_MS : NORMAL_WAIT_BETWEEN_THUMBNAIL_REPEAT_FETCHES_MS) >
4527                    System.currentTimeMillis())
4528                    {
4529                    dontFetchFromSource = true;
4530    if(TRACE_THUMBNAIL_ACTIVITY_ALL) { logger.log("_getThumbnails("+name+", "+create+", "+allowSyncFetch+") ... negatively cached since "+(new Date(lastTime))+": dontFetchFromSource = true"); }
4531                    }
4532                else
4533                    {
4534                    // OK, passed the wait time, so purge this time value to save space
4535                    // (though avoid races with new values being inserted).
4536                    _tIM.remove(name);
4537    if(TRACE_THUMBNAIL_ACTIVITY_ALL) { logger.log("_getThumbnails("+name+", "+create+", "+allowSyncFetch+") ... negative cache expired"); }
4538                    }
4539                }
4540    
4541            // See if we have a cached copy of the thumbnail.
4542            else if(memCachedValue instanceof ExhibitThumbnails)
4543                {
4544                final ExhibitThumbnails tns = (ExhibitThumbnails) memCachedValue;
4545                if(tns != null) // Got it!  Excellent!
4546                    {
4547                    // Note hit from in-memory thumbnail cache...
4548                    StatsLogger.captureDataPoint(statsIDSCGEN, SCGNAME_CACHETNMEMHIT);
4549    if(TRACE_THUMBNAIL_ACTIVITY_ALL) { logger.log("_getThumbnails("+name+", "+create+", "+allowSyncFetch+") ... returning from in-memory cache: return " + tns); }
4550    
4551                    return(tns);
4552                    }
4553    //            else // Expunge expired reference to save some space (while avoiding races).
4554    //                { _tIM.remove(name, memCachedValue); }
4555                }
4556    
4557            else { assert(memCachedValue == null) : "incorrectly typed entry in tn cache: " + memCachedValue.getClass().getName(); }
4558    
4559            // If we've got the thumbnails in the disc cache
4560            // then return the (disc) cached value
4561            // having saved it to the in-memory cache.
4562            // Fetching from disc cache grabs a read lock on the cache.
4563            try {
4564                final ExhibitThumbnails tns =
4565                    metaData.exhibitGetThumbnails(rwl, cacheDir, name, logger);
4566    if(TRACE_THUMBNAIL_ACTIVITY_ALL) { logger.log("_getThumbnails("+name+", "+create+", "+allowSyncFetch+") ... on-disc metadata cache result: " + tns); }
4567    
4568                if(tns != null)
4569                    {
4570                    // Unconditionally save the thumbnails to the in-memory cache.
4571                    _tIM.put(name, tns);
4572    //                _tIM.put(esa.getExhibitFullName(), TIMSoftReference.create(esa.getExhibitFullName(), tns, _tIM_deadRefQueue));
4573    
4574                    // Note hit from on-disc thumbnail cache...
4575                    StatsLogger.captureDataPoint(statsIDSCGEN, SCGNAME_CACHETNHIT);
4576    if(TRACE_THUMBNAIL_ACTIVITY_ALL) { logger.log("_getThumbnails("+name+", "+create+", "+allowSyncFetch+") ... return " + tns); }
4577    
4578                    return(tns);
4579                    }
4580                }
4581            catch(final IOException e) { } // Ignore.
4582    
4583            // Note cache miss if we have gotten this far...
4584            StatsLogger.captureDataPoint(statsIDSCGEN, SCGNAME_CACHETNMISS);
4585    
4586            final Long now = new Long(System.currentTimeMillis());
4587            // If we can get it from the underlying data source,
4588            // then return it having saved it to the disc and in-memory caches.
4589            // We *don't* hold any locks in the cache while fetching from upstream.
4590            if(!dontFetchFromSource)
4591                {
4592    if(TRACE_THUMBNAIL_ACTIVITY_ALL) { logger.log("_getThumbnails("+name+", "+create+", "+allowSyncFetch+") ... trying to fetch from upstream"); }
4593    
4594                try {
4595                    // Try to prevent any concurrent attempt to fetch this thumbnail from upstream...
4596                    if((memCachedValue == null) && (!(_tIM.get(name) instanceof ExhibitThumbnails)))
4597                        { _tIM.put(name, now); }
4598    
4599                    ExhibitThumbnails tns = null;
4600                    if(ALLOW_TN_FETCH_FROM_PEERS)
4601                        {
4602                        // Try to fetch extant thumbnails from a peer
4603                        // to minimise traffic to the master.
4604                        // Since all mirrors/peers should cache all thumbnails once fetched
4605                        // (unless very short of cache space indeed)
4606                        // then any peer should have what we want
4607                        // or should be getting it shortly anyway,
4608                        // so this should not induce significant extra traffic overall.
4609                        try
4610                            {
4611                            tns = _getThumbnailsFromPeer(name);
4612    if(TRACE_THUMBNAIL_ACTIVITY_ALL) { logger.log("_getThumbnails("+name+", "+create+", "+allowSyncFetch+") ... from peer: "+tns); }
4613                            }
4614                        catch(final IOException e) { if(TRACE_THUMBNAIL_ACTIVITY) { e.printStackTrace(); } } // Absorb P2P error; possibly log it.
4615                        }
4616                    // If the peer gave us nothing at all or an apparent permanent fail
4617                    // then try the master for a definitive answer if possible.
4618                    // This prevents spurious failures at peers from spreading laterally.
4619                    if((null == tns) || ExhibitThumbnails.NO_THUMBNAILS.equals(tns))
4620                        {
4621                        // Try to get thumbnails from directly upstream
4622                        // (since we could not get them from a peer)
4623                        // ultimately from the master server.
4624                        final long startFetchFromMaster = System.currentTimeMillis();
4625                        tns = source.getThumbnails(name, create); // Fetch from upstream.
4626                        final long endFetchFromMaster = System.currentTimeMillis();
4627                        final long timeTaken = endFetchFromMaster - startFetchFromMaster;
4628                        if(ALLOW_TN_FETCH_FROM_PEERS) // Only record successes here for simplicity...
4629                            { _updatePeerStats(MASTER_FAKE_TAG, true, timeTaken); }
4630    if(TRACE_THUMBNAIL_ACTIVITY_ALL) { logger.log("_getThumbnails("+name+", "+create+", "+allowSyncFetch+") ... from upstream ("+timeTaken+"ms): "+tns); }
4631                        }
4632                    // If we retrieved some thumbnails then save them and return them...
4633                    if(null != tns)
4634                        {
4635    if(TRACE_THUMBNAIL_ACTIVITY && ExhibitThumbnails.NO_THUMBNAILS.equals(tns)) { System.err.println("WARNING: _getThumbnails("+name+", "+create+", "+allowSyncFetch+") ... cacheing upstream NO_THUMBNAILS"); }
4636    if(TRACE_THUMBNAIL_ACTIVITY_ALL) { logger.log("_getThumbnails("+name+", "+create+", "+allowSyncFetch+") ... cacheing upstream thumbnails in memory and metadata disc caches: " + tns); }
4637    
4638                        // Unconditionally save in the in-memory cache.
4639                        _tIM.put(name, tns);
4640    //                    _tIM.put(esa.getExhibitFullName(), TIMSoftReference.create(esa.getExhibitFullName(), tns, _tIM_deadRefQueue));
4641    
4642                        // Now save in the disc cache...
4643                        // Writing to disc cache grabs a write lock on the cache.
4644                        boolean saved = false;
4645                        try { saved = metaData.exhibitSaveThumbnails(rwl, gp, cacheDir, esa, tns, logger, statsIDSCGEN); }
4646                        catch(final IOException e) { } // Ignore.
4647    
4648    if(TRACE_THUMBNAIL_ACTIVITY_ALL && saved) { logger.log("_getThumbnails("+name+", "+create+", "+allowSyncFetch+") ... saved upstream thumbnails to metadata disc caches: return " + tns); }
4649    
4650                        // Return the thumbnails to the caller.
4651                        return(tns);
4652                        }
4653    
4654                    // Note failure to fetch by putting off any further tries
4655                    // by noting at what time we experienced the failure,
4656                    // unless one of the following mechanisms
4657                    // manages to overwrite our Long with a real set of thumbnails.
4658                    // This is not race-free.
4659                    // FIXME: should use shorter timeout for temporary network/congestion issues
4660                    // FIXME: verify that any race is harmless...
4661                    if((memCachedValue == null) && (!(_tIM.get(name) instanceof ExhibitThumbnails)))
4662                        {
4663                        _tIM.put(name, now);
4664    if(TRACE_THUMBNAIL_ACTIVITY_ALL) { logger.log("_getThumbnails("+name+", "+create+", "+allowSyncFetch+") ... failed fetch so postponing next attempt"); }
4665                        }
4666    
4667    //                // We make the update race-free in case a concurrent call succeeds.
4668    //                // THUS ANY FOLLOWING METHODS MUST UPDATE THE MEMORY CACHE.
4669    //                if(memCachedValue == null) { _tIM.putIfAbsent(esa.getExhibitFullName(), now); }
4670    //                else { _tIM.replace(esa.getExhibitFullName(), memCachedValue, now); }
4671                    }
4672                catch(final IOException e) { logger.equals("_getThumbnails("+name+", "+create+", "+allowSyncFetch+") remote fetch FAILED: " + e.getMessage()); } // Log tersely...
4673                }
4674            // If not allowed to fetch the thumbnail immediately,
4675            // then at least make space in the cache for it (and other missing ones)
4676            // for later (pre)caching if enough space is not currently available...
4677            else if(metaData.isReadWrite() && !metaData.canPrecacheThumbnails(cacheDir) &&
4678                    (LocalProps.getWEBSVR_MAX_EX_CACHE_BYTES() > 0)) // We trust the cache-sizing properties?
4679                {
4680                try
4681                    {
4682    if(TRACE_THUMBNAIL_ACTIVITY_ALL) { logger.log("_getThumbnails("+name+", "+create+", "+allowSyncFetch+") ... making space for thumbnail for next time"); }
4683    
4684                    // Make space for thumbnail(s) to be (pre)cached later.
4685                    // Make space for several in one go here, for efficiency,
4686                    // on the grounds that if we are missing one thumbnail
4687                    // then we may well be missing more than one.
4688                    metaData.purge(rwl,
4689                             gp,
4690                             cacheDir,
4691                             // Very generous notion of elbow room needed...
4692                             (ExhibitThumbnails.MAX_BYTES_EST +
4693                                  CachedFile.MAX_EMPTY_ENTRY_BYTES_ON_DISC +
4694                                  MAX_REMOTE_FETCH_TO_MAKE_THUMBNAIL +
4695                                  MAX_TRANSFER_CHUNK_SIZE) << 3,
4696                             name,
4697                             logger, statsIDSCGEN);
4698                    }
4699                catch(final IOException e) { e.printStackTrace(); } // Absorb error but whinge...
4700                }
4701    
4702    
4703            // Not asked to create thumbnails here,
4704            // so return a null to indicate that
4705            // the requested thumbnails were not immediately to hand.
4706            if(!create)
4707                {
4708    if(TRACE_THUMBNAIL_ACTIVITY_ALL) { logger.log("_getThumbnails("+name+", "+create+", "+allowSyncFetch+") ... NOT trying to create missing thumbnails: return(null)"); }
4709    
4710                return(null);
4711                }
4712    
4713    
4714            ExhibitThumbnails tns = null;
4715            try
4716                {
4717                // Don't try to make the thumbnail
4718                // if a synchronous fetch is not allowed by the caller
4719                // and the exhibit is not fully cached locally,
4720                // or there is too much exhibit data not yet in local cache
4721                // (because it might take an indefinitely long time
4722                // to load a big exhibit across the Net).
4723                final CachedFile exhibitMetadata = metaData.exhibitGetInfo(name);
4724                boolean getRawDataFromOwnCache = true;
4725                if(esa.length > (allowSyncFetch ? MAX_REMOTE_FETCH_TO_MAKE_THUMBNAIL : 0) +
4726                                ((null == exhibitMetadata) ? 0 : exhibitMetadata.cachedLength))
4727                    {
4728    if(TRACE_THUMBNAIL_ACTIVITY_ALL) { logger.log("_getThumbnails("+name+", "+create+", "+allowSyncFetch+") ... NOT enough data cached locally to create thumbnails (metadata="+exhibitMetadata+")"); }
4729    
4730                    // Usually only trust/treat our own cache as being 'local'
4731                    // to help ensure a reasonably-bounded thumbnail build time for example.
4732                    // However, if our upstream source is an ExhibitDataFileSource
4733                    // and system power is not currently too tight
4734                    // (eg so we can power up underlying bulk storage if necessary)
4735                    // then we can consider going directly upstream for data.
4736                    final boolean allowRawExhibitDataFromUpstream =
4737                        upstreamSourceIsLocal() &&
4738                        !GenUtils.mustConservePower();
4739    
4740                    // If we can't go directly upstream for the missing raw data
4741                    // then we have to give up for now and return null.
4742                    if(!allowRawExhibitDataFromUpstream) { return(null); }
4743    
4744                    // Otherwise we can fetch the data directly from upstream
4745                    // to (try to) construct the thumbnails.
4746                    getRawDataFromOwnCache = false;
4747    if(TRACE_THUMBNAIL_ACTIVITY_ALL) { logger.log("_getThumbnails("+name+", "+create+", "+allowSyncFetch+") ... allowing direct upstream access to get exhibit data to build thumbnail"); }
4748                    }
4749    
4750                // Block any other attempt at generation of this thumbnail concurrently.
4751                // FIXME: show any remaining race to be harmless...
4752                if((memCachedValue == null) && (!(_tIM.get(name) instanceof ExhibitThumbnails)))
4753                    {
4754    if(TRACE_THUMBNAIL_ACTIVITY_ALL) { logger.log("_getThumbnails("+name+", "+create+", "+allowSyncFetch+") ... locking out concurrent creation of this thumbnail"); }
4755    
4756                    _tIM.put(name, now);
4757                    }
4758    
4759    //            // This value will be overwritten if the thumbnail is generated successfully.
4760    //            // We make the update race-free in case a concurrent call succeeds.
4761    //            if(memCachedValue == null) { _tIM.putIfAbsent(esa.getExhibitFullName(), now); }
4762    //            else { _tIM.replace(esa.getExhibitFullName(), memCachedValue, now); }
4763    
4764    
4765                // As selected above, fetch raw exhibit data from local cache or upstream.
4766                final AllExhibitProperties.ExhibitDataSource dataSourceTN = getRawDataFromOwnCache ? exhibitDataSource :
4767                    (new AllExhibitProperties.ExhibitDataSource(){
4768                            @Override public final void getRawFile(final ByteBuffer buf, final ExhibitFull exhibitName, final int position)
4769                                throws IOException
4770                                { source.getRawFile(buf, exhibitName, position, false); }
4771                            /**Assume data is always available immediately from upstream in this case. */
4772                            @Override public final boolean isExhibitFullyLoaded(final ExhibitStaticAttr esa)
4773                                { return(true); }
4774                            });
4775    
4776                // Attempt to generate the thumbnails here, not under any visible/blocking lock.
4777                // Claim that we can take as much memory/time (etc) as we need
4778                // ***iff we don't appear to be memory-stressed at the moment to try to avoid OOMEs***
4779                // for at most one thumbnail build out of any concurrent builds;
4780                // others will be vetoed if not enough memory is available.
4781                // We expect a strictly limited number of these concurrently anyway,
4782                // and no user should be blocked waiting for this to finish.
4783                // Without this we may never easily find enough space to build thumbnails on small servers.
4784                // Note: an OOME here is not a disaster.
4785                final boolean isFirst = _gTfirstTNBuildLock.tryLock();
4786                try
4787                    {
4788                    tns = type.handler.makeThumbnails(
4789                                esa,
4790                                dataSourceTN,
4791                                aep,
4792                                isFirst && !MemoryTools.isMemoryStressed());
4793                    }
4794                finally { if(isFirst) { _gTfirstTNBuildLock.unlock(); } }
4795    
4796    if(TRACE_THUMBNAIL_ACTIVITY_ALL) { logger.log("_getThumbnails("+name+", "+create+", "+allowSyncFetch+") ... result of attempt to makeThumbnails(): " + tns); }
4797                }
4798            catch(final OutOfMemoryError e)
4799                {
4800                // We may not actually be able to catch OOME in practice, but if we can then
            // note that OOME is neither unexpected nor disastrous here.
4802                logger.log("_getThumbnails("+name+", "+create+", "+allowSyncFetch+") ... OutOfMemoryError; retry may be possible...");
4803                }
4804            catch(final Exception e)
4805                {
4806                // Absorb errors but whinge...
4807                logger.log("Exception while generating thumbnail during precache for "+esa+" "+e.getMessage());
4808    if(TRACE_THUMBNAIL_ACTIVITY) { logger.log("_getThumbnails("+name+", "+create+", "+allowSyncFetch+") ... makeThumbnails() FAILED: " + e); }
4809    if(TRACE_THUMBNAIL_ACTIVITY) { e.printStackTrace(); }
4810                }
4811    
4812            // Having generated it, cache it if not null.
4813            // This will overwrite the timestamp temporarily put in place to prevent concurrent access.
4814            if(tns != null)
4815                {
4816    if(TRACE_THUMBNAIL_ACTIVITY && ExhibitThumbnails.NO_THUMBNAILS.equals(tns)) { System.err.println("WARNING: _getThumbnails("+name+", "+create+", "+allowSyncFetch+") ... cacheing generated NO_THUMBNAILS"); }
4817    if(TRACE_THUMBNAIL_ACTIVITY) { logger.log("_getThumbnails("+name+", "+create+", "+allowSyncFetch+") ... cacheing generated thumbnails in memory and metadata disc caches: " + tns); }
4818    
4819                // Unconditionally save to the in-memory cache.
4820                _tIM.put(name, tns);
4821    //            _tIM.put(esa.getExhibitFullName(), TIMSoftReference.create(esa.getExhibitFullName(), result, _tIM_deadRefQueue));
4822    
4823                // Save to disc cache.
4824                boolean saved = false;
4825                try { saved = metaData.exhibitSaveThumbnails(rwl, gp, cacheDir, esa, tns, logger, statsIDSCGEN); }
4826                catch(final IOException e) { } // Ignore.
4827    
4828    if(TRACE_THUMBNAIL_ACTIVITY_ALL && saved) { logger.log("_getThumbnails("+name+", "+create+", "+allowSyncFetch+") ... saved generated thumbnails to metadata disc caches: " + tns); }
4829    
4830                // Note local creation of thumbnails...
4831                StatsLogger.captureDataPoint(statsIDSCGEN, SCGNAME_EXTHUCREATED);
4832                }
4833    
4834    if(TRACE_THUMBNAIL_ACTIVITY_ALL) { logger.log("_getThumbnails("+name+", "+create+", "+allowSyncFetch+") ... return " + tns); }
4835    
4836            return(tns);
4837            }
4838    
    /**Wall-clock time of construction of this instance, in ms as returned by System.currentTimeMillis().
     * Captured once by this field initialiser and never changed thereafter.
     */
    private final long consTime = System.currentTimeMillis();
4841    
4842        /**How long this instance has been alive in milliseconds.
4843         * Will be non-negative when system clock is monotonic.
4844         */
4845        private long _instanceLifems() { return(System.currentTimeMillis() - consTime); }
4846    
4847    
4848        /**Returns true iff upstream is local disc so some operations should be cheap without cacheing.
4849         * Can avoid significant redundant effort.
4850         */
4851        private boolean upstreamSourceIsLocal()
4852            { return(source instanceof ExhibitDataFileSource); }
4853    
    /**A lock to allow only one of any concurrent thumbnail builds to assume unlimited resources.
     * Other thumbnail builds may try with whatever memory (etc) remains,
     * but at most one gets special privileges.
     * <p>
     * Taken with tryLock() in _getThumbnails(), so a build that does not get the lock
     * still proceeds (without the privilege) rather than blocking.
     */
    private final ReentrantLock _gTfirstTNBuildLock = new ReentrantLock();
4859    
    /**Private lock for _getThumbnailsFromPeer() to prevent concurrent P2P thumbnail fetches; never null.
     * Taken with tryLock(), so a call that finds a P2P fetch already in progress
     * returns null immediately rather than waiting.
     */
    private final ReentrantLock _gTFP_lock = new ReentrantLock();
4862    
4863        /**Attempt to fetch the specified thumbnails from any peer; may be null if currently unavailable.
4864         * This never attempts to force creation of a thumbnail remotely,
4865         * but rather tries to fetch an already-present value.
4866         * <p>
4867         * This updates the P2P stats as if an exhibit-data-block transfer,
4868         * which is reasonable since this only attempts a fetch of data,
4869         * never a create which may take signifiant remote CPU time.
4870         * Note that only an (IO)Exception (not having null returned) is 'failure'.
4871         * <p>
4872         * This may potentially 'loop' between peers consuming resources uselessly
4873         * unless some other mechanism is used to prevent such behaviour.
4874         * However, only one outgoing P2P thumbnail request is allowed at once here,
4875         * which should limit any such problem and resources consumed by it.
4876         *
4877         * @param name  full exhibit name; never null
4878         */
4879        private ExhibitThumbnails _getThumbnailsFromPeer(final Name.ExhibitFull name)
4880            throws IOException
4881            {
4882            // No P2P thumbnail request concurrency allowed.
4883            if(!_gTFP_lock.tryLock()) { return(null); }
4884            try
4885                {
4886                // Choose the 'best' peer to try...
4887                final String peerToTry = _pickPeer();
4888    if(TRACE_P2P_ACTIVITY || TRACE_THUMBNAIL_ACTIVITY_ALL) { logger.log("_getThumbnailsFromPeer("+name+") selected peer: "+peerToTry); }
4889    
4890                // If the master was selected rather than any peer
4891                // then give up and return null immediately.
4892                if(!MASTER_FAKE_TAG.equals(peerToTry))
4893                    {
4894    if(TRACE_P2P_ACTIVITY || TRACE_THUMBNAIL_ACTIVITY_ALL) { logger.log("_getThumbnailsFromPeer("+name+") attempting fetch from: "+peerToTry); }
4895    
4896                    // Create temporary tunnel to the selected peer.
4897                    final String url = "http://" +
4898                        LoadBalancingUtils.makeMirrorNameFromTag(peerToTry) +
4899                        CoreConsts.TUNNEL_URI;
4900                    // Suppress logging from this short-lived tunnel...
4901                    final ExhibitDataHTTPTunnelSource tempTunnel =
4902                        new ExhibitDataHTTPTunnelSource(url, peerToTry, GenUtils.nullLogger);
4903                    try
4904                        {
4905                        boolean successful = false;
4906                        final long startFetch = System.currentTimeMillis();
4907                        final ExhibitThumbnails tns;
4908                        try {
4909                            // Get extant thumbnails or abort with an IOException...
4910                            tns = tempTunnel.getThumbnails(name, false); // No 'create' requested.
4911                            successful = true; // Only an exception is 'failure' for these purposes.
4912                            }
4913                        finally
4914                            {
4915                            final long endFetch = System.currentTimeMillis();
4916                            final long timeTaken = endFetch - startFetch;
4917                            _updatePeerStats(peerToTry,
4918                                             successful,
4919                                             timeTaken);
4920    if(TRACE_P2P_ACTIVITY || TRACE_THUMBNAIL_ACTIVITY) { logger.log("_getThumbnailsFromPeer("+name+") fetch ("+timeTaken+"ms) from "+peerToTry+(successful?" successful":" FAILED")); }
4921                            }
4922                        return(tns);
4923                        }
4924                    finally // Ensure that the tunnel is cleared down.
4925                        { tempTunnel.destroy(); }
4926                    }
4927                }
4928            finally { _gTFP_lock.unlock(); }
4929    
4930            return(null); // Not currently possible...
4931            }
4932    
4933        /**Attempt to asynchronously fetch/create thumbnail that we have failed to return to the user.
4934         * We so this to attempt to fetch soon any thumbnail that was recently
4935         * requested but was not immediately available on the grounds that
4936         * it may be needed again soon.
4937         * <p>
4938         * This only uses a strictly limited number of threads,
4939         * but avoids a wait for general precacheing, which may never happen.
4940         *
4941         * @param exhibitName  full exhibit name; non-null valid exhibit name
4942         */
4943        private void _asyncTNFetch(final Name.ExhibitFull exhibitName)
4944            {
4945            // Don't bother trying to fetch thumbnails in the background
4946            // for exhibits that cannot have them,
4947            // ie don't waste time queueing requests that we cannot ever satisfy.
4948            final ExhibitMIME.ExhibitTypeParameters type = (ExhibitMIME.getInputFileType(exhibitName));
4949            if((type == null) || (type.handler == null) ||
4950               !type.canPossiblyCreateThumbnailOfSameMIMEType())
4951                { return; }
4952    
4953    if(TRACE_THUMBNAIL_ACTIVITY_ALL) { logger.log("_asyncTNFetch("+exhibitName+") queued request..."); }
4954    
4955            // Put this in the queue of things to be done.
4956            discardableReadAheadTaskThreadPool.submit(new Runnable(){
4957                public final void run()
4958                    {
4959                    // Always allow a sync fetch from upstream.
4960                    // Always allow local thumbnail creation attempt.
4961                    _getThumbnails(exhibitName, true, true);
4962                    }
4963                });
4964            }
4965    
    /**Manages our local cache of variables, etc; never null.
     * We set this up to be:
     * <ul>
     * <li><em>Not</em> write-through,
     *     ie we reduce "set" (write) traffic to the upstream source.
     * </ul>
     * <p>
     * Locally-set values are pushed upstream, and upstream values fetched,
     * when _handleSysVars() calls syncVariables() on this.
     */
    private final PipelineVarMgr varMgr;
4976    
    /**Variable flush/retrieval interval (ms); strictly positive.
     * Based on the allowed distribution latency as centrally defined:
     * the smaller of a randomised value starting at 30001ms
     * and one more than a quarter of SystemVariables.MAX_VALUE_DISTRIBUTION_LATENCY_MS.
     * <p>
     * We randomise this a little to help avoid many slaves
     * bothering the master simultaneously.
     * The random component is chosen once, when this static initialiser runs.
     * NOTE(review): presumably Rnd.fastRnd.nextInt(16384) yields [0,16383]
     * as java.util.Random.nextInt(n) would -- confirm.
     */
    private static final int VAR_CACHE_HOLD_TIME_MS = Math.min(30001 + Rnd.fastRnd.nextInt(16384),
        (SystemVariables.MAX_VALUE_DISTRIBUTION_LATENCY_MS / 4) + 1);
4985    
    /**Last time we flushed/fetched variables, private to _handleSysVars().
     * Compared against System.currentTimeMillis() values, so in ms.
     * We do another flush/fetch when we find this to be null (eg initially)
     * or more than VAR_CACHE_HOLD_TIME_MS in the past
     * (or when the caller forces one).
     * <p>
     * This is volatile so that we do not need to hold a lock to access it.
     */
    private transient volatile Long _handleSysVars_lastFetch;
4993    
    /**Last time we saved (any) event histories, private to _handleSysVars(), initially zero.
     * Zero here is simply the default value of an uninitialised long field.
     * <p>
     * This is volatile so that we do not need to hold a lock to access it.
     */
    private transient volatile long _handleSysVars_evSave;
4998    
4999        /**Handle (update, sync, persist) system variables as required.
5000         * Recompute any local values generated by the cache itself,
5001         * flush any outbound values,
5002         * and retrieve any upstream values periodically.
5003         * <p>
5004         * We recompute local variables when we would be prepared to
5005         * flush/fetch system variables.
5006         * <p>
5007         * Timing is handled with volatile values,
5008         * so we do not need to take out any other locks while working.
5009         * <p>
5010         * We rely on the locking within varMgr to ensure consistency,
5011         * including during the save.  Bad things may happen if trying to
5012         * remove a cache while we are trying to save event histories!
5013         * <p>
5014         * We also use this to recompute any vote/correlation factors
5015         * and update our notion of this instance's stratum.
5016         *
5017         * @param force  if true, force an immediate complete save of state upstream,
5018         *     and to disc (if upstream source not already local)
5019         */
5020        private void _handleSysVars(final boolean force)
5021            throws IOException
5022            {
5023            final long now = System.currentTimeMillis();
5024            final Long lastTime = _handleSysVars_lastFetch;
5025            // If still not time for another round of variable updates,
5026            // then return immediately.
5027            if(!force &&
5028               (lastTime != null) &&
5029               (lastTime.longValue() + VAR_CACHE_HOLD_TIME_MS > now))
5030                { return; }
5031    
5032            // OK, recompute and set locally-generated variable values.
5033            // 100 - percentage-of-cache-space free over target size, to float precision...
5034            final Float percentUsed = new Float(100f - ((100.0f * metaData.computeFreeSpaceBelowHighWaterMark(cacheDir)) /
5035                metaData.computeTargetHighWaterMark()));
5036            final SimpleVariableValue pU = new SimpleVariableValue(
5037                SystemVariables.ExhibitDataSimpleCache_CACHE_AVAIL_SPACE_PERCENT_USED,
5038                percentUsed);
5039            varMgr.setVariable(pU);
5040            // Exhibit (partially or fully) cached count...
5041            varMgr.setVariable(new SimpleVariableValue(
5042                SystemVariables.ExhibitDataSimpleCache_CACHED_EXHIBIT_COUNT,
5043                new Integer(metaData.size())));
5044            // Exhibit fully-cached-exhibit percentage (do NOT force recomputation here).
5045            final Float percentFullyCached = new Float((100.0f * metaData.getFullyCachedCount(_AEP, false)) /
5046                    Math.max(1, _AEP.aeid.length));
5047            final SimpleVariableValue pFC = new SimpleVariableValue(
5048                    SystemVariables.ExhibitDataSimpleCache_EXHIBITS_FULLY_CACHED_PERCENT,
5049                    percentFullyCached);
5050            varMgr.setVariable(pFC);
5051    
5052            // Push locally-cached updates upstream,
5053            // and bring upstream values into our local cache.
5054            // Always request an incremental update.
5055            try { varMgr.syncVariables(false); }
5056            catch(final IOException e)
5057                {
5058                /* Ignore (though log) the error so that events can still be saved... */
5059                logger.log("ExhibitDataSimpleCache: failed to syncVariables() upstream");;
5060                }
5061    
5062            // Save updated event histories from time-to-time
5063            // unless upstream source is local/fast.
5064            // Use the form which has bounded space requirements,
5065            // and persist to the cache area, if extant.
5066            // We can probably risk saving at most
5067            // a reasonable fraction of our temporal slackness...
5068            if(!upstreamSourceIsLocal() &&
5069               (cacheDir != null) &&
5070               (force || (_handleSysVars_evSave < now)))
5071                {
5072                final File evhd = new File(cacheDir, EVENT_HISTORY_DIR);
5073                if((evhd != null) &&
5074                   (evhd.isDirectory()) &&
5075                   (evhd.canWrite()))
5076                    {
5077    if(ORG.hd.d.IsDebug.isDebug) { logger.log("[event history save start: "+ System.currentTimeMillis() +".]"); }
5078                    // Usually save incrementally so as to be quick,
5079                    // Only recording histories with updates since the last save
5080                    // and don't leave a trail (eg use bounded disc space).
5081                    varMgr.saveEventHistories(evhd, true, false, !force);
5082    if(ORG.hd.d.IsDebug.isDebug) { logger.log("[event history save end:   "+ System.currentTimeMillis() +".]"); }
5083                    }
5084                else
5085                    {
5086                    logger.log("[ExhibitDataSimpleCache: WARNING: could not save event history in: " + evhd);
5087                    }
5088    
5089                // Postpone the next event save.
5090                // Make the wait a small fraction of the default slackness time.
5091                // Postpone the next save significantly longer if this or upstream are (temporarily) conserving power.
5092                _handleSysVars_evSave = System.currentTimeMillis() +
5093                        (CoreConsts.DEFAULT_TEMPORAL_SLACKNESS_S * 128) +
5094                        ((upstreamStratum.isUpstreamConserving() || GenUtils.mustConservePower()) ?
5095                                CoreConsts.ASYNC_MIN_POWER_SAVE_NON_CRITICAL_DATA_FLUSH_MS : (CoreConsts.ASYNC_MIN_POWER_SAVE_NON_CRITICAL_DATA_FLUSH_MS >>> 3)) +
5096                        (Rnd.fastRnd.nextInt(1 + CoreConsts.DEFAULT_TEMPORAL_SLACKNESS_S/13) * 1007);
5097                }
5098    
5099            // Having done the vars, etc, try to update the upstream stratum info,
5100            try {
5101                final Stratum stratum = source.getStratum();
5102                if(null != stratum) { upstreamStratum = stratum; } // Belt and braces, though null shouldn't happen!
5103                }
5104            catch(final IOException e)
5105                {
5106                // Note the failure to fetch a current value.
5107                upstreamStratum = Stratum.UNKNOWN;
5108                logger.log("ExhibitDataSimpleCache: failed to getStratum() upstream");;
5109                }
5110    
5111            // Note successful finish.
5112            _handleSysVars_lastFetch = new Long(System.currentTimeMillis());
5113            }
5114    
5115        /**Set variable.
5116         * Set local cached value immediately;
5117         * store global values to periodically propagate upstream to master
5118         * but show last global values obtained from master on periodic poll.
5119         *
5120         * @throws java.lang.IllegalArgumentException  on attempt to:
5121         *     set variable with value of wrong type or incompatible definition,
5122         *     set non-existent or read-only variable (or these can be ignored)
5123         */
5124        public void setVariable(final SimpleVariableValue newValue)
5125            throws IOException
5126            {
5127            varMgr.setVariable(newValue);
5128            }
5129    
5130        /**Update number of variables at once for efficiency.
5131         * Is passed a Set of SimpleVariableValues and behaves as if it
5132         * operates on all of them by calling setVariable() for each item
5133         * in the Set.
5134         * <p>
5135         * This implementation "fails fast" on the first error.
5136         * <p>
5137         * This implementation never throws an IOException.
5138         *
5139         * @throws java.lang.IllegalArgumentException  on attempt to:
5140         *     set variable with value of wrong type or incompatible definition,
5141         *     set non-existent or read-only variable (or these can be ignored)
5142         */
5143        public int setVariables(final SimpleVariableValue[] newValues)
5144            throws IOException
5145            {
5146            return(varMgr.setVariables(newValues));
5147            }
5148    
5149        /**Get a single variable value; returns null if no such value or wrong type.
5150         * Always get from local cache.
5151         * <p>
5152         * This implementation never throws an IOException.
5153         */
5154        public SimpleVariableValue getVariable(final SimpleVariableDefinition var)
5155            {
5156            return(varMgr.getVariable(var));
5157            }
5158    
5159        /**Get immutable Set of variable values altered on or after a given time, or all for -1.
5160         * Always get from local cache
5161         * (the variable cache being periodically updated from the master).
5162         * <p>
5163         * This may be slow if there are many live variables.
5164         * <p>
5165         * This implementation never throws an IOException.
5166         */
5167        public SimpleVariableValue[] getVariables(final long changedSince)
5168            {
5169            return(varMgr.getVariables(changedSince));
5170            }
5171    
5172        /**Get the current partial, or previous full, event set at the specified interval; never returns null.
5173         * This is a simplified interface to return either the current event set
5174         * that is being collected, or the previous completed set.
5175         * <p>
5176         * The current set is the most timely, but may not contain enough data
5177         * to be meaningful if the new interval has just started.
5178         * <p>
5179         * The previous set is complete and thus most likely to have enough samples
5180         * to be useful, but is not completely current.
5181         * <p>
5182         * If the requested event set is not (immediately) available,
5183         * an empty synthetic one is created and returned.
5184         * Thus, with this interface, it is not possible to distinguish between
5185         * there being no events in the given interval or simply no data.
5186         * <p>
5187         * TODO: This attempts to limit the amount of time that may be spent blocking,
5188         * eg due to upstream I/O issues,
5189         * but its ability to do so may depend on availability of threads, etc.
5190         *
5191         * @param def  event definition (must be for an event); never null
5192         * @param intervalSelector  never null
5193         * @param current  if true the current event set is returned,
5194         *     else the previous complete set is returned
5195         *
5196         * @return  requested event set; never null
5197         *
5198         * @throws IllegalArgumentException  if the request arguments are invalid
5199         */
5200        public EventVariableValue getEventValue(final SimpleVariableDefinition def,
5201                                                final EventPeriod intervalSelector,
5202                                                final boolean current)
5203            {
5204            return(varMgr.getEventValue(def, intervalSelector, current));
5205            }
5206    
5207        /**Get the specified event sets for the specified intervals; never null.
5208         * This allows retrieval of zero or more event sets for the specified
5209         * interval size.
5210         * <p>
5211         * Requests for more than SystemVariables.EVENT_SAMPLES_RETAINED in the
5212         * past (or for the future!) cannot be satisfied and data will not be
5213         * returned for them.
5214         * <p>
5215         * Usually not more than SystemVariables.EVENT_SAMPLES_RETAINED samples
5216         * will be returned in response to any one request as a safety measure.
5217         * <p>
5218         * (An implementation that is not an end-point may go upstream to fetch
5219         * missing values and cache them to satisfy future requests.)
5220         *
5221         * @param def  event definition (must be for an event); never null
5222         * @param intervalSelector  never null
5223         * @param intervalNumber  a time (as from System.currentTimeMillis())
5224         *     which identifies the first interval for which data is potentially
5225         *     required; if too far in the past or future then possibly no data
5226         *     will be available,
5227         *     zero is used to select the "all" bucket
5228         * @param whichValues  each true bit represents a slot for which data is
5229         *     required, bit 0 indicating data from the slot within which
5230         *     firstIntervalTime is located, bit 1 the previous slot, etc
5231         *
5232         * @return as many of the requested values as available,
5233         *     at least long enough to return all the available values,
5234         *     with [0] corresponding to bit 0 in the BitSet;
5235         *     may contain nulls or be zero-length but is never null
5236         */
5237        public EventVariableValue[] getEventValues(final SimpleVariableDefinition def,
5238                                                   final EventPeriod intervalSelector,
5239                                                   final long intervalNumber,
5240                                                   final BitSet whichValues)
5241            { return(varMgr.getEventValues(def, intervalSelector, intervalNumber, whichValues)); }
5242    
5243        /**Synchronise with upstream values.
5244         * Pushes updated values upstream to the source,
5245         * calls sync on the source with the same "force" argument,
5246         * and then retrieves changed values from upstream.
5247         * <p>
5248         * Holds no externally-visible locks,
5249         * but if called by multiple threads this will serialise the calls.
5250         *
5251         * @param force  if true, this will force a full sync on the read side
5252         *     by using getVariables(-1) rather than attempting to choose a
5253         *     nearer timestamp for efficiency;
5254         *     the implementation is at liberty to use getVariables(-1)
5255         *     at any time whatever the argument value,
5256         *     and almost certainly should use it on the first call
5257         *
5258         * @throws java.io.IOException if one is received from upstream
5259         */
5260        public void syncVariables(final boolean force)
5261            throws IOException
5262            { varMgr.syncVariables(force); }
5263    
5264    
5265        /**Flag to adjust the aggressiveness of the cache; by default not aggressive.
5266         * Aggressive cacheing may include read-ahead, and fetching exhibits
5267         * or at least some leading portion of them to keep the cache full
5268         * or at least primed with exhibits to improve the user experience.
5269         * <p>
5270         * This can be set false when the system is overloaded to eliminate most
5271         * effort not strictly necessary.
5272         * <p>
5273         * Volatile to eliminate the need for locking.
5274         */
5275        private volatile boolean _aggressive = LocalProps.fastStartMode();
5276    
5277        /**Set the aggressiveness of the cache; by default not aggressive.
5278         */
5279        public void setAggressive(final boolean isAggressive)
5280            { _aggressive = isAggressive; }
5281    
5282    
5283        /**Get requested Properties selected by key and versionID.
5284         * Fetches a Properties set unconditionally (versionID == -1)
5285         * else if the versionID presented is not current.
5286         *
5287         * @param key  selector (with possible embedded sub-key)
5288         *     for desired properties set; never null
5289         * @param versionID  if -1 then map is always returned if available,
5290         *     else must be non-negative and null is returned if the versionID
5291         *     presented matches that of the current version
5292         *     (ie if the caller has presumably got the up-to-date version);
5293         *     may be a timestamp or a hash or other value,
5294         *     and by convention is zero only for an empty properties set
5295         *
5296         * @return null, or Properties map guaranteed to contain only
5297         *     String keys and values
5298         */
5299        public java.util.Properties getProperties(final PropsKey key,
5300                                                  final long versionID)
5301            throws IOException
5302            {
5303            return(source.getProperties(key, versionID)); // FIXME: needs cacheing.
5304            }
5305    
5306    
5307        /**Poll periodically (of the order of a second) to do cache maintenance.
5308         * We keep the poll up to date to keep the work out of a servlet
5309         * response; the data retrieval might easily take a long time...
5310         * <p>
5311         * This routine takes care of calling the upstream poll().
5312         * <p>
5313         * We have to be careful about not restricting servlet callers'
5314         * concurrency here...  We try not to do fetches from the back-end,
5315         * which may be very slow, with the instance lock, which would shut
5316         * out all foreground users needlessly.
5317         * <p>
5318         * We ignore the caller's GenProps and fetch and cache our own...
5319         */
5320        public void poll(final GenProps _gp)
5321            throws IOException
5322            {
5323            // Last error encountered...
5324            IOException lastErr = null;
5325    
5326            // Do any upstream work...
5327            try { source.poll(genProps); }
5328            catch(final IOException e) { lastErr = e; }
5329    
5330            // Make sure that the sysprops cache is not stale.
5331            try { _getGenProps(); }
5332            catch(final IOException e) { lastErr = e; }
5333    
5334            // Make sure that the generic-security-props cache is not stale.
5335            try { _getGenSecProps(); }
5336            catch(final IOException e) { lastErr = e; }
5337    
5338            // Make sure that the immutable-attr cache is not stale.
5339            /* try */ { _getAllExhibitProperties(); }
5340            // catch(final IOException e) { lastErr = e; }
5341    
5342            // Do any incremental precacheing.
5343            /* try */ { _doPreCache(genProps); }
5344            // catch(final IOException e) { lastErr = e; }
5345    
5346            // Check our cache meta-data against the filesystem.
5347            /* try */ { _checkMetaData(); }
5348            // catch(final IOException e) { lastErr = e; }
5349    
5350            // Save our cache meta-data if we need to.
5351            /* try */ { _cleanAndSaveMetaData(false); }
5352            // catch(final IOException e) { lastErr = e; }
5353    
5354            // Handle system variables if necessary...
5355            try { _handleSysVars(false); }
5356            catch(final IOException e) { lastErr = e; }
5357    
5358            // Incrementally purge the thumbnail image cache.
5359            _thumbnailsInMemory.compact();
5360    
5361            // Update any vote/correlation values as required
5362            // (if not (temporarily) conserving power here or upstream).
5363            // Entirely ignore IOExceptions that are usually benign.
5364            if(!upstreamStratum.isUpstreamConserving() && !GenUtils.mustConservePower())
5365                {
5366                ThreadUtils.nonCPUThreadPoolDiscardable.submit(new Runnable(){
5367                    public final void run()
5368                        {
5369                        try { _AEP.updateVoteCache(varMgr, false); }
5370                        catch(final IOException e) { }
5371                        }
5372                    });
5373                }
5374    
5375            // Log if the cache seems to be busy...
5376            final int ql = rwl.getQueueLength();
5377            if(ql > 0)
5378                { logger.log("[ExhibitDataSimpleCache: busy: threads queued for lock: "+ql+".]"); }
5379    
5380            // Rethrow any subordinate error.
5381            // However, trim simple master-not-in-service errors.
5382            if(lastErr != null)
5383                {
5384    if(ORG.hd.d.IsDebug.isDebug) { lastErr.printStackTrace(); }
5385    
5386                if(!(lastErr instanceof PGMasterNotInServiceException))
5387                    { throw lastErr; }
5388                else
5389                    { logger.log("PGMasterNotInServiceException: " + lastErr.getMessage()); }
5390                }
5391            }
5392    
5393        /**Our record of the current GenProps; never null.
5394         * Maintained by poll() under the instance lock.
5395         * <p>
5396         * Is volatile so can be accessed without a lock.
5397         */
5398        private volatile GenProps genProps = new GenProps(); // Start with defaults.
5399    
5400    
5401    
5402    
5403        /**If true, we are in purge mode, zapping files before answering each query.
5404         * Set true when we hit or exceed the cache size limit,
5405         * set false when we drop below the cache low-water mark.
5406         * <p>
5407         * This must be accessed only under the instance lock,
5408         * and only by _getRawFileViaPersistentCache().
5409         */
5410        //private boolean discCachePurgeMode; // Initially false.
5411    
5412        /**Object representing one (partially) cached file on disc.
5413         * This is immutable, serialisable, and totally ordered.
5414         * <p>
5415         * The natural sort order is LRU (Least-Recently Used)
5416         * first, with ties (which should be very rare) broken
5417         * by increasing cached length (ignoring thumbnails, etc)
5418         * (to represent the cost expended getting the data to cache),
5419         * and then exhibit name.
5420         * <p>
5421         * This carries the minimum that need be known about each
5422         * file on disc.
5423         * <p>
5424         * The item is immutable to avoid causing difficulties when
5425         * already inserted in a sorted collection.
5426         * <p>
5427         * We store the timestamp of the original source exhibit file;
5428         * if it doesn't match the current
5429         * exhibit timestamp then this cache entry and its cached data
5430         * has to be regarded as invalidated and discarded.
5431         */
5432        private static final class CachedFile implements Comparable<CachedFile>,
5433                                                         Serializable, ObjectInputValidation,
5434                                                         Cloneable
5435            {
5436            /**The official name of the exhibit; never null and always a valid exhibit name. */
5437            final Name.ExhibitFull name;
5438    
5439            /**The master-exhibit timestamp; strictly positive. */
5440            final long timestamp;
5441    
5442            /**The initial portion cached on disc (bytes); never negative. */
5443            final long cachedLength;
5444    
5445            /**The time last written or read; non-negative.
5446             * May be zero if no exhibit data is cached.
5447             */
5448            public long getLastAccessed()
5449                {
5450                return(lastAccessed);
5451                }
5452    
5453            /**The time last written or read; non-negative.
5454             * We will use setLastModified(System.currentTimeMillis())
5455             * to enforce our notion of `now', since the filesystem
5456             * may be mounted from a remote server with a different
5457             * clock to ours.
5458             * <p>
5459             * We only do the setLastModified() when writing to the
5460             * filesystem anyway, or in the special case of
5461             * reading the first byte of an exhibit.  We do this to
5462             * save (synchronous) disc traffic updating timestamps
5463             * when in fact this should all be in the metadata
5464             * and is only needed if that is lost for some reason.
5465             * <p>
5466             * Logically final, but made mutable for touchedEntry()
5467             * to be able to overwrite in a cloned copy.
5468             * <p>
5469             * May be zero if only metadata but no exhibit data is present.
5470             */
5471            /* final */ long lastAccessed;
5472    
5473            /**The size of the serialised thumbnail object, zero if none; never negative. */
5474            final int tnBytes;
5475    
5476    
5477            /**Creates a raw entry given all the data about the entry.
5478             * The name must be a syntactically-correct exhibit name.
5479             * <p>
5480             * Should not be called directly from outside the class.
5481             */
5482            private CachedFile(final Name.ExhibitFull _name,
5483                               final long _ts,
5484                               final long _len,
5485                               final long _lastA,
5486                               final int _tnBytes)
5487                throws IllegalArgumentException
5488                {
5489                name = _name;
5490                timestamp = _ts;
5491                cachedLength = _len;
5492                lastAccessed = _lastA;
5493                tnBytes = _tnBytes;
5494    
5495                // Verify object state.
5496                try { validateObject(); }
5497                catch(final InvalidObjectException e)
5498                    { throw new IllegalArgumentException(e.getMessage()); }
5499                }
5500    
5501            /**Returns true if this exhibit's metadata is essentially equivalent to another one.
5502             * This insists that everything but the last-accessed timestamp is correct.
5503             * <p>
5504             * The result is false if the argument is null.
5505             */
5506            boolean isEquivalent(final CachedFile other)
5507                {
5508                if(other == null) { return(false); }
5509                return(name.equals(other.name) &&
5510                       (timestamp == other.timestamp) &&
5511                       (cachedLength == other.cachedLength) &&
5512                       (tnBytes == other.tnBytes));
5513                }
5514    
5515            /**Do we have thumbnails?
5516             * True if the thumbnails file is non-zero length.
5517             * Does not prove that we can load and/or use the thumbnail
5518             * file even if this returns true.
5519             */
5520            boolean hasThumbnails()
5521                { return(tnBytes != 0); }
5522    
5523            /**Returns thumbnails; never null.
5524             * In case of difficulty, eg if thumbnails do not exist
5525             * or cannot be deserialised, throws IOException.
5526             * <p>
5527             * We're fairly dumb about this,
5528             * assuming that all aurgument have been validated.
5529             */
5530            final ExhibitThumbnails getThumbnails(final File cacheDir)
5531                throws IOException
5532                {
5533                final File dataFile = new File(new File(cacheDir, CACHE_EXDATA_DIR), name.toString());
5534                final File dataFileDir = new File(dataFile.getParent());
5535                final String fileComponent = dataFile.getName();
5536                final File tnFile = new File(dataFileDir,
5537                    CACHE_EXAUX_PREFIX + CACHE_EXAUX_TN_KW + '.' + fileComponent);
5538                return((ExhibitThumbnails) FileTools.deserialiseFromFile(tnFile, false));
5539                }
5540    
5541            /**Return data chunk from cached file in the result argument.
5542             * This file must live in the usual place, in CACHE_EXDATA_DIR,
5543             * and the name must be a syntactically-correct exhibit name.
5544             * <p>
5545             * We touch the file (set its stamp to `now') to show that it
5546             * has been used iff we retrieve the first byte otherwise
5547             * we rely on the in-memory metadata to hold the last access time;
5548             * this is a belt-and-braces measure in case we have to reconstruct
5549             * the metadata from the disc cache for some reason.
5550             * <p>
5551             * This will throw an IOException or IllegalArgumentException
5552             * if it cannot find the file or there is some other problem.
5553             * <p>
5554             * We're fairly dumb about this, assuming that everything has
5555             * been checked elsewhere.
5556             * <p>
5557             * If the read length is zero the buffer may be null.
5558             *
5559             * @param quick  be as fast as possible, eg don't update timestamps
5560             */
5561            CachedFile getCachedData(final File cacheDir,
5562                                     final int start,
5563                                     final ByteBuffer buf,
5564                                     final boolean quick)
5565                throws IOException
5566                {
5567                final File dataFile = new File(new File(cacheDir, CACHE_EXDATA_DIR), name.toString());
5568    
5569                RandomAccessFile raf = null;
5570                try
5571                    {
5572                    raf = new RandomAccessFile(dataFile, "r");
5573                    raf.getChannel().read(buf, start);
5574                    if(quick) { return(this); }
5575                    final long now = System.currentTimeMillis();
5576                    if(start == 0) { dataFile.setLastModified(now); } // Enforce our clock's `now'.
5577                    return(touchedEntry(now));
5578                    }
5579                finally
5580                    {
5581                    if(raf != null) { raf.close(); }
5582                    }
5583                }
5584    
5585    
5586            /**Recovers the actual on-disc cached exhibit prefix length, or 0 if not present.
5587             * Can be used to double-check the on-disc cached data length before
5588             * adjusting it, but may be slow since it requires a real disc access.
5589             * Use sparingly.
5590             * <p>
5591             * All arguments must be validated and safe before calling this routine;
5592             * for speed it does no further validation.
5593             *
5594             * @param cacheDir  non-null directory containing cache
5595             * @param exhibitName  name of exhibit; syntactically valid, non-null
5596             *
5597             * @return  length cached on disc; 0 if no data cached for this exhibit
5598             */
5599            static long getCachedDataLength(final File cacheDir,
5600                                            final Name.ExhibitFull exhibitName)
5601                {
5602                final File dataFile = new File(new File(cacheDir, CACHE_EXDATA_DIR), exhibitName.toString());
5603                return(dataFile.length());
5604                }
5605    
5606            /**Fixes-up in-memory cached meta-data for exhibit with that from disc.
5607             * Reconstructs a set of details for the exhibit from disc,
5608             * makes sure that the record of the cached length and timestamps is OK,
5609             * and returns a new record if not (else the original is returned).
5610             * <p>
5611             * In particular this returns a new item if:
5612             * <ul>
5613             * <li>this record of the cached prefix length is wrong, or
5614             * <li>the timestamp in this record is older than that recovered, or
5615             * <li>the record of cached thumbnail data is wrong.
5616             * </ul>
5617             * <p>
5618             * This instance is not altered, nor is the disc-based data.
5619             * <p>
5620             * TODO: Possibly discard all cached data for an exhibit with a changed timestamp.
5621             *
5622             * @param rec  the meta-data as reconstructed from disc; never null
5623             */
5624            CachedFile fixup(final CachedFile rec,
5625                             final SimpleLoggerIF logger)
5626                {
5627                if(!name.equals(rec.name))
5628                    { throw new IllegalArgumentException(); }
5629    
5630                // If something is wrong with this meta-data
5631                // then we must do a fixup.
5632                final boolean lengthWrong = (rec.cachedLength != cachedLength);
5633                if(lengthWrong ||
5634                   (rec.lastAccessed > lastAccessed) || /* old timestamp. */
5635                   (rec.tnBytes != tnBytes))
5636                    {
5637                    // Warn if the problem was cached-data-length,
5638                    // since it might indicate significant lost data.
5639                    if(lengthWrong) { logger.log("[ExhibitDataSimpleCache: WARNING: doing fix-up because in-memory ("+cachedLength+") and on-disc ("+rec.cachedLength+") cached length meta-data differ for "+name+".]"); }
5640    
5641                    // Return fixed-up value...
5642                    return(new CachedFile(name, timestamp,
5643                                          rec.cachedLength,
5644                                          Math.max(rec.lastAccessed, lastAccessed),
5645                                          rec.tnBytes));
5646                    }
5647    
5648                // This instance does not need fixing up, so return as-is.
5649                return(this);
5650                }
5651    
5652            /**Recovers an approximate entry for an extant cached file; never null.
5653             * This file must live in the usual place, in CACHE_EXDATA_DIR,
5654             * and the name must be a syntactically-correct exhibit name.
5655             * <p>
5656             * This will throw an IOException or IllegalArgumentException
5657             * if it cannot find the file or other data that it needs.
5658             * <p>
5659             * This does not adjust the in-memory records,
5660             * nor alter anything on disc.
5661             *
5662             * @return  synthesised cache record; never null
5663             */
5664            static CachedFile recoverExtantCachedFileDetails(final File cacheDir,
5665                                                             final Name.ExhibitFull exhibitName)
5666                throws IOException
5667                {
5668                final File dataFile = new File(new File(cacheDir, CACHE_EXDATA_DIR), exhibitName.toString());
5669                final File dataFileDir = new File(dataFile.getParent());
5670                final String fileComponent = dataFile.getName();
5671                final File stampFile = new File(dataFileDir,
5672                    CACHE_EXAUX_PREFIX + CACHE_EXAUX_TIMESTAMP_KW + '.' + fileComponent);
5673                final File tnFile = new File(dataFileDir,
5674                    CACHE_EXAUX_PREFIX + CACHE_EXAUX_TN_KW + '.' + fileComponent);
5675    
5676                return(new CachedFile(exhibitName,
5677                    // Read the contents of the timestamp file
5678                    // as a decimal signed long.
5679                    // (Note that this file need not be read again since it
5680                    // is assumed not to change and so the in-memory value
5681                    // can be used instead.)
5682                    // We take a missing timestamp file to indicate
5683                    // potential data corruption.
5684                    Long.parseLong(FileTools.readTextFile(stampFile).trim(), 10),
5685    
5686                    dataFile.length(), // Must be present, so must be >= 0.
5687    
5688                    // We take the newest of the last-modified times of
5689                    // the exhibit data and the thumbnails (if present)
5690                    // as our best approximation to the last-accessed time.
5691                    // (Since java.io.File does not have a lastAccessed() member
5692                    // or equivalent.)
5693                    Math.max(dataFile.lastModified(),
5694                             tnFile.lastModified()),
5695    
5696                    // A thumbnail file may be present,
5697                    // in which case we need its size,
5698                    // else we record 0.
5699                    Math.max(0, (int) tnFile.length()) // Thumbnail file, if extant.
5700                    ));
5701                }
5702    
        /**If true, touchedEntry() uses clone() rather than a constructor
         * to build its result.
         */
        private static final boolean _TE_USE_CLONE = true;
5705    
5706    
5707            /**Make touched cache file entry.
5708             * This returns a new object identical to the old (this)
5709             * one except that the last-accessed stamp is set to the current time of day.
5710             * <p>
5711             * This does not alter anything on disc.
5712             */
5713            CachedFile touchedEntry()
5714                { return(touchedEntry(System.currentTimeMillis())); }
5715    
5716            /**Make touched cache file entry.
5717             * This returns a new object identical to the old (this)
5718             * one except that the last-accessed stamp is set to the
5719             * time of day passed in (usually the current time of day).
5720             * <p>
5721             * This does not alter anything on disc.
5722             * <p>
5723             * This could construct a new object,
5724             * but to save time and avoid the heavy lifting in the constructor,
5725             * this uses clone().
5726             *
5727             * @param newLastAccessedTimestamp  time to make touched version of descriptor with,
5728             *     strictly positive
5729             */
5730            CachedFile touchedEntry(final long newLastAccessedTimestamp)
5731                {
5732                assert(newLastAccessedTimestamp > 0);
5733    
5734                if(!_TE_USE_CLONE)
5735                    {
5736                    // Validate arguments via the constructor.
5737                    return(new CachedFile(name,
5738                        timestamp, cachedLength,
5739                        newLastAccessedTimestamp,
5740                        tnBytes));
5741                    }
5742    
5743                // To try to keep this frequent operation as quick as possible,
5744                // knowing that we are overriding just one value,
5745                // we use clone() rather than constructing a new object.
5746                // This avoids redundantly revalidating all the fields.
5747                else
5748                    {
5749                    assert(newLastAccessedTimestamp > 0);
5750    
5751                    final CachedFile result = cloneMe();
5752                    result.lastAccessed = newLastAccessedTimestamp;
5753                    return(result);
5754                    }
5755                }
5756    
5757            /**Clone the object; does not throw CloneNotSupportedException. */
5758            private CachedFile cloneMe()
5759                {
5760                try { return((CachedFile) super.clone()); }
5761                catch(final CloneNotSupportedException e) { throw new Error(); } // Cannot happen.
5762                }
5763    
5764            /**Make a new file on disc and return its CachedFile.
5765             * This is passed the case base dir, the name of the exhibit
5766             * to have a cache entry created, and the timestamp of the
5767             * master exhibit itself.
5768             * <p>
5769             * No zero-length cache entry is created.
5770             * <p>
5771             * No thumbnail is created; any extant one is expunged.
5772             * <p>
5773             * This does not adjust the in-memory records.
5774             */
5775            static CachedFile makeNewDiscCacheFile(final File cacheDir,
5776                                                   final Name.ExhibitFull name,
5777                                                   final long exhibitStamp)
5778                throws IOException
5779                {
5780                final File dataFile = new File(new File(cacheDir, CACHE_EXDATA_DIR), name.toString());
5781                final File dataFileDir = new File(dataFile.getParent());
5782                final String fileComponent = dataFile.getName();
5783                final File stampFile = new File(dataFileDir,
5784                    CACHE_EXAUX_PREFIX + CACHE_EXAUX_TIMESTAMP_KW + '.' + fileComponent);
5785                final File tnFile = new File(dataFileDir,
5786                    CACHE_EXAUX_PREFIX + CACHE_EXAUX_TN_KW + '.' + fileComponent);
5787    
5788                // Expunge any extant files.
5789                dataFile.delete();
5790                tnFile.delete();
5791                stampFile.delete();
5792    
5793                // Make sure that any parent directories exist...
5794                dataFileDir.mkdirs();
5795    
5796                // Attempt to write the (signed decimal) timestamp file.
5797                PrintWriter pw = null;
5798                try {
5799                    pw = new PrintWriter(new FileWriter(stampFile));
5800                    pw.println(exhibitStamp);
5801                    }
5802                finally
5803                    {
5804                    if(pw != null) { pw.close(); }
5805                    }
5806    
5807                final long now = System.currentTimeMillis();
5808    
5809    //            // Write a zero-length data file, and set its timestamp
5810    //            // to our notion of `now' rather than the fileserver's.
5811    //            // Ignore any error report for now...
5812    //            dataFile.createNewFile();
5813    //            dataFile.setLastModified(now);
5814    
5815                return(new CachedFile(name,
5816                    exhibitStamp,
5817                    0, // No data cached...
5818                    now,
5819                    0)); // No thumbnail.
5820                }
5821    
5822    
5823            /**Extend given cache file on disc and return new CachedFile.
5824             * This is passed the cache base dir, the name of the exhibit
5825             * to have a cache entry created, the start point of the new data
5826             * and the data itself, which must be more than zero length.
5827             * <p>
5828             * Note that though the new data should normally be exactly at the end
5829             * of the existing data,
5830             * it is not an error, though probably inefficient,
5831             * to start writing before the end of the existing data,
5832             * since that indicates wasted effort re-cacheing data we already have.
5833             * <p>
5834             * It <em>is</em> an error to start writing at a point beyond the end
5835             * of the existing cached data since we don't support "sparse" data
5836             * and on some operating systems (eg UNIX) the gaps would be
5837             * filled with zeros which we probably don't want.
5838             * <p>
5839             * If there is an overlap with the existing data,
5840             * the old (overlapped) data is overwritten silently with the new.
5841             * This may allow us to update parts in-situ to fix errors,
5842             * and to silently handle partly-overlapping concurrent updates.
5843             * <p>
5844             * The cache file is extended with (the non-overlapping part of)
5845             * the given data.
5846             * <p>
5847             * This does not adjust the in-memory records.
5848             * <p>
5849             * This must only be called when other cache write activity
5850             * (such as removing entries) is locked out
5851             * to avoid possible file corruption.
5852             */
5853            CachedFile extendCacheFile(final File cacheDir,
5854                                       final long dataStart,
5855                                       final ByteBuffer data)
5856                throws IOException
5857                {
5858                final File dataFile = new File(new File(cacheDir, CACHE_EXDATA_DIR), name.toString());
5859    
5860                if(dataStart < 0)
5861                    { throw new IllegalArgumentException("dataStart must be non-negative"); }
5862                if(dataStart > cachedLength)
5863                    { throw new IllegalArgumentException("dataStart must be no later than existing cachedLength"); }
5864                if(data == null)
5865                    { throw new IllegalArgumentException("data buffer must not be null"); }
5866                final int len = data.remaining();
5867                if(len < 1)
5868                    { throw new IllegalArgumentException("data length must be greater than zero"); }
5869    
5870                // Double-check that data on disc is as long as we think that it is.
5871                final long actualLenBefore = dataFile.length();
5872                if(dataStart > actualLenBefore)
5873                    { throw new IOException("cannot leave gaps in cached data; actual cached data is shorter than expected"); }
5874    
5875                RandomAccessFile raf = null;
5876                try
5877                    {
5878                    raf = new RandomAccessFile(dataFile, "rw");
5879    //                raf.seek(dataStart);
5880    //                raf.write(data);
5881                    raf.getChannel().write(data, dataStart);
5882                    if(data.remaining() != 0) { throw new IOException("did not write all requested data"); }
5883                    final long now = System.currentTimeMillis();
5884                    dataFile.setLastModified(now); // Enforce our clock's `now'.
5885                    return(new CachedFile(name, timestamp, Math.max(actualLenBefore, dataStart + len), now, tnBytes));
5886                    }
5887                finally
5888                    {
5889                    if(raf != null) { raf.close(); }
5890                    }
5891                }
5892    
5893            /**Remove any thumbnail file and return a new in-memory cache entry.
5894             * This just zaps any thumbnail file and sets the thumbnail length to zero.
5895             * <p>
5896             * This does not mark the entry as updated on disc nor in the new
5897             * record.
5898             * <p>
5899             * This does not adjust the in-memory records itself.
5900             */
5901            CachedFile zapThumbnails(final File cacheDir)
5902                //throws IOException
5903                {
5904                final File dataFile = new File(new File(cacheDir, CACHE_EXDATA_DIR), name.toString());
5905                final File dataFileDir = new File(dataFile.getParent());
5906                final String fileComponent = dataFile.getName();
5907                final File tnFile = new File(dataFileDir,
5908                    CACHE_EXAUX_PREFIX + CACHE_EXAUX_TN_KW + '.' + fileComponent);
5909                tnFile.delete();
5910                return(new CachedFile(name, timestamp, cachedLength, lastAccessed, 0));
5911                }
5912    
5913            /**Remove any exhibit data and return a new in-memory cache entry.
5914             * This just zaps any exhibit data file
5915             * and records the cached length as zero in the returned value.
5916             * <p>
5917             * This attempts to force a deletion even if the file is not obviously present,
5918             * just to make best efforts to purge it,
5919             * and we whinge (on System.err) if we cannot make it go away.
5920             * <p>
5921             * This does not mark the entry as updated on disc nor in the new record.
5922             * <p>
5923             * This does not adjust the in-memory records itself.
5924             */
5925            CachedFile zapData(final File cacheDir)
5926                //throws IOException
5927                {
5928                final File dataFile = new File(new File(cacheDir, CACHE_EXDATA_DIR), name.toString());
5929                final boolean dfExists = dataFile.exists();
5930                final boolean dfDeleted = dataFile.delete();
5931    if(dfExists && !dfDeleted && dataFile.exists()) { System.err.println("ERROR: ExhibitDataSimpleCache: unable to zapData() for: "+name); }
5932                return(new CachedFile(name, timestamp, 0, lastAccessed, tnBytes));
5933                }
5934    
5935            /**Add a thumbnail file and return a new in-memory cache entry.
5936             * This is not allowed if we already think we have a
5937             * thumbnail or if there is a thumbnail file already on disc.
5938             * <p>
5939             * The thumbnail argument must be non-null.
5940             * <p>
5941             * This does not mark the entry as updated on disc or in the new
5942             * record.
5943             * <p>
5944             * This does not adjust the in-memory records itself.
5945             */
5946            CachedFile saveThumbnails(final File cacheDir,
5947                                      final ExhibitThumbnails tns)
5948                throws IOException
5949                {
5950                if(tns == null)
5951                    { throw new IllegalArgumentException(); }
5952    
5953                final File dataFile = new File(new File(cacheDir, CACHE_EXDATA_DIR), name.toString());
5954                final File dataFileDir = new File(dataFile.getParent());
5955                final String fileComponent = dataFile.getName();
5956                final File tnFile = new File(dataFileDir,
5957                    CACHE_EXAUX_PREFIX + CACHE_EXAUX_TN_KW + '.' + fileComponent);
5958    
5959                if(hasThumbnails() || tnFile.exists())
5960                    { throw new IOException("thumbnails already exist"); }
5961    
5962                FileTools.serialiseToFile(tns, tnFile, false, true);
5963                final long now = System.currentTimeMillis();
5964                dataFile.setLastModified(now); // Enforce our clock's `now'.
5965                final int tnBytesNew = (int) tnFile.length();
5966                if(tnBytesNew <= 0)
5967                    {
5968                    tnFile.delete();
5969                    throw new IOException("error saving thumbnail");
5970                    }
5971                return(new CachedFile(name, timestamp, cachedLength, now, tnBytesNew));
5972                }
5973    
5974    
5975    
5976    
5977            /**Equality relies on comparison, ie on last-access time, cached length and name.
5978             */
5979            @Override
5980            public final boolean equals(final Object o)
5981                {
5982                if(!(o instanceof CachedFile)) { return(false); }
5983                return(compareTo((CachedFile) o) == 0);
5984                }
5985    
5986            /**The hash depends on the timestamp.
5987             * Two objects can only be equal if their timestamps and cached lengths are.
5988             */
5989            @Override
5990            public final int hashCode()
5991                {
5992                return(((int) lastAccessed) ^ ((int) cachedLength));
5993                }
5994    
5995            /**Total ordering: oldest first, then smallest first, then by name.
5996             */
5997            public final int compareTo(final CachedFile other)
5998                {
5999                // Oldest first, primarily.
6000                if(lastAccessed < other.lastAccessed) { return(-1); }
6001                if(lastAccessed > other.lastAccessed) { return( 1); }
6002    
6003                // Then smallest first.
6004                if(cachedLength < other.cachedLength) { return(-1); }
6005                if(cachedLength > other.cachedLength) { return( 1); }
6006    
6007                // Then sort by name to break ties, ie provide a total ordering.
6008                return(TextUtils.compare(name, other.name));
6009                }
6010    
        /**Rough estimate of maximum space required for new empty cache entry.
         * This assumes a maximum-length exhibit name and no data nor thumbnail
         * for the exhibit.
         * <p>
         * The estimate is the sum of two terms,
         * each rounded up to the estimated file-system block size:
         * one for the exhibit name itself and one for the timestamp file's
         * directory entry.
         * <p>
         * This should approximately match what calcDiscSpace() should
         * produce for such an exhibit, but the values may not be
         * exactly the same.
         */
        static final int MAX_EMPTY_ENTRY_BYTES_ON_DISC = (int) (
            // Basic file name representation...
            FileTools.roundUpToFSBlockSize(ExhibitName.MAX_NAME_LENGTH) +
            // Extra directory space for timestamp file...
            // (NOTE(review): the -2+32 adjustment presumably allows for
            // the aux-file prefix and directory-entry overhead; confirm.)
            FileTools.roundUpToFSBlockSize(ExhibitName.MAX_NAME_LENGTH-2+32));
6024    
6025            /**Calculate the disc space taken up by this exhibit cache entry all told.
6026             * This guesses a little for overheads such as the directory entry,
6027             * rounding for allocation space on disc, etc,
6028             * and aims to be slightly conservative.
6029             * <p>
6030             * This assumes a roughly UFS-like (UNIX File System) pattern of disc usage.
6031             */
6032            final long calcDiscSpace()
6033                {
6034                // Estimated storage consumed as a result of
6035                // length of final portion of each aux file name in directory entry
6036                // plus some overhead for prefixes,
6037                // the directory entry metadata, etc.
6038                final int dirEntry = 32 + name.getShortName().length();
6039    
6040                // Estimate of space taken by exhibit,
6041                // including full cost of its name from top of tree
6042                // assuming that there is some sharing of per-path-component directory overhead.
6043                final long exhibitDataAndDirSpace =
6044                    FileTools.roundUpToFSBlockSize(cachedLength) + // For exhibit data.
6045                    name.length() + // Storage for path to exhibit.
6046                    dirEntry + // For exhibit-file directory entry.
6047                    // Other amortized per-exhibit overheads such as inodes, etc.
6048                    1024 + FileTools.FS_EST_BLOCK_SIZE_BYTES;
6049    
6050                // Extra storage cost of (small) timestamp file.
6051                final long stampFileSpace =
6052                    FileTools.FS_EST_BLOCK_SIZE_BYTES + // For `timestamp' file block.
6053                    dirEntry; // For timestamp-file directory entry.
6054    
6055                // Extra storage cost of thumbnails file,
6056                // iff the thumbnails exist.
6057                final long tnFileSpace = (tnBytes == 0) ? 0 :
6058                    (FileTools.roundUpToFSBlockSize(tnBytes) + // For `thumbnails' file block.
6059                     dirEntry); // For thumbnail-file directory entry.
6060    
6061                return(exhibitDataAndDirSpace + stampFileSpace + tnFileSpace);
6062                }
6063    
6064            /**Used to zap my disc files, including all auxiliary files for this exhibit.
6065             * Quietly ignores any errors.
6066             * <p>
6067             * This does not adjust the in-memory records.
6068             */
6069            final void zapMe(final File cacheDir)
6070                {
6071                assert(cacheDir != null);
6072                final File dataFile = new File(new File(cacheDir, CACHE_EXDATA_DIR), name.toString());
6073                final File dataFileDir = new File(dataFile.getParent());
6074                final String fileComponent = dataFile.getName();
6075                final File stampFile = new File(dataFileDir,
6076                    CACHE_EXAUX_PREFIX + CACHE_EXAUX_TIMESTAMP_KW + '.' + fileComponent);
6077                final File tnFile = new File(dataFileDir,
6078                    CACHE_EXAUX_PREFIX + CACHE_EXAUX_TN_KW + '.' + fileComponent);
6079                dataFile.delete();
6080                stampFile.delete();
6081                tnFile.delete();
6082                }
6083    
6084            /**Generate human-readable summary of state. */
6085            @Override public String toString()
6086                {
6087                return("CachedFile [cachedLength=" + cachedLength
6088                        + ", lastAccessed=" + (new Date(lastAccessed))
6089                        + ", name=" + name
6090                        + ", timestamp=" + (new Date(timestamp))
6091                        + ", tnBytes=" + tnBytes + "]");
6092                }
6093    
        /**My serialisation version number.
         * Fixed explicitly rather than left for the runtime to compute.
         */
        private static final long serialVersionUID = -5246653973669373340L;
6096    
        /**Deserialise.
         * Reads the fields with the default mechanism and then
         * immediately runs validateObject() on the result,
         * so a stream carrying bad values is rejected with an
         * InvalidObjectException rather than yielding a bad instance.
         *
         * @param in  stream to read this object's state from
         * @throws IOException  if reading fails or the state read is invalid
         * @throws ClassNotFoundException  if a class needed from the stream cannot be found
         */
        private void readObject(final ObjectInputStream in)
            throws IOException, ClassNotFoundException
            {
            in.defaultReadObject();
            validateObject(); // Validate state immediately.
            }
6104    
6105            /**Validate fields/state.
6106             * Called in the constructor and possibly after de-serialising.
6107             * <p>
6108             * Barf if something bad is found.
6109             * (Maybe allow some extra info in debug version.)
6110             */
6111            public void validateObject()
6112                throws InvalidObjectException
6113                {
6114                // Check that all components are sane and safe.
6115                if(!ExhibitName.validNameSyntax(name))
6116                    { throw new InvalidObjectException("bad object: invalid name: " + name); }
6117    
6118                if(timestamp <= 0) // Could maybe have tighter constraint...
6119                    { throw new InvalidObjectException("bad object: invalid timestamp"); }
6120    
6121                if(cachedLength < 0)
6122                    { throw new InvalidObjectException("bad object: invalid cached length"); }
6123    
6124                if(lastAccessed < 0) // Could maybe have tighter constraint...
6125                    { throw new InvalidObjectException("bad object: bad access time: " + lastAccessed); }
6126    
6127                if(tnBytes < 0)
6128                    { throw new InvalidObjectException("bad object: invalid thumbnail size"); }
6129                }
6130            }
6131    
6132    
    /**Fraction of max cache size that is the low-water mark.
     * In the range ]0.0f, 1.0f[ excluding both end points.
     * <p>
     * We will only do precacheing when the cache size is below
     * the low-water mark.
     * <p>
     * Don't get this too close to 1 to avoid churning the cache
     * when loading large single blocks of data
     * or upon other minor disturbances.
     */
    private static final float LOW_WATER_FRACTION = 0.95f;
6144    
6145        /**Routine to do incremental pre-cacheing.
6146         * Exits as soon as it detects other threads queueing for the cache lock.
6147         * <p>
6148         * This relies on the following items being fetched and entirely
6149         * maintained by other activity/methods, probably under poll():
6150         * <ul>
6151         * <li>GenProps
6152         * <li>AllExhibitProperties
6153         * </ul>
6154         * <p>
6155         * This will only do any data prefetching while the cache is
6156         * below the low-water mark, and will limit the amount
6157         * of client and master resource used.
6158         * <p>
6159         * We won't actually start any precacheing until
6160         * we see some evidence of user/upstream activity that
6161         * might eventually benefit from it.
6162         * <p>
6163         * This will also only precache while the "aggressive" flag is set,
6164         * and this should be set true only when the system is not busy.
6165         * <p>
6166         * This may also involve precomputation/preloading of optional data,
     * though it should not be relied upon in lieu of other methods to keep this fresh,
6168         * so we may have work to do even if there is no space for prefetching.
6169         * <p>
6170         * This also incrementally checks the cache for consistency with the
6171         * current exhibit properties, ie timestamp, size, hashes.
6172         * <p>
6173         * When running as a cloud instance, with bandwidth and CPU metered/charged,
6174         * we may resist precaching exhibit data and all but the most popular thumbnails.
6175         * <p>
6176         * This does not hold a cache lock for its duration,
6177         * but does hold a private lock to protect its internal state
6178         * because it must not be multi-threaded;
6179         * any attempt to run this in a second thread is quietly vetoed.
6180         */
6181        private void _doPreCache(final GenProps gp)
6182            {
6183            // If not in an aggressive mode
6184            // then return immediately.
6185            if(!_aggressive) { return; }
6186    
6187            // If no evidence of continuing downstream use of this cache instance
6188            // then reduce the polling frequency/effort.
6189            if(!_userRequestedDataFromCache && Rnd.fastRnd.nextBoolean()) { return; }
6190    
        // If it is too soon to do any more precacheing
6192            // then return immediately.
6193            if(System.currentTimeMillis() < _noMorePrecacheUntil) { return; }
6194    
6195            // If the cache is too full to allow any precacheing
6196            // then return immediately.
6197            if(!metaData.canPrecache(cacheDir)) { return; }
6198    
        // If the cache is currently read- or write-locked,
        // or if there is anyone waiting to acquire a cache lock,
6201            // then quit immediately so as not to cause extra contention and so as
6202            // to keep precacheing as unintrusive as possible.
6203            if(rwl.isWriteLocked() || (rwl.getReadLockCount() > 0) || rwl.hasQueuedThreads()) { return; }
6204    
6205            // If the system is (temporarily) conserving power
6206            // then return immediately.
6207            if(GenUtils.mustConservePower()) { return; }
6208    
6209            // If we're already running a precache thread
6210            // then don't try to start another one.
6211            if(_preCacheLock.isLocked()) { return; }
6212    
6213            // Spin off an I/O-bound thread for the next bit of precacheing.
6214            // Discardable to avoid blocking poll() for long periods.
        // This is executed immediately or not at all if the pool is full.
6216            ThreadUtils.nonCPUThreadPoolDiscardable.submit(new Runnable(){ public final void run() {
6217                // Don't attempt to precache if already in progress...
6218                if(!_preCacheLock.tryLock()) { return; }
6219                try
6220                    {
6221                    final long startTime = System.currentTimeMillis();
6222    
                // If it is too soon to do any more precacheing
6224                    // then return immediately.
6225                    if(startTime < _noMorePrecacheUntil) { return; }
6226    
6227                    // If we have been asked to consume <= 2% resources
6228                    // then just don't even try precaching.
6229                    final int websvr_bw_limiter =
6230                        gp.getWEBSVR_BW_LIMITER() * LocalProps.getServerSlowdownFactor();
6231                    if(websvr_bw_limiter >= 50) { return; }
6232    
6233                    // OK, do some precache work...
6234                    final AllExhibitProperties aep = _AEP;
6235                    final AllExhibitImmutableData aeid = aep.aeid;
6236    
6237                    // Force expiry of the precache iterator
6238                    // if we can tell that the exhibit set has changed
6239                    // since the iterator was created.
6240                    final Long exhibitHash = new Long(aep.longHash);
6241    
6242                    try
6243                        {
6244                        if((_precacheExhibitHash != null) && !exhibitHash.equals(_precacheExhibitHash))
6245                            { _precacheIterator = null; }
6246    
6247                        // If the precache iterator has `expired'
6248                        // then reset it.
6249                        if((_precacheIterator == null) ||
6250                           !_precacheIterator.hasNext())
6251                            {
6252                            // If the hash is (non-null and) equal to the exhibit hash,
6253                            // it means that there is no further precaching work to do
6254                            // for now.
6255                            if(exhibitHash.equals(_precacheExhibitHash))
6256                                {
6257                                // Postpone precacheing until the exhibit set changes
6258                                // OR data is requested from the cache by a visitor.
6259                                _userRequestedDataFromCache = false;
6260                                _precacheExhibitHash = null;
6261                                logger.log("[ExhibitDataSimpleCache: stopped precaching until more user activity seen.]");
6262                                return;
6263                                }
6264    
6265                            final ScorerCacheIF scorers = null; // FIXME: should get access to RealThing(TM).
6266    
6267                            // We (re)set it to the full set of
6268                            // currently valid (full) exhibit names,
6269                            // since those are the only things that we should be
6270                            // aggressively precaching (there may be defunct
6271                            // items in the metadata that have been deleted
6272                            // from the exhibit set, for example).
6273                            //
6274                            // Sort the list by recent download popularity,
6275                            // then sort the list "best"-first,
6276                            // at least to a quick approximation,
6277                            // since we usually will not have the
6278                            // fully-computed "goodness" data available
6279                            // when an exhibit set changes for example.
6280                            //
6281                            // We give priority to exhibits with
6282                            // no metadata or thumbnails cached yet.
6283                            //
6284                            // For speed we'll accept stale/approximate data,
6285                            // as this only alters the order of processing.
6286                            final EventVariableValue downloads = varMgr.getEventValue(SystemVariables.ACCESSPATTERN_COMPLETED_DOWNLOAD, EventPeriod.VLONG, false);
6287                            final ArrayList<Name.ExhibitFull> al = new ArrayList<Name.ExhibitFull>(aeid.getAllExhibitNamesSorted());
6288                            Collections.sort(al, (new Comparator<Name.ExhibitFull>(){
6289                                /**Sort "best"-first, being prepared to accept stale data if need be. */
6290                                public final int compare(final Name.ExhibitFull s1, final Name.ExhibitFull s2)
6291                                    {
6292                                    // First give priority to exhibits with
6293                                    // no metadata or thumbnails cached yet.
6294                                    final CachedFile cf1 = metaData.exhibitGetInfo(s1);
6295                                    final boolean hasMetaData1 = (cf1 != null);
6296                                    final CachedFile cf2 = metaData.exhibitGetInfo(s2);
6297                                    final boolean hasMetaData2 = (cf2 != null);
6298                                    // Item without metadata should come first.
6299                                    if(hasMetaData1 != hasMetaData2)
6300                                        { return(hasMetaData1 ? +1 : -1); }
6301    
6302                                    final ExhibitMIME.ExhibitTypeParameters exhibitType1 = ExhibitMIME.getExhibitType(ExhibitName.getExtensionComponent(s1).toString());
6303                                    final boolean needsTN1 = (cf1 != null) && !cf1.hasThumbnails() &&
6304                                                             (exhibitType1 != null) && exhibitType1.canPossiblyCreateThumbnailOfSameMIMEType();
6305                                    final ExhibitMIME.ExhibitTypeParameters exhibitType2 = ExhibitMIME.getExhibitType(ExhibitName.getExtensionComponent(s2).toString());
6306                                    final boolean needsTN2 = (cf2 != null) && !cf2.hasThumbnails() &&
6307                                                             (exhibitType2 != null) && exhibitType2.canPossiblyCreateThumbnailOfSameMIMEType();
6308                                    // Item with missing thumbnail should come first.
6309                                    if(needsTN1 != needsTN2)
6310                                        { return(needsTN2 ? +1 : -1); }
6311    
6312                                    // Now sort with more-downloads (more-popular) item first.
6313                                    final int dl1 = downloads.getCount(s1);
6314                                    final int dl2 = downloads.getCount(s2);
6315                                    if(dl1 > dl2) { return(-1); /* Correct order. */ }
6316                                    if(dl1 < dl2) { return(+1); /* Wrong order. */ }
6317    
6318                                    // If we've not yet taken much time on this sort/round
6319                                    // (and we're not conserving CPU temporarily or permanently)
6320                                    // then we can potentially force items' EPCM values up-to-date as we go.
6321                                    final boolean saveCPU = GenUtils.mustConserveCPU() ||
6322                                        ((System.currentTimeMillis() - startTime) > MAX_dPC_SPIN_TIME_MS/2);
6323    
6324                                    final ScorerCacheIF scorers = null; // FIXME: should get access to RealThing(TM).
6325    
6326                                    // Then sort by (integer) goodness value.
6327                                    final int g1 = aep.getExhibitPropsComputableMutable(s1, saveCPU, gp, ExhibitDataSimpleCache.this, scorers).getGoodness();
6328                                    final int g2 = aep.getExhibitPropsComputableMutable(s2, saveCPU, gp, ExhibitDataSimpleCache.this, scorers).getGoodness();
6329                                    if(g1 > g2) { return(-1); /* Correct order. */ }
6330                                    if(g1 < g2) { return(+1); /* Wrong order. */ }
6331                                    return(0); // Identical goodness.
6332                                    }
6333                                }));
6334    
6335                            // Compute the "best" exhibits for enhanced precaching.
6336                            final int lastIndex = al.size() / 10;
6337                            // Filter to a list of "good", non-stale-EPCM items.
6338                            final List<Name.ExhibitFull> bestFiltered = new ArrayList<Name.ExhibitFull>(al.subList(0, Math.min(lastIndex, MAX_BEST_EX_PRECACHED)));
6339                            // Since items to be trimmed will more often be at the end
6340                            // work backwards through the list to minimise copying.
6341                            for(int i = bestFiltered.size(); --i >= 0; )
6342                                {
6343                                final Name.ExhibitFull ex = bestFiltered.get(i);
6344                                final ExhibitPropsComputableMutable epcm =
6345                                    aep.getExhibitPropsComputableMutable(ex, true, gp, ExhibitDataSimpleCache.this, scorers);
6346                                // Remove trivially-stale or bad entry.  (Continue to trust a stale entry however...)
6347                                    if((epcm == null) || epcm.isTriviallyStale() || (epcm.getGoodness() <= 0))
6348                                        { bestFiltered.remove(i); }
6349                                    }
6350    if(IsDebug.isDebug) { logger.log("[ExhibitDataSimpleCache: bestFiltered.size()="+bestFiltered.size()+".]"); }
6351    
6352                            // Update the best-exhibit list atomically...
6353                            synchronized(_bestExhibits)
6354                                {
6355                                _bestExhibits.clear();
6356                                _bestExhibits.addAll(bestFiltered);
6357                                }
6358    
6359                            // Reinsert the "bestFiltered" entries at the start
6360                            // so that they get two bites of the cherry each pass.
6361                            al.addAll(0, bestFiltered);
6362    
6363                            // Help avoid unused memory being held for a long time.
6364                            bestFiltered.clear();
6365                            al.trimToSize();
6366    
6367                            // Create/save the iterator.
6368                            _precacheIterator = al.iterator();
6369    
6370                            // Note the hash of the current exhibit set.
6371                            _precacheExhibitHash = exhibitHash;
6372    
6373                            // Note restart of precaching scan of all exhibits.
6374                            StatsLogger.captureDataPoint(statsIDSCGEN, SCGNAME_PRECACHERESTART);
6375                            logger.log("[ExhibitDataSimpleCache: started precaching round for "+aeid.length+" exhibits.]");
6376                            }
6377    
6378                        // Do the precacheing work.
6379                        // Stop when we have used up all the pending names to examine,
6380                        // or we've used a just-GUI-significant amount of time,
6381                        // or we see other users queueing to get into the cache...
6382                        do
6383                            {
6384                            // If cache is too full to allow any precacheing
6385                            // then return immediately.
6386                            if(!metaData.canPrecache(cacheDir)) { return; }
6387    
6388                            // If there is anyone waiting to acquire a cache lock,
6389                            // then quit immediately so as not to cause extra contention
6390                            // and so as to keep precacheing as unintrusive as possible.
6391                            if(rwl.hasQueuedThreads()) { return; }
6392    
6393                            // Stop if we have run out of exhibits to precache...
6394                            if(!_precacheIterator.hasNext())
6395                                {
6396                                // Force recompute of 'fully-loaded' measure for this new AEP.
6397                                metaData.getFullyCachedCount(aep, true);
6398    
6399                                if(_precacheExhibitHash != null)
6400                                    {
6401                                    logger.log("[ExhibitDataSimpleCache: finished precaching round with no work to do.]");
6402                                    return;
6403                                    }
6404    
6405                                logger.log("[ExhibitDataSimpleCache: finished precaching round with work still pending.]");
6406    
6407                                // Force the meta-data out to disc
6408                                // since we did do some work this time around.
6409                                metaData.setNeedsSave();
6410                                return;
6411                                }
6412    
6413                            // Note precache examination of next exhibit.
6414                            StatsLogger.captureDataPoint(statsIDSCGEN, SCGNAME_PRECACHEEXAMINED);
6415    
6416                            // Get the next item to examine and make sure that
6417                            // it is still a valid exhibit.
6418                            final Name.ExhibitFull nextToExamine = _precacheIterator.next();
6419                            final ExhibitStaticAttr esa = aeid.getStaticAttr(nextToExamine);
6420                            if(esa == null)
6421                                {
6422                                // If we find that an exhibit name from the iterator
6423                                // is no longer valid
6424                                // then force creation of a new iterator from
6425                                // the new exhibit set.
6426                                // This is to stop us wasting time on deleted exhibits.
6427                                // This should restart this precacheing round
6428                                // rather than terminating it.
6429                                final Set<Name.ExhibitFull> empty = Collections.emptySet();
6430                                _precacheIterator = empty.iterator();
6431                                _precacheExhibitHash = null;
6432                                break;
6433                                }
6434    
6435                            // Update one exhibit.
6436                            // Keep trying to bring it up-to-date if:
6437                            //   * This exhibit is not completely cached.
6438                            //   * We have not run out of time.
6439                            //   * We draw a Gaussian random number smaller than
6440                            //     the exhibit's goodness.
6441                            // The aim is that better exhibits should have relatively
6442                            // more time spent on bringing them up to date.
6443                            //
6444                            // An IOException terminates this burst of precaching.
6445                            //
6446                            // Note that we are prepared to try to force
6447                            // the EPCM to be (re)computed if necessary
6448                            // as a service to the rest of the system
                        // if we are prepared to 'precache' these values anyway.
6450                            final ScorerCacheIF scorers = null; // FIXME: should ideally get access to RealThing(TM).
6451                            while(_updateOneExhibit(esa, gp, aep, false) &&
6452                                  (Rnd.fastRnd.nextGaussian() <
6453                                        1.5*aep.getExhibitPropsComputableMutable(esa.getExhibitFullName(),
6454                                                                                 (!CALC_MISSING_EPCM_DURING_PRECACHE) || Rnd.fastRnd.nextBoolean(),
6455                                                                                 gp,
6456                                                                                 ExhibitDataSimpleCache.this,
6457                                                                                 scorers).getGoodnessAsFloat()))
6458                                {
6459                                // If there is anyone waiting to acquire a lock,
6460                                // then quit immediately to let them in
6461                                // to keep precacheing as unintrusive as possible.
6462                                if(rwl.hasQueuedThreads()) { return; }
6463    
6464                                // If we run out of time,
6465                                // then quit immediately.
6466                                if((System.currentTimeMillis() - startTime) > MAX_dPC_SPIN_TIME_MS) { return; }
6467                                }
6468    
6469                        // If we still haven't taken too much time
6470                        // then examine another exhibit.
6471                        } while((System.currentTimeMillis() - startTime) < MAX_dPC_SPIN_TIME_MS);
6472                    }
6473                catch(final IOException e)
6474                    {
6475                    // Absorb any IOException quietly.
6476                    }
6477                finally
6478                    {
6479                    // See how long this round took,
6480                    // and put off the next bit of precacheing by a multiple.
6481                    //
6482                    // If this round took much longer than intended
6483                    // or upstream is conserving power
6484                    // then postpone the next attempt much further than usual
                // as this may be invoking especially expensive operations elsewhere,
6486                    // eg filesystem mounts at the server or connection dial-ups, etc.
6487                    //
6488                    // We cap our pause time in case of transient delays.
6489                    //
6490                    // At the end of an entire precaching round
6491                    // put off more precache work for an additional significant time.
6492                    final long now = System.currentTimeMillis();
6493                    final long timeTaken = now - startTime;
6494                    final boolean muchSlowerThanExpected = (timeTaken > Math.max(1024, 3*MAX_dPC_SPIN_TIME_MS));
6495                    final boolean goSlow = muchSlowerThanExpected || upstreamStratum.isUpstreamConserving();
6496    
6497                    _noMorePrecacheUntil = now +
6498                        (((_precacheIterator != null) && _precacheIterator.hasNext()) ? 11 : 2*MAX_dPC_BACKOFF_TIME) +
6499                        Math.min(MAX_dPC_BACKOFF_TIME, (((websvr_bw_limiter>1) ? (timeTaken*websvr_bw_limiter) : timeTaken) << (goSlow ? 3 : 1)));
6500                    }
6501                }
6502                finally { _preCacheLock.unlock(); } } });
6503            }
6504    
6505        /**Partly check the cache data (including metadata, tns, etc) for validity.
6506         * This picks one or more aspects (at random) of the currently cached data
6507         * for the specified exhibit for validity.
6508         * <p>
6509         * Checks include length, timestamp, and hashes
6510         * dependent on the data available.
6511         * <p>
6512         * This is designed to complete reasonably quickly in most cases,
6513         * to perform an incremental check,
6514         * removing or in some cases repairing damaged/corrupt/invalid data.
6515         * <p>
6516         * If this finds the cache entry to be broken somehow
6517         * then this routine may delete the cache entry entirely,
6518         * or repair the data, or prune back to some valid prefix of the data held,
6519         * or just remove the corrupt underlying data to leave the metadata
6520         * to be fixed at a later date.
6521         * <p>
6522         * This will grab locks only as it needs them
6523         * in order to be as unintrusive as possible.
6524         * <p>
6525         * This is not expected to be needed very often,
6526         * but is mainly designed to avoid silent disc corruption,
6527         * and to provide for automatic repair.
6528         *
6529         * @param aep the current exhibit properties; never null
6530         * @param esa the exhibit whose cache data is to be verified; never null
6531         */
6532        private final void _doCacheDataValidityTest(final AllExhibitProperties aep,
6533                                                    final ExhibitStaticAttr esa)
6534            throws IOException
6535            {
6536            assert((aep != null) && (esa != null));
6537    
6538            // Get a snapshot of the cache data, if any, for this exhibit.
6539            CachedFile cf = metaData.exhibitGetInfo(esa.getExhibitFullName());
6540    
6541            // If nothing in the cache for this exhibit then return immediately
6542            // since there is nothing to check or repair!
6543            if(cf == null)
6544                { return; }
6545    
6546    //if(IsDebug.isDebug) { System.out.println("[ExhibitDataSimpleCache._doCacheDataValidityTest(): validating exhibit "+esa+"...]"); }
6547    
6548            // If there is a gross change,
6549            // ie in length or timestamp,
6550            // then remove the current cache entry entirely and return.
6551            if((cf.timestamp != esa.timestamp) ||
6552               (cf.cachedLength > esa.length))
6553                {
6554    logger.log("ERROR: cached data timestamp/length wrong for exhibit: " + esa);
6555                _removeCorruptData(cf);
6556                return;
6557                }
6558    
6559            // If thumbnail data exists but is corrupt/unusable then remove it.
6560            if(cf.hasThumbnails())
6561                {
6562                ExhibitThumbnails tns = null;
6563    
6564                // Grab cache read lock just long enough to read the thumbnails.
6565                // This is assumed to force a validation check during deserialisation.
6566                _getReadLock(rwl, "ExhibitDataSimpleCache._doCacheDataValidityTest() reading thumbnails", logger);
6567                try { tns = cf.getThumbnails(cacheDir); }
6568                catch(final Exception e) { e.printStackTrace(); }
6569                finally { rwl.readLock().unlock(); }
6570    
6571                // If we couldn't load/validate the thumbnails
6572                // or they are of the less less-safe form
6573                // without built-in timestamp/checksums
6574                // then get a write lock and zap them
6575                // and use the updated metadata subsequently.
6576                // Iff this cache entry is removed entirely while we are working
6577                // then we will return.
6578                if((tns == null) || // Couldn't load thumbnails at all.
6579                   ((tns.created <= 0) && !ExhibitThumbnails.NO_THUMBNAILS.equals(tns)) || // Dubious timestamp for non-empty thumbnails.
6580                   ((tns.getSmall() != null) && !tns.getSmall().hasMD5Hash()) || // Missing checksum.
6581                   ((tns.getStandard() != null) && !tns.getStandard().hasMD5Hash())) // Missing checksum.
6582                    {
6583                    logger.log("WARNING: purging damaged or old-style thumbnail for "+esa+": "+tns);
6584                    _getWriteLock(rwl, "ExhibitDataSimpleCache._doCacheDataValidityTest() purging damaged/old thumbnail file", logger);
6585                    try
6586                        {
6587                        final CachedFile reReadInfo = metaData.exhibitGetInfo(esa.getExhibitFullName());
6588                        if(reReadInfo == null) { return; /* Entry now gone. */ }
6589                        metaData._update(rwl, cf = reReadInfo.zapThumbnails(cacheDir).touchedEntry(), logger);
6590                        }
6591                    catch(final Exception e) { e.printStackTrace(); }
6592                    finally { rwl.writeLock().unlock(); }
6593                    }
6594                }
6595    
6596            // If we don't have the accession data then we have no hashes to check.
6597            final ExhibitPropsLoadable exhibitPropsLoadable = aep.getExhibitPropsLoadable(esa.getExhibitFullName());
6598            final AccessionData ad = exhibitPropsLoadable.getAccessionMetadata();
6599            if(ad == null) { return; }
6600    
6601            // OK, note when we are about to start the validation.
6602            final long startTime = System.currentTimeMillis();
6603    
6604            // We can check the whole-exhibit hashes
6605            // if we have the whole exhibit in cache
6606            // AND we have the hashes available.
6607            final boolean wholeExhibitInCache = (cf.cachedLength == esa.length);
6608            final boolean canCheckWholeExhibit = wholeExhibitInCache &&
6609                ((ad.hashCRC32 != null) || (ad.hashMD5 != null));
6610    
6611            if(canCheckWholeExhibit)
6612                {
6613                // Set up our hashes...
6614                final java.util.zip.Checksum hCRC32 = new java.util.zip.CRC32();
6615                final MessageDigest hMD5;
6616                try { hMD5 = MessageDigest.getInstance(CoreConsts.HASH_MD5); }
6617                catch(final NoSuchAlgorithmException e) // Should never happen...
6618                    { throw new Error("could not find "+CoreConsts.HASH_MD5+" digester!"); }
6619    
6620                // Create our buffer...
6621                // We can economise on memory for very short exhibits...
6622                final byte buf[] = new byte[Math.min(AccessionData.HASH_BLOCK_SIZE_BYTES, (int) cf.cachedLength)];
6623    
6624                // To examine the whole file without locking out cache activity
6625                // we'll read the exhibit file a block at a time,
6626                // reacquiring the cache read lock each time.
6627                // We ignore any data added to the cached file while we work.
6628                // If the file gets *shorter* while we're working
6629                // (this *might* be legit if the exhibit was tossed out of cache)
6630                // or we encounter any other unexpected event
6631                // (eg an IOException caused by an unreadable disc sector),
6632                // then we just trash the whole cached entry.
6633                final File dataFile = new File(new File(cacheDir, CACHE_EXDATA_DIR), esa.getCharSequence().toString());
6634                final DataInputStream dis = new DataInputStream(new FileInputStream(dataFile));
6635                try
6636                    {
6637                    for(int block = 0; ; ++block)
6638                        {
6639                        // Compute start offset of this block.
6640                        final int start = block * AccessionData.HASH_BLOCK_SIZE_BYTES;
6641    
6642                        // Stop when we run out of file!
6643                        if(start >= cf.cachedLength) { break; /* Finished. */ }
6644    
6645                        // Compute size of block to read.
6646                        final int len = Math.min(AccessionData.HASH_BLOCK_SIZE_BYTES,
6647                                                    (int) (cf.cachedLength - start));
6648    
6649                        // Grab cache read lock just long enough to read one block.
6650                        _getReadLock(rwl, "ExhibitDataSimpleCache._doCacheDataValidityTest() reading one block of full file", logger);
6651                        try { dis.readFully(buf, 0, len); }
6652                        finally { rwl.readLock().unlock(); }
6653    
6654                        // Update the hashes.
6655                        hCRC32.update(buf, 0, len);
6656                        hMD5.update(buf, 0, len);
6657                        }
6658    
6659                    // Now extract the final hashes and compare them
6660                    // against the accession data...
6661                    final Integer hashCRC32 = new Integer((int) hCRC32.getValue());
6662                    final ROByteArray hashMD5 = new ROByteArray(hMD5.digest());
6663    
6664                    // Check the CRC32 hash if we have it in the accession data.
6665                    if((ad.hashCRC32 != null) && !hashCRC32.equals(ad.hashCRC32))
6666                        {
6667                        logger.log("ERROR: cached data failed CRC32 check ("+hashCRC32+"/"+ad.hashCRC32+") for exhibit: " + esa);
6668                        _removeCorruptData(cf);
6669                        return;
6670                        }
6671    
6672                    // Check the MD5 hash if we have it in the accession data.
6673                    if((ad.hashMD5 != null) && !hashMD5.equals(ad.hashMD5))
6674                        {
6675                        logger.log("ERROR: cached data failed MD5 check ("+hashMD5+"/"+ad.hashMD5+") for exhibit: " + esa);
6676                        _removeCorruptData(cf);
6677                        return;
6678                        }
6679    
6680                    // OK, exhibit was fully validated!
6681                    StatsLogger.captureDataPoint(statsIDSCGEN, SCGNAME_CACHE_VALIDATION);
6682    //if(IsDebug.isDebug) { System.out.println("[ExhibitDataSimpleCache._doCacheDataValidityTest(): whole-file hashes verified for exhibit: "+esa+".]"); }
6683    
6684                    final long endTime = System.currentTimeMillis();
6685                    final long dur = endTime - startTime;
6686                    if(dur > 60000) // Warn about relatively slow validations...
6687                        { logger.log("[ExhibitDataSimpleCache: validation took "+dur+"ms for "+esa+".]"); }
6688                    return;
6689                    }
6690                catch(final IOException e)
6691                    {
6692                    _removeCorruptData(cf); // Zap dodgy data ASAP.
6693                    logger.log("ERROR: unexpected IOException when checking cached data (so removed ALL data) for exhibit: " + esa + ": " + e.getMessage());
6694                    e.printStackTrace();
6695                    return;
6696                    }
6697                finally
6698                    { dis.close(); }
6699                }
6700    
6701    
6702            // TODO: write the per-block hash checks...
6703    
6704            }
6705    
6706        /**Remove cached exhibit data identified as corrupt.
6707         * If possible then this clears up the metadata and raw data, etc,
6708         * but if not then this just removes the suspect raw data, thumbnails, etc,
6709         * from the disc cache.
6710         * <p>
6711         * The metadata will have to get fixed later.
6712         * <p>
6713         * Since this will have to grab a cache write lock
6714         * and then the metadata lock if it is to remove the corrupt data,
6715         * the caller must not have a read lock in place for example.
6716         *
6717         * @param cf   exhibit cache metadata; never null
6718         */
6719        private void _removeCorruptData(final CachedFile cf)
6720            throws IOException
6721            {
6722            // Note removal of corrupt exhibit from cache...
6723            StatsLogger.captureDataPoint(statsIDSCGEN, SCGNAME_CACHEREM_CORRUPT);
6724    
6725            logger.log("WARNING: ExhibitDataSimpleCache: cached exhibit corrupt so being completely purged from cache: " + cf.name);
6726            if(!metaData.exhibitRemoveCacheEntry(rwl, cacheDir, cf.name, false, logger, statsIDSCGEN))
6727                {
6728                // If we could not remove the entry neatly,
6729                // eg because the metadata was read-only,
6730                // then just remove the corrupt underlying data brutally!
6731                logger.log("WARNING: ExhibitDataSimpleCache: cannot update metadata; just removing raw data: " + cf.name);
6732                // Grab both locks in correct order...
6733                _getWriteLock(rwl, "ExhibitDataSimpleCache._removeCorruptData() raw data removal", logger);
6734                try
6735                    {
6736                    synchronized(this)
6737                        { cf.zapMe(cacheDir); /* Zap raw cache data. */ }
6738                    }
6739                finally
6740                    { rwl.writeLock().unlock(); }
6741                }
6742            return;
6743            }
6744    
6745        /**If true then try to at least partially compute EPCM while precacheing.
6746         * To do this fully we would need access to Scorers, etc,
6747         * which may simply not be available in this cacheing layer,
6748         * so at most we can do a fast approximation for EPCM values completely absent.
6749         */
6750        private final static boolean CALC_MISSING_EPCM_DURING_PRECACHE = false;
6751    
6752        /**Update one exhibit incrementally during precaching.
6753         * Must not be called within a lock; will grab any locks it needs.
6754         *
6755         * @param forceDataFetch  if true, strongly encourage data fetch,
6756         *     ie extension of exhibit data if at all possible
6757         *
6758         * @return true  iff this exhibit is not completely cached,
6759         *     ie may benefit from another call to this routine immediately
6760         */
6761        private boolean _updateOneExhibit(final ExhibitStaticAttr esa,
6762                                          final GenProps gp,
6763                                          final AllExhibitProperties aep,
6764                                          final boolean forceDataFetch)
6765            throws IOException
6766            {
6767            boolean notCompletelyCached = false;
6768    
6769            CachedFile icf = metaData.exhibitGetInfo(esa.getExhibitFullName());
6770    
6771            // Always try to get thumbnails first for any exhibit.
6772            // Much more useful to show in catalogue pages,
6773            // takes less space and puts less strain on the master
6774            // (which which usually have them cached locally).
6775            //
6776            // We'll create a cache entry for this exhibit first if need be.
6777            // This allows us to pre-cache just metadata
6778            // when the server is in a "slow" (less aggressive) mode.
6779            if(metaData.canPrecacheThumbnails(cacheDir))
6780                {
6781    //            CachedFile icf = metaData.exhibitGetInfo(esa.filePath);
6782                if(icf == null)
6783                    {
6784                    // Try to create new entry...
6785                    // Ignore the return code...
6786                    metaData.exhibitCreateNewCacheEntry(rwl,
6787                                                        cacheDir,
6788                                                        esa,
6789                                                        logger,
6790                                                        statsIDSCGEN);
6791                    // ...but try again to get the cache entry.
6792                    icf = metaData.exhibitGetInfo(esa.getExhibitFullName());
6793                    }
6794    
6795                // Is it worth precaching a thumbnail?
6796                // Only do it if:
6797                //  0) There is a cache entry for this exhibit.
6798                //  1) There is not yet a thumbnail.
6799                // AND
6800                //  2) A thumbnail can potentially be made.
6801                // THEN
6802                //  3) We try to fetch the thumbnail directly from the
6803                //     back end without forcing it to be created...
6804                // AND IF THAT DOESN'T GET US THE THUMBNAIL, THEN
6805                //  4) If we have the full exhibit data immediately available
6806                //     (note that this might not be in local cache,
6807                //     eg on the master this might be coming from the
6808                //     filesystem still), and
6809                //     if it looks worth doing, then we call the normal
6810                //     front-end getThumbnails() routine to MAKE the
6811                //     thumbnail from the exhibit data.
6812                if((icf != null) &&
6813                   !icf.hasThumbnails() &&
6814                   (((ExhibitMIME.getInputFileType(esa.getExhibitFullName())) != null) &&
6815                   (ExhibitMIME.getInputFileType(esa.getExhibitFullName())).canPossiblyCreateThumbnailOfSameMIMEType()))
6816                    {
6817                    // Try to force thumbnail to be fetched/made.
6818                    if(_getThumbnails(esa.getExhibitFullName(), true, true) == null)
6819                        {
6820                        // If we can't get the thumbnail made this time
6821                        // then clear the _precacheExhibitHash
6822                        // to indicate there is work still to be done,
6823                        // eg on another precache pass if need be.
6824                        _precacheExhibitHash = null; // Work to be done...
6825    
6826                        // Most of the time, give up quickly for this exhibit
6827                        // if we couldn't get thumbnails in place.
6828                        //
6829                        // We generally want to try very hard
6830                        // to get thumbnails in place first.
6831                        if(0 != Rnd.fastRnd.nextInt(7))
6832                            { return(false); }
6833                        }
6834                    }
6835                }
6836    
6837    
6838            // If this is a cloud for which extra bandwidth may be expensive
6839            // then do not pre-cache any exhibit data; be content with thumbnails.
6840            if(LocalProps.isCloudMirrorInstance())
6841                { return(false); } // Treat exhibit as fully precached for this instance type.
6842    
6843            // If no evidence of continuing downstream use of this cache instance
6844            // then return immediately; be content with thumbnails.
6845            if(!_userRequestedDataFromCache)
6846                { return(false); }
6847    
6848            // If possible, attempt to fetch first/next chunk of an exhibit.
6849            // We are very keen to do this for "popular"/tiny/"best" exhibits,
6850            // even when the cache is quite full and we otherwise might not.
6851            // We also like to bring in the tail of very-nearly-completely-cached
6852            // exhibits so that we can promptly check hashes, etc.
6853            final long cachedLen = (icf == null) ? 0 : icf.cachedLength;
6854            final boolean dataNotFullyCached = cachedLen < esa.length;
6855            final boolean canPrecacheAtAll = metaData.canPrecache(cacheDir);
6856            if(metaData.canPrecacheExhibitData(cacheDir) ||
6857               (canPrecacheAtAll && forceDataFetch /* Asked to extend if at all possible. */ ) ||
6858               (canPrecacheAtAll && metaData.someFree(cacheDir) &&
6859                    ((esa.length <= MAX_TRANSFER_CHUNK_SIZE) /* Small exhibit. */ ||
6860                     ImageUtils.canBeOwnThumbnail(esa) /* Tiny exhibit. */ ||
6861                     _bestExhibits.contains(esa.getExhibitFullName()) /* Good exhibit. */ ||
6862                     ((icf != null) && dataNotFullyCached &&
6863                        ((esa.length - cachedLen) <= MAX_TRANSFER_CHUNK_SIZE))  /* Very-nearly-completely cached exhibit. */ ||
6864                     isPopularDownload(esa) /* Popular exhibit. */ )))
6865                {
6866                // Attempt to extend the on-disc copy of the exhibit
6867                // by one (possibly maximal) chunk, if not yet fully cached.
6868                if(dataNotFullyCached)
6869                    {
6870                    // Exhibit data is not fully cached locally...
6871                    // Read one whole chunk more for efficiency,
6872                    // up to the lower of the file end or cache limit.
6873                    // For now, we don't bother to align this read.
6874    
6875                    // Find out what initial portion we are prepared to cache.
6876                    final int MAX_CACHEABLE_EX_BYTES = _getMaximumCacheableBytesForOneExhibit(gp);
6877    
6878                    // Max position to aspire to read up to...
6879                    final long readLimit =
6880                            Math.min(esa.length, MAX_CACHEABLE_EX_BYTES);
6881                    // Maximum number of bytes that we will actually read...
6882                    // We read the biggest normally-allowed chunk for efficiency.
6883                    final int toRead = Math.min(MAX_TRANSFER_CHUNK_SIZE,
6884                                                (int) (readLimit - cachedLen));
6885                    if(toRead > 0)
6886                        {
6887                        try
6888                            {
6889                            // Extend our cache if possible.
6890                            _getExhibitDataFromUpstreamToPrecache(esa, aep.aeid, gp, cachedLen, toRead, false);
6891    
6892                            // Note successful fetch/cache of data block.
6893                            StatsLogger.captureDataPoint(statsIDSCGEN, SCGNAME_PRECACHEEXDATABLOCK);
6894    
6895                            // Read data successfully,
6896                            // but would it be useful to read some more immediately?
6897                            final CachedFile icf2 = metaData.exhibitGetInfo(esa.getExhibitFullName());
6898                            if((icf2 == null) || (icf2.cachedLength < readLimit))
6899                                {
6900                                notCompletelyCached = true; // This exhibit could do with more precaching work.
6901                                }
6902                            else
6903                                {
6904                                logger.log("[ExhibitDataSimpleCache: completely precached data for "+esa+"; will now test data integrity...]");
6905    
6906                                // Now test the exhibit's hash(es), etc,
6907                                // to ensure that everything arrived intact...
6908                                _doCacheDataValidityTest(aep, esa);
6909                                }
6910                            }
6911                        catch(final IOException e)
6912                            {
6913                            // Note error during precaching...
6914                            StatsLogger.captureDataPoint(statsIDSCGEN, SCGNAME_PRECACHEERROR);
6915                            throw e;
6916                            }
6917                        finally
6918                            {
6919                            // If file still not read as far as possible/allowed,
6920                            // then note that there is still work to do...
6921                            final CachedFile icf2 = metaData.exhibitGetInfo(esa.getExhibitFullName());
6922                            if((icf2 == null) || (icf2.cachedLength < readLimit))
6923                                {
6924                                _precacheExhibitHash = null; // Work to be done...
6925                                }
6926                            }
6927                        }
6928                    }
6929                }
6930    
6931    
6932            if(CALC_MISSING_EPCM_DURING_PRECACHE)
6933                {
6934                // Try to get mutable properties at least partially computed if necessary.
6935                final ExhibitPropsComputableMutable epcmS =
6936                        aep.getExhibitPropsComputableMutable(esa.getExhibitFullName());
6937                if(epcmS == null)
6938                    {
6939                    // If we find that the current properties are not computed at all
6940                    // then we take that as an indication
6941                    // that others (already passed over) may need (re)computing too.
6942                    _precacheExhibitHash = null; // Quite aggressive...
6943    
6944                    // Now attempt to force partial computation for this value...
6945                    // (As there is no access to Scorers this can only be an approximation.)
6946                    aep.getExhibitPropsComputableMutable(esa.getExhibitFullName(), true, gp, this, null);
6947                    }
6948                }
6949    
6950    // DO OTHER PRE-CACHE OPERATIONS FOR THIS EXHIBIT...
6951    
6952            return(notCompletelyCached);
6953            }
6954    
6955    
    /**If true, allow us to try fetching exhibit data from peers rather than master.
     * When false, a peer fetch of exhibit data is attempted only if
     * the caller explicitly forces it.
     */
    private static final boolean ALLOW_DATA_FETCH_FROM_PEERS = true;

    /**If true, allow us to try fetching thumbnails from peers rather than only the master.
     * Since all mirrors/peers should generally cache all thumbnails indefinitely
     * then this should not incur any significant extra traffic even in poor circumstances.
     * <p>
     * This may allow lateral spread of a thumbnail once any mirror has managed to create one
     * even when the master is unable (eg due to resource restrictions) to make one.
     */
    private static final boolean ALLOW_TN_FETCH_FROM_PEERS = true;

    /**Thread-safe Map from mirror ID to strictly-positive rating with "" for master; never null.
     * The rating is a synthetic (milliseconds) time to fetch a big data block (or thumbnails).
     * Lower values represent peers that have the data available quickly
     * more of the time.
     * <p>
     * All values are strictly positive.
     * <p>
     * This is periodically purged of stale data (ie inactive peers)
     * to keep it from growing without bound.
     * <p>
     * Backed by a Hashtable; compound operations such as iteration
     * should hold the lock on this map itself.
     */
    private final Map<String,Long> altDataSourceRating = new Hashtable<String, Long>();

    /**If true then use a cautious strategy to select a peer to talk to.
     * This influences the peer-stats time constant,
     * how often a completely random peer is tried,
     * and whether untested peers are tried eagerly.
     */
    private static final boolean PEER_SELECTION_CAUTIOUS = false;

    /**Default rating/time (ms) for "unknown" data source/mirror/peer; strictly positive.
     * The value chosen corresponds to 10000ms RTT and 1kBps throughput
     * (ie about one millisecond per byte of a maximum-sized transfer chunk),
     * which is much slower than most reasonable peers.
     */
    private static final int PEER_STATS_UNKNOWN_MS = 10000 + MAX_TRANSFER_CHUNK_SIZE;

    /**Time-constant for updating peer fetch time value; strictly positive.
     * Higher values means a lower-pass filter,
     * and more robustness in the face of temporary glitches.
     * <p>
     * A value that is a power of two may result in more efficient code.
     * <p>
     * A value from 8 to 256 is probably reasonable.
     * <p>
     * The cautious strategy uses the larger (slower-moving) value.
     */
    private static final int PEER_STATS_TC = PEER_SELECTION_CAUTIOUS ? 64 : 32;

    /**Fake tag we use to indicate a fetch from the master/upstream via the pipeline.
     * This is the empty string, which is also the key used for the master
     * in the peer ratings map.
     */
    private static final String MASTER_FAKE_TAG = "";
7001    
7002        /**Try to extend cached data for the specified exhibit.
7003         * By default we try to get this from the master,
7004         * but we may try to fetch it from a peer/mirror instead (P2P)
7005         * to reduce the load on the master.
7006         * <p>
7007         * Also, this may be used to attempt data/error recovery
7008         * if data cannot be fetched from the master for some reason.
7009         *
7010         * @param start  byte offset in exhibit to start reading/fetching for; non-negative
7011         * @param len  number of bytes to read; strictly positive.
7012         * @param forceIt  if true then we try very hard to get the data from a peer,
7013         *     for example to help with master/server error recovery
7014         */
7015        private void _getExhibitDataFromUpstreamToPrecache(final ExhibitStaticAttr esa,
7016                                                           final AllExhibitImmutableData aeid,
7017                                                           final GenProps gp,
7018                                                           final long start,
7019                                                           final int len,
7020                                                           final boolean forceIt)
7021            throws IOException
7022            {
7023            assert(esa != null);
7024            assert(aeid != null);
7025            assert(gp != null);
7026            assert(start <= Integer.MAX_VALUE);
7027            assert(len > 0);
7028    
7029            // Dummy buffer to read the data into.
7030            // The data is discarded; we only want the side-effect of it being cached.
7031            final ByteBuffer buf = ByteBuffer.allocate(len);
7032    
7033            // Attempt to adaptively fetch data from peers if allowed.
7034            // This may significantly reduce load on the master for exhibit data.
7035            if(ALLOW_DATA_FETCH_FROM_PEERS || forceIt)
7036                {
7037                // Choose which peer to contact...
7038                final String peerToTry = _pickPeer();
7039    
7040                // If we selected the master rather than a peer
7041                // then just fall through to the default handler.
7042                if(!MASTER_FAKE_TAG.equals(peerToTry))
7043                    {
7044                    final long startFetch = System.currentTimeMillis();
7045                    boolean successful = false;
7046                    try
7047                        {
7048                        metaData.exhibitRead(rwl,
7049                                             peerToTry,
7050                                             cacheDir, esa.getExhibitFullName(), source,
7051                                             aeid,
7052                                             gp,
7053                                             (int) start,
7054                                             buf,
7055                                             null, // Cache locally but not upstream...
7056                                             logger,
7057                                             statsIDSCGEN);
7058    
7059                        // Got the data OK!
7060                        successful = true;
7061    
7062                        // Don't need to go to the master!
7063                        return;
7064                        }
7065                    catch(final IOException e)
7066                        {
7067                        logger.log("INFO: data block fetch from peer for exhibit "+esa+" failed with IOException: " + peerToTry + ": " + e.getMessage());
7068                        /* Fall through to get data from master. */
7069                        }
7070                    finally
7071                        {
7072                        final long endFetch = System.currentTimeMillis();
7073                        _updatePeerStats(peerToTry,
7074                                         successful,
7075                                         (endFetch - startFetch));
7076    
7077                        // Remove some stats logging from the timing...
7078                        if(successful)
7079                            {
7080                            // Note successful fetch/cache of data block.
7081                            StatsLogger.captureDataPoint(statsIDSCGEN, SCGPREF_PRECACHEEXDATABLOCKSRC + peerToTry);
7082                            logger.log("INFO: successful data block fetch from peer: "+peerToTry+"; stats: " + (new ArrayList<Map.Entry<String,Long>>(altDataSourceRating.entrySet())));
7083                            }
7084                        else
7085                            {
7086                            // Note unsuccessful fetch/cache of data block.
7087                            final String ev = SCGPREF_PRECACHEEXDATABLOCKSRCERR + peerToTry;
7088                            StatsLogger.captureDataPoint(statsIDSCGEN, ev);
7089                            logger.log("INFO: FAILED data block fetch from peer "+peerToTry+"; stats: " + (new ArrayList<Map.Entry<String,Long>>(altDataSourceRating.entrySet())));
7090                            }
7091                        }
7092                    }
7093                }
7094    
7095            // Fall through to use the master/upstream by default.
7096    
7097            // Get the data from the master...
7098            // Note stats...
7099            final long startFetchFromMaster = System.currentTimeMillis();
7100            boolean fetchFromMasterSuccessful = false;
7101            try
7102                {
7103                metaData.exhibitRead(rwl,
7104                                     null, // Fetch from upstream/master.
7105                                     cacheDir, esa.getExhibitFullName(), source,
7106                                     aeid,
7107                                     gp,
7108                                     (int) start,
7109                                     buf,
7110                                     null, // Cache locally but not upstream...
7111                                     logger,
7112                                     statsIDSCGEN);
7113                fetchFromMasterSuccessful = true;
7114                }
7115            finally
7116                {
7117    //            // Possibly note/update stats for large fetches only.
7118    //            final boolean largeFetch = (len >= CoreConsts.BULK_DATA_TRANSFER_SIZE);
7119    //            if(largeFetch)
7120                    {
7121                    final long endFetchFromMaster = System.currentTimeMillis();
7122                    _updatePeerStats(MASTER_FAKE_TAG,
7123                                     fetchFromMasterSuccessful,
7124                                     (endFetchFromMaster - startFetchFromMaster));
7125    
7126                    // Remove some stats logging from the timing...
7127                    if(fetchFromMasterSuccessful)
7128                        {
7129                        // Note successful fetch/cache of data block.
7130                        StatsLogger.captureDataPoint(statsIDSCGEN, SCGPREF_PRECACHEEXDATABLOCKSRC + "master");
7131                        }
7132                    else
7133                        {
7134                        // Note unsuccessful fetch/cache of data block.
7135                        final String ev = SCGPREF_PRECACHEEXDATABLOCKSRCERR + "master";
7136                        StatsLogger.captureDataPoint(statsIDSCGEN, ev);
7137    
7138                        logger.log("INFO: data block fetch from peer stats: " + (new ArrayList<Map.Entry<String,Long>>(altDataSourceRating.entrySet())));
7139                        }
7140                    }
7141                }
7142            }
7143    
    /**Reciprocal of the fraction of the time to pick a peer completely at random; strictly positive.
     * A random pick is made when a random draw in [0,P2P_RND_FRAC) comes up zero,
     * ie about one time in P2P_RND_FRAC.
     * <p>
     * The value is chosen once per instance: a base that is larger
     * (so random picks are rarer) when being cautious,
     * plus a small random offset from 0 to 10 inclusive.
     */
    private final int P2P_RND_FRAC = (PEER_SELECTION_CAUTIOUS ? 61 : 37) +
                                    Rnd.fastRnd.nextInt(11);

    /**Reciprocal of the fraction of the time to choose a 2nd-tier peer rather than the best peer; strictly positive.
     * Chosen once per instance, in the range 3 to 5 inclusive,
     * ie a nearly-best peer is considered about one time in P2P_NEXT_BEST_FRAC.
     */
    private final int P2P_NEXT_BEST_FRAC = 3 + Rnd.fastRnd.nextInt(3);

    /**Factor/multiplier of peers worse than top that will be considered for fetches routinely; strictly positive.
     * A peer is "nearly top" if its score is less than this factor times the best score.
     * Twice this factor is also used as the penalty multiplier
     * applied to the master's score when peers are preferred.
     */
    private static final int NEARLY_TOP_FACTOR = 2;

    /**If true, avoid use of master where peers are available.
     * When set, the master's score is marked down during peer selection,
     * and the master is not added as a synthetic candidate alongside real peers.
     */
    private static final boolean PREFER_PEERS_TO_MASTER_WHERE_POSSIBLE = true;
7156    
7157        /**Pick one of the supplied peers to attempt to fetch exhibit data from; never null.
7158         * Usually this returns the "best" (fastest) peer,
7159         * but sometimes this will return an apparently sub-optimal peer so as to:
7160         * <ul>
7161         * <li>to test the waters (ie to keep sampling all peers occasionally), and
7162         * <li>to spread load around a little, and
7163         * <li>to rehabilitate once-poor peers (eg bad peers can become good again).
7164         * </ul>
7165         * In particular we are allowing for peer/server load
7166         * and inter-peer network conditions to change continually.
7167         * <p>
7168         * Occasionally this will purge the cached peer stats of anything
7169         * not in the argument set, with constant amortised cost per call.
7170         *
7171         * @param activeMirrors  set of mirror tags
7172         *     (and possible MASTER_FAKE_TAG ("") for the master);
7173         *     never null, never empty
7174         * @return selected peer, MASTER_FAKE_TAG for master; never null
7175         */
7176        private String _pickPeer(final Set<String> activeMirrors)
7177            {
7178            assert(activeMirrors != null);
7179            assert(!activeMirrors.isEmpty());
7180    
7181            // A small fraction of the time pick a peer completely at random
7182            // (providing it is the master or it seems at least to be "up").
7183            // This allows us to try new peers and those currently out of favour,
7184            // and spreads traffic load a little.
7185            // This "hot potato" selection method is also fairly quick.
7186            if(Rnd.fastRnd.nextInt(P2P_RND_FRAC) == 0)
7187                {
7188                final String putativePeer = (new ArrayList<String>(activeMirrors)).get(
7189                            Rnd.goodRnd.nextInt(activeMirrors.size()));
7190                if((MASTER_FAKE_TAG.equals(putativePeer) ||
7191                    (Boolean.TRUE == LoadBalancingUtils.testIfHTTPServerIsUp(HostUtils.makeMirrorNameGeneric(putativePeer), true))))
7192                                { return(putativePeer); }
7193                }
7194    
7195            // Unless being cautious in peer use/selection,
7196            // if there are one or more untested peers
7197            // then shuffle them and pick the first one that is up (or is the master).
7198            if(!PEER_SELECTION_CAUTIOUS)
7199                {
7200                final List<String> untested = new ArrayList<String>(activeMirrors);
7201                untested.removeAll(altDataSourceRating.keySet());
7202                if(!untested.isEmpty())
7203                    {
7204                    Collections.shuffle(untested, Rnd.fastRnd);
7205                    for(final String peer : untested)
7206                        {
7207                        if(MASTER_FAKE_TAG.equals(peer) ||
7208                           (Boolean.TRUE == LoadBalancingUtils.testIfHTTPServerIsUp(HostUtils.makeMirrorNameGeneric(peer), true)))
7209                            { return(peer); }
7210                        }
7211                    }
7212                }
7213    
7214            // Usually pick a peer in relation to its weighting,
7215            // ie pick "better" peers (with lower service times) more often.
7216    
7217            // Select the peer with the lowest score here.
7218            long bestScore = Long.MAX_VALUE;
7219            String selectedPeer = MASTER_FAKE_TAG; // Default to the master.
7220            for(final String peer : activeMirrors)
7221                {
7222                final Long score = altDataSourceRating.get(peer);
7223    
7224                // Treat unknown peers as having a high "unknown" fallback time, a la XNTPD.
7225                // Possibly mark the master so as to prefer peers and reduce strain on the master.
7226                final long sl = (score == null) ? PEER_STATS_UNKNOWN_MS :
7227                    ((PREFER_PEERS_TO_MASTER_WHERE_POSSIBLE && MASTER_FAKE_TAG.equals(peer)) ?
7228                            (score.longValue() * 2*NEARLY_TOP_FACTOR) : score.longValue());
7229                if(sl < bestScore)
7230                    {
7231                    bestScore = sl;
7232                    selectedPeer = peer;
7233                    }
7234                }
7235    
7236            // Some of the time, when scores are sufficiently good,
7237            // select randomly any peer with a score close enough to the best.
7238            // This helps spread the load amongst, and keep an eye on,
7239            // the top few candidates.
7240            //
7241            // We deliberately exclude any peer whose status is unknown (or worse).
7242            if((bestScore < PEER_STATS_UNKNOWN_MS / (2*NEARLY_TOP_FACTOR)) &&
7243                (Rnd.fastRnd.nextInt(P2P_NEXT_BEST_FRAC) == 0))
7244                {
7245                final List<String> candidatePeers = new ArrayList<String>(1 + activeMirrors.size()/2);
7246                for(final String peer : activeMirrors)
7247                    {
7248                    final Long score = altDataSourceRating.get(peer);
7249    
7250                    // Skip unknown peers altogether...
7251                    if(score == null) { continue; }
7252    
7253                    // Possibly mark the master down to prefer peers.
7254                    final long sl = ((PREFER_PEERS_TO_MASTER_WHERE_POSSIBLE && MASTER_FAKE_TAG.equals(peer)) ? (score.longValue() * 2*NEARLY_TOP_FACTOR) :
7255                        score.longValue());
7256                    if(sl < NEARLY_TOP_FACTOR*bestScore) // Consider peer if relatively good...
7257                        { candidatePeers.add(peer); }
7258                    }
7259                // Override best peer if we have a real choice...
7260                // Use goodRnd to make as independent a selection as possible.
7261                final int nCands = candidatePeers.size();
7262                if(nCands > 1)
7263                    { selectedPeer = candidatePeers.get(Rnd.goodRnd.nextInt(nCands)); }
7264                }
7265    
7266            // Possibly take this opportunity to clear out stale data,
7267            // amortising the mean cost-per-call to be O(1) ie independent of peer-set size.
7268            if(0 == Rnd.fastRnd.nextInt(altDataSourceRating.size() + 2))
7269                {
7270                // Atomically remove any entries for peers not in the argument set.
7271                // The master tag is never removed however.
7272                synchronized(altDataSourceRating)
7273                    {
7274                    for(final Iterator<String> it = altDataSourceRating.keySet().iterator(); it.hasNext(); )
7275                        {
7276                        final String peer = it.next();
7277                        if(MASTER_FAKE_TAG.equals(peer))
7278                            { continue; }
7279                        if(!activeMirrors.contains(peer))
7280                            { it.remove(); }
7281                        }
7282                    }
7283                }
7284    
7285            // Return the selected peer...
7286            return(selectedPeer);
7287            }
7288    
7289        /**Pick a peer to attempt to fetch exhibit data from; never null.
7290         * Usually this returns the "best" peer;
7291         * sometimes this will return an apparently sub-optimal peer so as to:
7292         * <ul>
7293         * <li>to test the waters (ie keep sampling all peers occasionally),
7294         * <li>spread load around a little, and
7295         * <li>rehabilitate once-poor peers (eg bad guys may come good).
7296         * </ul>
7297         * In particular we are allowing for peer/server load
7298         * and inter-peer network conditions to change continually.
7299         * <p>
7300         * Occasionally this will purge the cached peer stats of anything
7301         * not in the argument set.
7302         * <p>
7303         * The master should by preference avoid fetching data from a peer
7304         * to avoid contaminating the master copy with bad data from any peer.
7305         * However, if it has a mirror tag, then it may fetch data P2P.
7306         *
7307         * @return selected peer, MASTER_FAKE_TAG for master (ie no peer); never null
7308         */
7309        private String _pickPeer()
7310            {
7311            final String mirrorTag = LocalProps.getMirrorTag();
7312            // This host has to be a mirror itself to be allowed to use P2P...
7313            if(mirrorTag != null)
7314                {
7315                // Get candidate mirrors...
7316                // We use whatever (even stale) entries that we can find
7317                // in order to allow P2P to work longer after
7318                // losing contact with the master
7319                // (when it may prove to be especially useful).
7320                final Set<String> activeMirrors = new HashSet<String>(
7321                    LoadBalancingUtils.getActiveMirrors(varMgr, false).keySet());
7322                // Remove our/this instance/mirror as a candidate...
7323                activeMirrors.remove(mirrorTag);
7324    if(TRACE_P2P_ACTIVITY) { logger.log("[ExhibitDataSimpleCache._pickPeer(): potential peer count for P2P: "+activeMirrors.size()+".]"); }
7325    
7326                // Fall back to using the master as usual if no other candidates...
7327                if(activeMirrors.size() > 0)
7328                    {
7329    if(TRACE_P2P_ACTIVITY) { logger.log("[ExhibitDataSimpleCache._pickPeer(): potential peers for P2P: "+(new ArrayList<String>(activeMirrors))+".]"); }
7330    
7331                    // Add the master as a synthetic candidiate unless we always try to favour peers.
7332                    if(!PREFER_PEERS_TO_MASTER_WHERE_POSSIBLE)
7333                        { activeMirrors.add(MASTER_FAKE_TAG); }
7334    
7335                    // Choose which peer to contact...
7336                    final String peerToTry = _pickPeer(activeMirrors);
7337    if(TRACE_P2P_ACTIVITY) { logger.log("[ExhibitDataSimpleCache._pickPeer(): peer to try for P2P: "+peerToTry+".]"); }
7338    
7339                    // Return the selected mirror (which may be MASTER_FAKE_TAG).
7340                    return(peerToTry);
7341                    }
7342                }
7343    
7344            return(MASTER_FAKE_TAG); // Fake tag for the master as there were no peers available...
7345            }
7346    
7347        /**Update data-transfer stats for the given peer.
7348         * A failed fetch is treated as a very slow access,
7349         * so that a failing peer compares numerically unfavourably with reliable peers.
7350         */
7351        private void _updatePeerStats(final String peer,
7352                                      final boolean fetchSuccessful,
7353                                      final long timeTaken)
7354            {
7355            assert(peer != null);
7356    
7357            // Treat a fail as worse numerically than "unknown"
7358            // and as worse than the actual time taken
7359            // so that consistent failures are liked less than unknown/slow peers.
7360            final long l = fetchSuccessful ? timeTaken :
7361                (1 + (timeTaken + PEER_STATS_UNKNOWN_MS));
7362    
7363            // Atomic wrt the cache...
7364            synchronized(altDataSourceRating)
7365                {
7366                // If there is no entry for this peer then create one now.
7367                Long t = altDataSourceRating.get(peer);
7368                if(t == null)
7369                    {
7370                    if(PEER_SELECTION_CAUTIOUS)
7371                        {
7372                        // Start with a (very) cautious estimate of performance...
7373                        t = new Long(PEER_STATS_UNKNOWN_MS);
7374                        }
7375                    else
7376                        {
7377                        // Start with the value/time that we see for this new peer.
7378                        t = new Long(l);
7379                        }
7380                    altDataSourceRating.put(peer, t);
7381                    }
7382    
7383                // Compute new value for the cache.
7384                // Back off fast in case of failure.
7385                final int timeConst = fetchSuccessful ? PEER_STATS_TC :
7386                                        (Math.max(1, PEER_STATS_TC/2));
7387                final Long v = new Long(Math.max(1, ((t.longValue() * (timeConst-1)) + l + (timeConst/2)) / timeConst));
7388                altDataSourceRating.put(peer, v);
7389    
7390                // Record some stats...
7391    if(TRACE_P2P_ACTIVITY) { logger.log("[ExhibitDataSimpleCache._updatePeerStats(\""+peer+"\", "+fetchSuccessful+", "+timeTaken+") new value = "+v+"...]"); }
7392                }
7393    
7394            if(fetchSuccessful)
7395                {
7396                StatsLogger.captureDataPoint(statsIDSCGEN, SCGPREF_PRECACHEEXDATABLOCKFETCHTIME + GenUtils.log2Approx(Math.max(0, timeTaken)));
7397                }
7398    
7399            // Should we be recording performance/usage stats centrally?
7400            final boolean recordStats = "true".equals(genProps.getGen().get(KEY_debugFlag_P2P_BLOCKXFER));
7401            // If so, do it now.
7402            if(recordStats)
7403                {
7404                final StringBuilder sb = new StringBuilder(32);
7405                sb.append("P2P:bx:");
7406                sb.append(LocalProps.getMirrorTag());
7407                sb.append(':');
7408                sb.append("".equals(peer) ? "M" : peer);
7409                sb.append('=');
7410                if(fetchSuccessful)
7411                    {
7412                    // Note time taken for successful transfer.
7413                    sb.append(GenUtils.log2Approx(timeTaken));
7414                    }
7415                else
7416                    {
7417                    // Note failure.
7418                    sb.append("FAIL");
7419                    }
7420    
7421                try
7422                    {
7423                    varMgr.setVariable(new SimpleVariableValue(
7424                        SystemVariables.PERFMON_STRING_GLOBAL_EVENT,
7425                        sb.toString()));
7426                    }
7427                catch(final IOException e)
7428                    {
7429                    e.printStackTrace(); // Whinge but absorb error...
7430                    }
7431                }
7432            }
7433    
7434        /**In the top-N (global) downloads recently, with at least 2 downloads.
7435         * The "more than one download" filter is to trim off a noisy "long tail"
7436         * (eg on a quiet day).
7437         * <p>
7438         * This uses a combination of hits from the previous and current days.
7439         *
7440         * @param esa  the exhibit details; never null
7441         * @return true if this exhibit is a "popular" download.
7442         */
7443        private boolean isPopularDownload(final ExhibitStaticAttr esa)
7444            throws IOException
7445            {
7446            final EventVariableValue downloadsYesterday = varMgr.getEventValue(SystemVariables.ACCESSPATTERN_COMPLETED_DOWNLOAD, EventPeriod.VLONG, false);
7447            final EventVariableValue downloadsToday;
7448            final String shortName = esa.getExhibitFullName().getShortName().toString();
7449            return(((downloadsYesterday.getRank(shortName) < 100) &&
7450                    (downloadsYesterday.getCount(shortName) > 1)) ||
7451                   // Note that fetching current period possibly incurs more cost.
7452                   (((downloadsToday = varMgr.getEventValue(SystemVariables.ACCESSPATTERN_COMPLETED_DOWNLOAD, EventPeriod.VLONG, true)).getRank(shortName) < Integer.MAX_VALUE) &&
7453                    (downloadsToday.getCount(shortName) > 1)));
7454            }
7455    
    /**Lock used to prevent concurrent (multi-threaded) precache runs. */
    private final ReentrantLock _preCacheLock = new ReentrantLock();

    /**Time before which we will not do more precaching.
     * Private to _doPreCache().
     * <p>
     * There is no explicit initialiser, so this starts at the default of 0,
     * which allows precaching to start immediately.
     * <p>
     * Marked volatile.
     */
    private volatile transient long _noMorePrecacheUntil;

    /**Set true when a user requests data from the cache.
     * This is as a result of user activity, and without this
     * being true we still won't indulge in any precaching.
     * This means that we can safely have more than one context
     * set up (for example), in a servlet runner as long as
     * only one is ever actually used.
     * <p>
     * The initial value is whatever LocalProps.fastStartMode() returns
     * when this instance is created.
     * <p>
     * Accessed without locking (it is volatile); read by _doPreCache()
     * and may be set by a routine that is sure that it has received
     * a user request to fetch exhibit/thumbnail data.
     * <p>
     * We can reset this if we believe we have finished precaching
     * (or at least a reasonable chunk of precaching work)
     * for the current exhibit set;
     * destroy() also clears it to discourage further precaching.
     */
    private transient volatile boolean _userRequestedDataFromCache = LocalProps.fastStartMode();

    /**An iterator over a snapshot of all exhibit names.
     * This is initially null, and when null or when exhausted
     * it is reset to be a new snapshot of the exhibit names.
     * This avoids starvation of some exhibits.
     * <p>
     * We access this only from _doPreCache() which is single-threaded,
     * so this need not be thread safe.
     * <p>
     * We may order the iteration in some way as to try to
     * precache as efficiently as possible, eg smallest or
     * `best' first, or we might store the exhibits in, for
     * example, a shuffled order.
     * <p>
     * When we get a name from this iterator we must make sure
     * that it still represents a valid exhibit, since the exhibit
     * might have been deleted, for example.
     * <p>
     * Accessed only by _doPreCache().
     */
    private transient Iterator<Name.ExhibitFull> _precacheIterator;

    /**Indicator for which image set we are working on; may be null.
     * When we start a new round of precaching we set this
     * to the hash of the current exhibit set.
     * <p>
     * If we come across an exhibit that we do some precaching work on,
     * we set this to null.
     * <p>
     * When we are about to start a new round of precaching
     * and discover this is set to the hash of the current exhibit set,
     * we assume that there was no work to be done and we skip precaching.
     * When the exhibit set changes we will then resume.
     * <p>
     * Accessed under the rwl by _doPreCache().
     */
    private transient Long _precacheExhibitHash;

    /**Maximum time that _doPreCache() can spend in one go (ms).
     * Designed to be short enough to avoid causing massively irritating
     * interruptions to user interactivity if we lock other activity
     * out for this long,
     * though long enough to be relatively efficient if possible.
     * <p>
     * Precaching will not generally interfere with interactive operations
     * so we try to make this time large enough
     * to allow the fetch of a block or three of exhibit data over a slow Net link,
     * allowing for RTT and connection setup and bandwidth, etc.
     * <p>
     * Something of the order of a few seconds may be good (3001ms is used).
     * <p>
     * We radically reduce this (to 113ms) for CPU-sensitive (eg cloud) environments,
     * ie when LocalProps.isCloudMirrorInstance() is true.
     * <p>
     * Note that this is a per-instance final field, computed at construction,
     * rather than a static constant.
     */
    private final int MAX_dPC_SPIN_TIME_MS = LocalProps.isCloudMirrorInstance() ? 113 : 3001;

    /**Max time _doPreCache() has to sleep for (ms).
     * This is basically if some freak event happens beyond
     * _doPreCache()'s reasonable control.
     * <p>
     * A few minutes is probably reasonable:
     * this is 4 minutes plus a random extra amount bounded by 1 minute,
     * chosen once per instance at construction.
     * (Presumably Rnd.fastRnd.nextInt(n) behaves like java.util.Random.nextInt(n),
     * ie returns a value in [0,n); confirm against Rnd.)
     */
    private final int MAX_dPC_BACKOFF_TIME = 4 * 60 * 1000 +
        Rnd.fastRnd.nextInt(1 * 60 * 1000);

    /**Our stratum cached; never null though may be UNKNOWN (the initial value).
     * We may examine the low-power flag to decide to reduce upstream access.
     * <p>
     * Is marked volatile for thread-safe lock-free access.
     * <p>
     * Updates piggybacked on variable set/fetch work.
     */
    private volatile Stratum upstreamStratum = Stratum.UNKNOWN;
7554    
7555        /**Return cached stratum; never null.
7556         * Never throws an exception.
7557         */
7558        public Stratum getStratum()
7559            { return(upstreamStratum); }
7560    
7561    
7562    
    /**Set true by destroy(); intended never to be set false again.
     * Note that destroy() sets this only after it has attempted
     * to save persistable state and has destroyed the upstream source,
     * not at the moment that destroy() is entered.
     * <p>
     * Marked volatile so that it can be read without locking.
     */
    private volatile boolean destroyed;
7565    
7566        /**Shut down the data pipeline.
7567         * Flush state, variables and logs upstream and to disc as appropriate,
7568         * and then make sure that upstream of us is destroyed.
7569         */
7570        public void destroy()
7571            {
7572            // Quickly discourage precaching and free some memory...
7573            _aggressive = false;
7574            _userRequestedDataFromCache = false;
7575            _thumbnailsInMemory.clear();
7576    
7577            // Stop the thread pool.
7578            try { discardableReadAheadTaskThreadPool.shutdownNow(); }
7579            finally
7580                {
7581                // Try to save all useful persistable state that we hold.
7582                try
7583                    {
7584                    try { _cleanAndSaveMetaData(true); }
7585                    finally { _handleSysVars(true); }
7586                    }
7587                catch(final IOException e) { e.printStackTrace(); }
7588                // And ensure that the upstream source is destroy()ed too.
7589                finally { source.destroy(); }
7590                }
7591    
7592            // Mark this instance as doomed once we've completed any saves.
7593            destroyed = true;
7594    
7595            // Check for background threads...
7596            if(_gAEP_lock.isLocked())
7597                { System.err.println("WARNING: AEP fetch thread still running..."); }
7598            // Grab read lock on cache to prevent further changes.
7599            try { _getReadLock(rwl, "shutting down cache in destroy()", logger); }
7600            catch(final InterruptedIOException e) { e.printStackTrace(); }
7601    
7602            // Prevent further updates to metadata
7603            // and thus most cache updates.
7604            metaData.setReadWrite(false);
7605            }
7606        }