001 /*
002 Copyright (c) 1996-2011, Damon Hart-Davis
003 All rights reserved.
004
005 Redistribution and use in source and binary forms, with or without
006 modification, are permitted provided that the following conditions are
007 met:
008
009 * Redistributions of source code must retain the above copyright
010 notice, this list of conditions and the following disclaimer.
011
012 * Redistributions in binary form must reproduce the above copyright
013 notice, this list of conditions and the following disclaimer in the
014 documentation and/or other materials provided with the
015 distribution.
016
017 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
018 IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
019 TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
020 PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
021 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
022 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
023 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
024 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
025 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
026 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
027 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
028 */
029 package org.hd.d.pg2k.svrCore.datasource;
030
031 import java.io.DataInputStream;
032 import java.io.File;
033 import java.io.FileFilter;
034 import java.io.FileInputStream;
035 import java.io.FileNotFoundException;
036 import java.io.FileWriter;
037 import java.io.IOException;
038 import java.io.InterruptedIOException;
039 import java.io.InvalidObjectException;
040 import java.io.ObjectInputStream;
041 import java.io.ObjectInputValidation;
042 import java.io.ObjectOutputStream;
043 import java.io.PrintWriter;
044 import java.io.RandomAccessFile;
045 import java.io.Serializable;
046 import java.nio.ByteBuffer;
047 import java.security.MessageDigest;
048 import java.security.NoSuchAlgorithmException;
049 import java.util.ArrayList;
050 import java.util.Arrays;
051 import java.util.BitSet;
052 import java.util.Collections;
053 import java.util.Comparator;
054 import java.util.Date;
055 import java.util.HashMap;
056 import java.util.HashSet;
057 import java.util.Hashtable;
058 import java.util.Iterator;
059 import java.util.List;
060 import java.util.Map;
061 import java.util.Properties;
062 import java.util.Set;
063 import java.util.SortedSet;
064 import java.util.TreeSet;
065 import java.util.concurrent.ArrayBlockingQueue;
066 import java.util.concurrent.ThreadPoolExecutor;
067 import java.util.concurrent.TimeUnit;
068 import java.util.concurrent.locks.ReentrantLock;
069 import java.util.concurrent.locks.ReentrantReadWriteLock;
070
071 import org.hd.d.pg2k.ai.scorer.ScorerCacheIF;
072 import org.hd.d.pg2k.svrCore.AccessionData;
073 import org.hd.d.pg2k.svrCore.AllExhibitImmutableData;
074 import org.hd.d.pg2k.svrCore.AllExhibitProperties;
075 import org.hd.d.pg2k.svrCore.CoreConsts;
076 import org.hd.d.pg2k.svrCore.ExhibitFile;
077 import org.hd.d.pg2k.svrCore.ExhibitName;
078 import org.hd.d.pg2k.svrCore.ExhibitPropsComputableMutable;
079 import org.hd.d.pg2k.svrCore.ExhibitPropsLoadable;
080 import org.hd.d.pg2k.svrCore.ExhibitStaticAttr;
081 import org.hd.d.pg2k.svrCore.ExhibitThumbnails;
082 import org.hd.d.pg2k.svrCore.FileTools;
083 import org.hd.d.pg2k.svrCore.GenUtils;
084 import org.hd.d.pg2k.svrCore.HostUtils;
085 import org.hd.d.pg2k.svrCore.ImageUtils;
086 import org.hd.d.pg2k.svrCore.MemoryTools;
087 import org.hd.d.pg2k.svrCore.MemoryTools.RecurrentEmergencyFreeHandle;
088 import org.hd.d.pg2k.svrCore.MemoryTools.SoftReferenceMap;
089 import org.hd.d.pg2k.svrCore.Name;
090 import org.hd.d.pg2k.svrCore.Name.ExhibitFull;
091 import org.hd.d.pg2k.svrCore.PGMasterNotInServiceException;
092 import org.hd.d.pg2k.svrCore.ROByteArray;
093 import org.hd.d.pg2k.svrCore.Rnd;
094 import org.hd.d.pg2k.svrCore.SimpleLoggerIF;
095 import org.hd.d.pg2k.svrCore.Stratum;
096 import org.hd.d.pg2k.svrCore.TextUtils;
097 import org.hd.d.pg2k.svrCore.ThreadUtils;
098 import org.hd.d.pg2k.svrCore.Tuple;
099 import org.hd.d.pg2k.svrCore.MIME.ExhibitMIME;
100 import org.hd.d.pg2k.svrCore.location.LoadBalancingUtils;
101 import org.hd.d.pg2k.svrCore.props.GenProps;
102 import org.hd.d.pg2k.svrCore.props.LocalProps;
103 import org.hd.d.pg2k.svrCore.props.SecurityProps;
104 import org.hd.d.pg2k.svrCore.stats.StatsLogger;
105 import org.hd.d.pg2k.svrCore.vars.EventPeriod;
106 import org.hd.d.pg2k.svrCore.vars.EventVariableValue;
107 import org.hd.d.pg2k.svrCore.vars.PipelineVarMgr;
108 import org.hd.d.pg2k.svrCore.vars.SimpleVariableDefinition;
109 import org.hd.d.pg2k.svrCore.vars.SimpleVariableValue;
110 import org.hd.d.pg2k.svrCore.vars.SystemVariables;
111
112 import ORG.hd.d.IsDebug;
113
114 /**Exhibit pipeline cache stage.
115 * This performs transparent persistent cacheing of exhibit data and variables.
116 * <p>
117 * The presence of an instance of this stage upstream of a tunnel or
118 * other potentially slow/expensive/unreliable data source should,
119 * for normal data access (eg sequential download of exhibits)
120 * significantly reduce upstream bandwidth requirements
121 * and reduce downstream latency by answering requests from local cache.
122 * <p>
123 * This also attempts to shelter its downstream callers/users from I/O errors
124 * upstream, by fulfilling requests from cache,
125 * or, when synchronous calls upstream have to be made,
126 * transforming some requests and replies into async forms where possible.
127 * <p>
128 * When caching exhibit data this class only does so as a continuous prefix
129 * from offset zero; other (random) accesses may have to read-through the cache.
130 * <p>
131 * This cache is also able to precache data likely to be valuable,
132 * such as thumbnails and the initial portions of exhibits,
133 * though this will only be attempted if the cache appears to be in use.
134 * Bandwidth/resource consumption used by precacheing are regulated.
135 * This cache regards thumbnails and meta-data as precious,
136 * and tries not to let them go once collected and cached
137 * because reasonable application performance will often depend on
138 * fast access to these data.
139 * <p>
140 * This attempts to cache data well enough that, especially if aggressive
141 * (pre)cacheing is available and the cache area is large enough,
142 * almost no reference should be needed to the backend server in response to
143 * a data request on the pipeline except in response to the tail end
144 * of very long exhibits; all requests are answered from the local
145 * cache where possible. Ideally, it should be possible for the back-end
146 * server to go down altogether and have the front end still provide a
147 * high degree of functionality. The front-end and back-end are almost
148 * completely decoupled in this cache design.
149 * <p>
150 * This includes limited in-memory cacheing, in many cases using
151 * soft references to allow peaceful coexistence with other (potentially
152 * heavy) users of memory.
153 * <p>
154 * This class relies mainly on the asynchronous calls to poll() to fetch
155 * meta-data updates such as GenProps and AllExhibitImmutableData. These
156 * happen under a private lock and do not block cache access much or at all.
157 * <p>
158 * The full lock ordering (where multiple locks need to be taken at once) is:
159 * <ol>
160 * <li> ExhibitDataSimpleCache.rwl
161 * <li> ExhibitDataSimpleCache.metaData
162 * </ol>
163 * <p>
164 * This cache may serialise access to raw exhibit data (and may serialise
165 * accesses to back-end resources too). No two live instances of this
166 * class should refer to the same cache directory at once
167 * else madness and corruption will almost certainly break out.
168 * <p>
169 * On disc, the files are some prefix of the full exhibit, retrieved if possible
170 * in MAX_TRANSFER_CHUNK_SIZE chunks. They are touched every time
171 * accessed or updated and the timestamps can therefore be used as
172 * the basis of an LRU cache. We expect almost all access to be
173 * sequential, starting at the beginning.
174 * <p>
175 * We satisfy requests wholly within already-cached data immediately,
176 * and will extend (up to the limit) by up to one
177 * chunk each time by downloading from the server to satisfy
178 * requests just beyond the current end. Requests starting well beyond
179 * the current end of cache are punted directly to the server,
180 * which is rather ugly and slow, but there we go.
181 * <p>
182 * This cache only considers general precacheing until the low-water
183 * mark is reached, and by default only deletes existing entries if it has to
184 * in order to satisfy an incoming cacheable request.
185 * This means that stale entries for deleted/renamed exhibits
186 * may persist for a while, but this is mainly harmless.
187 * <p>
188 * This maintains a bidirectional cache of variable values and updates,
189 * and also some running parameters of the cache may be read as variables.
190 * <p>
191 * This cache supports a limited amount of peer-to-peer (P2P) data transfers
192 * to reduce load on the master. The general policy is that any
193 * synchronous (and thus presumably time-sensitive) data request from an end user
194 * that cannot be satisfied from local cache is satisfied upstream from the master.
195 * Asynchronous data fetches, such as read-ahead and precache activity,
196 * can be fetched P2P. Also if the master fails or is unavailable
197 * then it may be acceptable to use P2P if loops/cycles can be avoided.
198 * <p>
199 * TODO: re-analyse/reduce locking and possibly avoid locks on the metaData object.
200 */
201 public final class ExhibitDataSimpleCache implements SimpleExhibitPipelineIF
202 {
203 /**If true then trace/log all thumbnail activity; defaults to false/off.
204 * Usually only true while debugging/tuning.
205 * <p>
206 * Can be activated from the command-line.
207 */
208 private static final boolean TRACE_THUMBNAIL_ACTIVITY_ALL;
209
210 /**If true then trace/log interesting/unusual thumbnail activity; defaults to false/off.
211 * Usually only true while debugging/tuning.
212 * <p>
213 * Generally shows rare events such as generation of thumbnails
214 * and request to save NO_THUMBNAILS values
215 * and other non-routine activity.
216 * <p>
217 * Forced to true if TRACE_THUMBNAIL_ACTIVITY_ALL is true.
218 * <p>
219 * Can be activated from the command-line.
220 */
221 private static final boolean TRACE_THUMBNAIL_ACTIVITY;
222
223 static
224 {
225 boolean trace = false;
226 boolean traceAll = false;
227 try
228 {
229 traceAll = Boolean.getBoolean("org.hd.d.pg2k.svrCore.datasource.ExhibitDataSimpleCache.TRACE_THUMBNAIL_ACTIVITY_ALL");
230 trace = traceAll || Boolean.getBoolean("org.hd.d.pg2k.svrCore.datasource.ExhibitDataSimpleCache.TRACE_THUMBNAIL_ACTIVITY");
231 }
232 catch(final Exception e) { e.printStackTrace(); }
233 if(traceAll) { System.out.println("INFO: ExhibitDataSimpleCache.TRACE_THUMBNAIL_ACTIVITY_ALL enabled"); }
234 if(trace) { System.out.println("INFO: ExhibitDataSimpleCache.TRACE_THUMBNAIL_ACTIVITY enabled"); }
235 TRACE_THUMBNAIL_ACTIVITY = trace;
236 TRACE_THUMBNAIL_ACTIVITY_ALL = traceAll;
237 }
238
239 /**If true then trace/log P2P activity; defaults to false/off.
240 * Usually only true while debugging/tuning.
241 * <p>
242 * Can be activated from the command-line.
243 */
244 private static final boolean TRACE_P2P_ACTIVITY;
245
// Resolve the P2P-tracing flag once, at class-load time, from a system property.
static
    {
    boolean p2pTrace;
    try
        {
        p2pTrace = Boolean.getBoolean("org.hd.d.pg2k.svrCore.datasource.ExhibitDataSimpleCache.TRACE_P2P_ACTIVITY");
        }
    catch(final Exception e)
        {
        // Best-effort: report the problem and leave tracing off.
        e.printStackTrace();
        p2pTrace = false;
        }

    if(p2pTrace) { System.out.println("INFO: ExhibitDataSimpleCache.TRACE_P2P_ACTIVITY enabled"); }

    TRACE_P2P_ACTIVITY = p2pTrace;
    }
254
255
256
257 /**If true then we may purge cached data for exhibits that appear to have been deleted (or renamed).
258 * These are exhibits that have been "orphaned" in the cache,
259 * ie they are not accessible because they do not logically exist
260 * (though they do potentially serve as a backup in case of disaster).
261 * <p>
262 * We will generally only do this if:
263 * <ul>
264 * <li>the exhibit data has not been used for a long time, and
265 * <li>if cache size is above the low-water mark that would prevent us
266 * from doing useful precacheing.
267 * </ul>
268 * <p>
269 * We do not rush to delete exhibits' data in case a transient problem
270 * has made an exhibit disappear temporarily.
271 * (Data for deleted exhibits will in any case eventually be deleted LRU
272 * (Least-Recently-Used) if the cache becomes full.)
273 */
274 private static final boolean ORPHANED_EXHIBIT_EXPIRY_ALLOWED = true;
275
276 /**The minimum time before we will preemptively purge orphaned cache entries (ms); strictly positive.
277 * We don't expect to delete or rename exhibits very often,
278 * and the only harm in NOT purging them may be to prevent precacheing of new exhibits,
279 * ie a minor performance issue rather than a correctness issue.
280 * <p>
281 * Add a random component so that all clients do not purge orphans at once!
282 * <p>
283 * A value of the order of a few days to a few months is probably reasonable.
284 */
285 private static final long ORPHANED_EXHIBIT_MIN_UNUSED_TIME_MS = 14 * 24 * 3600 * 1000L + // A fortnight.
286 Rnd.fastRnd.nextInt(4 * 24 * 3600 * 1000);
287
288 /**Base dir within cache dir for all our exhibit data. */
289 private static final String CACHE_BASE_DIR = "_exhibitsCache";
290
291 /**Name of file in which to persist immutable exhibit data. */
292 private static final String CACHE_EXPROPS_FILENAME = CACHE_BASE_DIR + "/_ExProps.dat";
293
294 /**Name of file in which to persist cache meta data. */
295 private static final String CACHE_METADATA_FILENAME = CACHE_BASE_DIR + "/_metadata.dat";
296
297 /**If true, store exprops data (and cache metadata) GZIPed to possibly save space and I/O time. */
298 private static final boolean STORE_EXPROPS_GZIPED = true;
299
300 /**Base dir within cache dir for all our raw exhibit content data. */
301 private static final String CACHE_EXDATA_DIR = CACHE_BASE_DIR + "/exd";
302
303 /**Base dir within cache dir for all our event history data. */
304 private static final String EVENT_HISTORY_DIR = CACHE_BASE_DIR + "/history";
305
306 /**The prefix for all aux files associated with an exhibit file. */
307 private static final String CACHE_EXAUX_PREFIX = ".aux.";
308
309 /**The keyword for the file containing the (decimal) exhibit timestamp. */
310 private static final String CACHE_EXAUX_TIMESTAMP_KW = "ts";
311
312 /**The keyword for the file containing the serialised thumbnails object. */
313 private static final String CACHE_EXAUX_TN_KW = "tn";
314
315 /**Maximum (normal) exhibit data transfer chunk size (bytes); strictly positive.
316 * Maximum chunk transferred in one call (bytes),
317 * to avoid creating huge gaps in other activity by
318 * jamming up transactions and/or hogging all I/O bandwidth.
319 * <p>
320 * Should probably be of the order of a few tens of kBytes
321 * to allow efficient transfers on the wire, and a power of
322 * two to be more likely to interact efficiently with other
323 * caches (and network protocols).
324 * <p>
325 * If we use (a small multiple of) the bulk data transfer chunk size
326 * this will be reasonably efficient in terms of disc/network traffic,
327 * and if we can also keep it aligned to whole chunk boundaries
328 * then we may get maximally efficient accesses into disc (etc) data.
329 */
330 private static final int MAX_TRANSFER_CHUNK_SIZE = Math.max(1<<16,
331 2 * CoreConsts.BULK_DATA_TRANSFER_SIZE);
332
333 /**Maximum extended exhibit data transfer chunk size (bytes); strictly positive.
334 * When asked for data but we have a small gap in our cache before the request start
335 * normally we would have to pass the result upstream and not cache any.
336 * This is potentially very wasteful in P2P sharing of new exhibits.
337 * <p>
338 * So in this case we may allow the upstream request window to be moved back
339 * to patch up the hole from the end of our cached data to the start of the request,
340 * thus allowing us to capture the result.
341 * <p>
342 * This value should be a small multiple of the MAX_TRANSFER_CHUNK_SIZE.
343 */
344 private static final int MAX_EXTD_TRANSFER_CHUNK_SIZE = MAX_TRANSFER_CHUNK_SIZE << 2;
345
346 /**The maximum number of bytes we will force a transfer of to force an immediate thumbnail generation.
347 * This might as well be at least one block,
348 * and might as well be a little bigger than we anticipate the
349 * resulting thumbnail size to be (limited to),
350 * but no larger than the maximum single transfer that we can make.
351 */
352 private static final int MAX_REMOTE_FETCH_TO_MAKE_THUMBNAIL =
353 Math.min(MAX_USER_READ_SIZE,
354 Math.max(MAX_TRANSFER_CHUNK_SIZE,
355 ExhibitThumbnails.MAX_BYTES_EST));
356
357 /**Minimum cache size to allow if GenProps is not set (bytes).
358 * This prevents thrashing on an empty cache and should be enough
359 * to store a few thumbnails and a few reasonable image chunks.
360 */
361 private static final int FALLBACK_MIN_CACHE_SIZE =
362 Math.max(1234567, 8 * MAX_TRANSFER_CHUNK_SIZE) +
363 (50 * ExhibitThumbnails.MAX_BYTES_EST);
364
365 /**If true then accessing a thumbnail marks its exhibit as accessed.
366 * This means that for the purposes of avoiding eviction from
367 * cache, accessing a thumbnail is taken as being as significant
368 * as downloading (part of) the exhibit itself, if true.
369 * <p>
370 * If false then only actually reading part of the exhibit itself
371 * helps keep the exhibit ``fresh'' in the cache.
372 */
373 private static final boolean THUMBNAIL_ACCESS_UPDATES_ACCESS_TIMESTAMP = false;
374
375 /**Approximate minimum interval between rechecks of on-disc cache.
376 * When a check is done, the in-memory record of disc cache status
377 * is reloaded from disc, any debris is removed, etc.
378 * <p>
379 * A period of the order of at least a day is probably about right;
380 * not being exactly a multiple helps to ensure that we do not hit the same time
381 * every day, which might otherwise collide with other regular activity.
382 * Note that this recheck may take at least several minutes,
383 * so we don't want to do it too often!
384 * <p>
385 * The chosen interval is less than 2D (including a random component).
386 * We use less than a day-multiple to tend to be waiting for energy
387 * to become available at a similar time each day, eg from solar PV,
388 * when the system is in energy-conserving mode.
389 * This seems frequent enough in practice.
390 */
391 private static final int DISC_RECHECK_INTERVAL_MS =
392 (36 * 3600 * 1000) + Rnd.fastRnd.nextInt(11 * 3600 * 1000);
393
394 /**Approximate minimum interval between saves of the metadata; strictly positive.
395 * Since access to the exhibit/thumbnail data causes this to be updated
396 * (along with more significant changes to the cache),
397 * and saving can take significant time, we do not
398 * want to save this immediately we encounter a change.
399 * <p>
400 * We can postpone a save for a while at the risk that if the
401 * system crashes or shuts down during that time and there
402 * was a structural change to the cache, we might have to abandon
403 * the old metadata and start again, which could be slow and
404 * a bit messy (losing some useful though not vital information).
405 * <p>
406 * Taking our cue from the old UNIX sync interval of 30s,
407 * a value in the range 30s to a few minutes is probably reasonable.
408 * Larger values of several minutes help reduce disc (write) activity
409 * which may be important for (say) solid-state storage such as Flash.
410 */
411 private static final int METADATA_MIN_SAVE_INTERVAL_MS =
412 (5 * 60 * 1000) + Rnd.fastRnd.nextInt(60 * 1000);
413
414
415 /**Our local logger; never null. */
416 private final SimpleLoggerIF logger;
417
418 /**The stats set to which we log general cache behaviour.
419 * The unique codes are the constants SCGNAME_XXX.
420 */
421 private final StatsLogger.StatsConfig statsIDSCGEN;
422
423 /**General stats event name: the cache meta-data was saved. */
424 public static final String SCGNAME_MDSAVE = "metaDataSave";
425
426 /**General stats event name: an exhibit was evicted from the cache in LRU order. */
427 public static final String SCGNAME_CACHEEVICTLRU = "exhibitEvictedFromCacheLRU";
428
429 /**General stats event name: an exhibit was removed from the cache. */
430 public static final String SCGNAME_CACHEREM = "exhibitRemovedFromCache";
431
432 /**General stats event name: an exhibit was added to the cache. */
433 public static final String SCGNAME_CACHEADD = "exhibitAddedToCache";
434
435 /**General stats event name: a corrupt exhibit was removed from the cache. */
436 public static final String SCGNAME_CACHEREM_CORRUPT = "exhibitRemovedFromCacheCORRUPT";
437
438 /**General stats event name: an exhibit in the cache was fully validated against checksums, etc. */
439 public static final String SCGNAME_CACHE_VALIDATION = "exhibitValidatedInCache";
440
441 /**General stats event name: an exhibit in the cache was partially validated against checksums, etc. */
442 public static final String SCGNAME_CACHE_VALIDATION_PART = "exhibitValidatedInCachePartial";
443
444 /**General stats event name: cache raw data read miss.
445 * We had to go upstream for at least part of the data.
446 * <p>
447 * (It is possible to have a hit and a miss on the same read
448 * if part is satisfied from cache and part not.)
449 */
450 public static final String SCGNAME_CACHERAWDATAMISS = "exhibitCacheRawDataMiss";
451
452 /**General stats event name: cache raw data read hit.
453 * We satisfied at least part of the read from cache.
454 * <p>
455 * (It is possible to have a hit and a miss on the same read
456 * if part is satisfied from cache and part not.)
457 */
458 public static final String SCGNAME_CACHERAWDATAHIT = "exhibitCacheRawDataHit";
459
460 /**General stats event name: on-disc cache hit for thumbnail. */
461 public static final String SCGNAME_CACHETNHIT = "exhibitCacheThumbnailHit";
462
463 /**General stats event name: in-memory cache hit for thumbnail. */
464 public static final String SCGNAME_CACHETNMEMHIT = "exhibitCacheThumbnailMemoryHit";
465
466 /**General stats event name: cache miss for thumbnail. */
467 public static final String SCGNAME_CACHETNMISS = "exhibitCacheThumbnailMiss";
468
469 /**General stats event name: fetched a data block from a peer. */
470 public static final String SCGNAME_DATAFETCHFROMPEER_PREFIX = "exhibitPeerDataFetch-";
471
472 /**General stats event name: restarted scanning all exhibits for precaching. */
473 public static final String SCGNAME_PRECACHERESTART = "exhibitPrecacheRestart";
474
475 /**General stats event name: exhibits examined for precaching. */
476 public static final String SCGNAME_PRECACHEEXAMINED = "exhibitPrecacheExhibitsExamined";
477
478 /**General stats event name: exhibit data block precached. */
479 public static final String SCGNAME_PRECACHEEXDATABLOCK = "exhibitPrecacheExhibitBlock";
480
481 /**General stats event name prefix: exhibit data block precache source (if not from master/upstream). */
482 public static final String SCGPREF_PRECACHEEXDATABLOCKSRC = "exhibitPrecacheExhibitBlockSrc=";
483
484 /**General stats event name prefix: exhibit data block precache source for error (if not from master/upstream). */
485 public static final String SCGPREF_PRECACHEEXDATABLOCKSRCERR = "exhibitPrecacheExhibitBlockSrcErr=";
486
487 /**General stats event name prefix: exhibit data block precache (successful) fetch time (log2 ms). */
488 public static final String SCGPREF_PRECACHEEXDATABLOCKFETCHTIME = "exhibitPrecacheExhibitBlockFetchTime=";
489
490 /**General stats event name: errors encountered during precaching. */
491 public static final String SCGNAME_PRECACHEERROR = "exhibitPrecacheErrors";
492
493 /**General stats event name: incoming request for exhibit data. */
494 public static final String SCGNAME_EXDATAREQIN = "exhibitRawDataRequestIn";
495
496 /**General stats event name: incoming request for exhibit data with "dontCache" flag set. */
497 public static final String SCGNAME_EXDATAREQINDC = "exhibitRawDataRequestIn_dontCache";
498
499 /**General stats event name: incoming request for exhibit thumbnails. */
500 public static final String SCGNAME_EXTHUREQIN = "exhibitThumbnailRequestIn";
501
502 /**General stats event name: incoming request for exhibit thumbnails with "dontCreate" flag set. */
503 public static final String SCGNAME_EXTHUREQINDC = "exhibitThumbnailRequestIn_dontCreate";
504
505 /**General stats event name: created thumbnails locally from cached data. */
506 public static final String SCGNAME_EXTHUCREATED = "exhibitThumbnailCreated";
507
508 /**If true, assume that newly-loaded meta-data is OK at start-up until proven otherwise. */
509 private static final boolean ASSUME_LOADED_METADATA_OK = true;
510
511 /**If true, synchronously force a save of meta-data each time we add a new entry at least.
512 * May be slow (O(n^2) for n exhibits),
513 * especially where the cache is not large enough to hold all exhibits
514 * so items are continually coming and going,
515 * but potentially makes the system more robust against loss of data.
516 */
517 private static final boolean FORCE_IMMEDIATE_SAVE_ON_EXPANDED_METADATA = false;
518
519
520 /**Key in generic props of P2P-profiling flag. */
521 private static final String KEY_debugFlag_P2P_BLOCKXFER = CoreConsts.GEN_PREFIX_debugFlag + "P2P.profile.blockXfer";
522
523
524 /**Maximum number of "best" exhibits to get enhanced precaching; non-negative. */
525 private static final int MAX_BEST_EX_PRECACHED = 301;
526
527 /**Contains the full exhibit names of the "best" few exhibits for enhanced precacheing; never null.
528 * Maintained/updated by _doPrecache().
529 * <p>
530 * This is a snapshot of what the "best" exhibits are estimated to be
531 * as each precacheing round starts,
532 * often based on "quick approximation" data.
533 * <p>
534 * The implementation is optimised for fast lookup with "contains()".
535 * <p>
536 * Thread-safe.
537 */
538 private final Set<Name.ExhibitFull> _bestExhibits =
539 Collections.synchronizedSet(new HashSet<Name.ExhibitFull>(2 * MAX_BEST_EX_PRECACHED));
540
541 /**Cache meta-data class.
542 * Holds information about what exhibit data is cached on disc.
543 * <p>
544 * Is serialisable to be able to persist to disc and
545 * allow for a fast start-up.
546 * <p>
547 * Can be marked read-only to hold the cache in its current state.
548 * <p>
549 * This object is completely thread-safe and holds its instance
550 * lock to synchronise activity.
551 * <p>
552 * The names of all members affecting the state of cached exhibit data
553 * and meta-data start with `exhibit'.
554 * <p>
555 * All activity that actually affects disc operations must be
556 * passed the rwl and it will be held while disc
557 * I/O takes place, and most of these operations will throw
558 * an IOException.
559 * (Note that if the ExhibitDataSimpleCache instance lock
560 * must be held as well then it and the rwl must be
561 * grabbed in the appropriate order before one of these
562 * routines is called.)
563 */
564 private final static class MetaData implements Serializable,
565 ObjectInputValidation
566 {
/**Build an empty, read-only set of meta-data.
 *
 * @throws IllegalArgumentException if the freshly-built object fails
 *     its own validation; the underlying InvalidObjectException
 *     is attached as the cause
 */
MetaData()
    {
    // Update derived data.
    _recomputeDerivedValues();

    // Verify object state,
    // retaining the original failure as the cause to aid diagnosis.
    try { validateObject(); }
    catch(final InvalidObjectException e)
        { throw new IllegalArgumentException(e.getMessage(), e); }
    }
578
579 /**Build a new meta-data set by examining the given cache directory.
580 * This is understood not to be perfect, because, for example,
581 * the last-accessed time may not be available on disc and
582 * we may have to approximate with an available last-modified-time;
583 * generally this is used when:
584 * <ol>
585 * <li>no persisted MetaData value is available, and we have
586 * to do the best we can, or,
587 * <li>we want to load a copy of what is on disc to check that
588 * what we have in memory is not badly wrong.
589 * </ol>
590 * <p>
591 * No locks (ie especially the rwl or cache's instance lock)
592 * should be held while this is running.
593 * <p>
594 * This does not modify the disc.
595 * <p>
596 * The constructed object is marked as needing to be saved.
597 * <p>
598 * This relies in part on being able to find the data files for
599 * the cached (prefixes of) exhibits by their valid full exhibit names.
600 * <p>
601 * We do also explicitly attempt to recover any plausible entries
602 * corresponding to current AEP entries.
603 * <p>
604 * Thus we may miss entries which have no current data file,
605 * eg because we had zero bytes cached for that file,
606 * and may have to recover such data later as we run across it.
607 * <p>
608 * We do also explicitly attempt to recover any plausible entries
609 * corresponding to current AEP entries.
610 *
611 * @param aepCurrent reasonably current AEP as a hint for entries to look for;
612 * never null
613 * @param cacheDir the cache top-level directory; never null
614 * @param logger to log to; never null
615 *
616 * @throws java.io.IOException in case of major problems in reconstructing
617 * the cache state (minor problems will be ignored)
618 */
619 MetaData(final AllExhibitProperties aepCurrent,
620 final File cacheDir,
621 final SimpleLoggerIF logger)
622 throws IOException
623 {
624 assert(aepCurrent != null);
625 assert(cacheDir != null);
626 assert(logger != null);
627
628 // Compute the base dir of the exhibit data cache.
629 final File dataBaseDir = new File(cacheDir, CACHE_EXDATA_DIR);
630
631 if(!dataBaseDir.isDirectory() ||
632 !dataBaseDir.canRead() ||
633 !dataBaseDir.canWrite())
634 { throw new IOException("invalid or unusable cache directory ``"+dataBaseDir+"''"); }
635
636 // Get the set of cached files,
637 // but not checking validity (eg magic numbers)
638 // because:
639 // a) there may not have cached enough for the item to be strictly valid,
640 // b) this should be efficient/quick.
641 final Set<Name.ExhibitFull> cachedFileNames = ExhibitFile.getFilesystemBasedExhibitNames(
642 dataBaseDir, false);
643
644 // We also throw in all names from the supplied AEP as candidates.
645 cachedFileNames.addAll(aepCurrent.aeid.getAllExhibitNamesSorted());
646
647 // Now load all cached file stats into in-memory records.
648 final Iterator<Name.ExhibitFull> it = cachedFileNames.iterator();
649 while(it.hasNext())
650 {
651 final Name.ExhibitFull name = it.next();
652 try {
653 final CachedFile cf = CachedFile.recoverExtantCachedFileDetails(cacheDir, name);
654
655 // Verify that the candidate has something worth recovering,
656 // (or at least inspecting and discarding to free up space)
657 // ie exhibit data and/or thumbnails.
658 // TODO: this may include other metadata (eg FEC) in future.
659 if((cf.cachedLength > 0) || cf.hasThumbnails() ||
660 (cf.lastAccessed > 0))
661 { cachedExhibits.put(name, cf); }
662 }
663 catch(final Exception e)
664 {
665 // Since we seemed to encounter a problem with this putative
666 // item, we ignore it.
667 // FIXME: Possibly we should forcefully purge it.
668 logger.log("[ExhibitDataSimpleCache.MetaData: ERROR: trouble loading cache item '"+name+"': "+e.getMessage()+"]");
669 }
670 }
671
672 // We should save this data that we have captured.
673 setNeedsSave();
674
675 // Update derived data.
676 _recomputeDerivedValues();
677
678 // Verify object state.
679 try { validateObject(); }
680 catch(final InvalidObjectException e)
681 { throw new IllegalArgumentException(e.getMessage()); }
682 }
683
684 /**Set of cached exhibits, from full exhibit name to CachedFile entry; never null after construction/deserialisation.
685 * We may like to optimise memory use by sharing the String values (etc)
686 * with those from AllExhibitImmutableData held elsewhere.
687 * <p>
688 * This is the primary store of data;
689 * other (transient) values are derived from it;
690 * this is saved to and restored from the serialised form
691 * in an efficient and defensive way.
692 * <p>
693 * This is a Hashtable for thread-safety.
694 * <p>
695 * Compound operations may be made atomic by a lock on this object,
696 * but no other lock may be taken while this happens.
697 * <p>
698 * Any access that updates this table should be under the instance lock.
699 */
700 private /*final*/ transient Map<Name.ExhibitFull, CachedFile> cachedExhibits = new Hashtable<Name.ExhibitFull, CachedFile>();
701
702 /**Get Set of exhibits on which metadata is currently held in this cache.
703 * This takes a private atomic copy of the Set of exhibits
704 * for which we have some data, thumbnails, etc, in the cache.
705 */
706 /*synchronized*/ Set<Name.ExhibitFull> getKnownExhibits()
707 { return(new HashSet<Name.ExhibitFull>(cachedExhibits.keySet())); }
708
709 /**LRU ordered set of the CachedFile values in cachedExhibits; never null after construction/deserialisation.
 * This should hold exactly the same elements as cachedExhibits.values().
711 * <p>
712 * The least-recently-accessed items are first in this list,
713 * and are the first to be discarded if the cache becomes over-full.
714 * <p>
715 * This is not of itself thread-safe and all access must be under
716 * the instance lock.
717 */
718 private /*final*/ transient SortedSet<CachedFile> exhibitsLRU;
719
720 /**Estimated total bytes of disc used by cached entries.
721 * Computed as sum of estimates from CachedFile values.
722 * <p>
723 * Write access restricted to _setTotalBytes();
724 * marked volatile to allow unlocked <em>read</em> access.
725 */
726 private volatile transient long totalBytes;
727
728 /**Get estimated total bytes of disc used by cached entries; never negative. */
729 long getTotalBytesCurrentlyUsedByCache() { return(totalBytes); }
730
731 /**Set estimated total bytes of disc used by cached entries; never negative. */
732 private synchronized void _setTotalBytes(final long newTB)
733 {
734 assert(newTB >= 0);
735 totalBytes = newTB;
736 if(newTB > totalBytesHighWaterMark) { totalBytesHighWaterMark = newTB; }
737 }
738
739 /**The highest value of totalBytes, ie the cache actual high-water mark; never negative.
740 * Computed as sum of estimates from CachedFile values.
741 * <p>
742 * Write access restricted to _setTotalBytes() and by _setTotalBytesHighWaterMark()
743 * and must be under the instance lock.
744 * <p>
745 * Marked volatile to allow lock-free <em>read</em> access.
746 */
747 private volatile transient long totalBytesHighWaterMark;
748
749 /**Get the highest value of totalBytes, ie the cache actual high-water mark; never negative. */
750 long getTotalBytesHighWaterMark() { return(totalBytesHighWaterMark); }
751
752 /**Resets totalBytesHighWaterMark to the current value of totalBytes. */
753 // private synchronized void _setTotalBytesHighWaterMark() { totalBytesHighWaterMark = getTotalBytes(); }
754
755 /**This private routine updates all (transient) data derived from the primary cachedExhibits map.
756 * This is used after deserialisation, construction or insertion where
757 * it is easier to recompute such derived data from scratch
758 * than change it incrementally as usual.
759 */
760 private synchronized void _recomputeDerivedValues()
761 {
762 final SortedSet<CachedFile> _new_exhibitsLRU = new TreeSet<CachedFile>();
763 long _new_totalBytes = 0;
764
765 for(final Iterator<Name.ExhibitFull> it = cachedExhibits.keySet().iterator(); it.hasNext(); )
766 {
767 final Name.ExhibitFull name = it.next();
768 final CachedFile cf = cachedExhibits.get(name);
769
770 _new_exhibitsLRU.add(cf);
771 _new_totalBytes += cf.calcDiscSpace();
772 }
773
774 // Save new computed values.
775 exhibitsLRU = _new_exhibitsLRU;
776 _setTotalBytes(_new_totalBytes);
777 }
778
779 /**If true, we need a save to disc (ie our state has changed).
780 * Is transient so always false when we recover from disc.
781 * <p>
782 * Is volatile (so accessed with no lock) only by
783 * setNeedsSave() (to set it true) and
784 * saveToDisc() (to clear it after a successful save to disc).
785 */
786 private transient volatile boolean needsSave;
787
788 /**Get the 'needs to be saved to disc' value.
789 * Always false after deserialisation and construction of a new instance.
790 */
791 boolean getNeedsSave() { return(needsSave); }
792
793 /**Set the `needs to be saved to disc' value to be true. */
794 void setNeedsSave() { needsSave = true; }
795
796 /**Save (serialise) to disc; throws IOException in case of difficulty.
797 * Holds a lock on the metaData object while the save takes place;
798 * clears needsSave (to false) if the save appears to be successful.
799 * <p>
800 * FIXME: see if this and writeObject() should simply make the object read-only for the duration
801 *
802 * @param cacheDir specified the top-level cache directory; never null
803 */
804 void saveToDisc(final File cacheDir,
805 final SimpleLoggerIF logger,
806 final StatsLogger.StatsConfig statsIDSCGEN)
807 throws IOException
808 {
809 assert(null != cacheDir);
810
811 final File toSaveTo = new File(cacheDir, CACHE_METADATA_FILENAME);
812
813 final long start = System.currentTimeMillis();
814
815 // Do as little as possible with the lock held...
816 synchronized(this)
817 {
818 FileTools.serialiseToFile(this, toSaveTo, STORE_EXPROPS_GZIPED, !IsDebug.isDebug);
819 needsSave = false; // OK, seems to have worked.
820 }
821
822 logger.log("[ExhibitDataSimpleCache.MetaData.saveToDisc() done: "+ (System.currentTimeMillis()-start) +"ms.]");
823
824 // Note save of meta-data from poll()...
825 StatsLogger.captureDataPoint(statsIDSCGEN, SCGNAME_MDSAVE);
826 }
827
828 /**If true, this cache meta-data is read/write.
829 * This can be used to pin the state of the cache
830 * while we compare it to on-disc state and/or
831 * while we are unsure of its veracity.
832 * <p>
833 * Is transient so that it is deserialised as false,
834 * and is also false on construction of a new empty MetaData instance;
835 * should only be set true after comparing with
836 * or recreating from, real disc state.
837 * <p>
838 * Is volatile so that it can be read without taking a lock.
839 */
840 private volatile transient boolean readWrite;
841
842 /**Get the `cache-is-read/write' value.
843 * Always false after deserialisation and after construction of a new instance.
844 */
845 boolean isReadWrite() { return(readWrite); }
846
847 /**Set the `cache-is-read/write' value.
848 * Only the cache-checking routines should call this at all.
849 * <p>
850 * Insists on grabbing the instance lock to block changes to this
851 * without the instance lock held.
852 */
853 synchronized void setReadWrite(final boolean rw) { readWrite = rw; }
854
855
856 /**Get count of cached exhibits, including partially-cached ones; never negative. */
857 /*synchronized*/ int size() { return(cachedExhibits.size()); }
858
859 /**Is our meta-data store empty, ie no exhibits cached at all? */
860 /*synchronized*/ boolean isEmpty() { return(cachedExhibits.isEmpty()); }
861
862 /**Get cached-file details for a given exhibit; null if exhibit not cached.
863 * This may incrementally verify cache data against the disc copy,
864 * though for performance reasons this may not happen very often.
865 * <p>
866 * This is thread-safe and does not need nor grab any cache locks,
867 * but to ensure this data remains valid during any compound operation,
868 * at least a read lock should usually be obtained.
869 *
870 * @param name the exhibit whose meta-data is required; never null
871 */
872 CachedFile exhibitGetInfo(final Name.ExhibitFull name)
873 { return(cachedExhibits.get(name)); }
874
875 /**Check if the exhibit is fully loaded.
876 * <p>
877 * This does not grab any cache locks,
878 * but to ensure this data remains valid during any compound operation,
879 * at least a read lock should generally be obtained.
880 *
881 * @param esa the exhibit whose meta-data is required; never null
882 *
883 * @return false if no such exhibit or it is not fully loaded
884 */
885 boolean exhibitIsFullyLoaded(final ExhibitStaticAttr esa)
886 //throws IOException
887 {
888 if(esa == null) { return(false); }
889 final CachedFile icf = exhibitGetInfo(esa.getExhibitFullName());
890 if((icf == null) || (icf.cachedLength != esa.length))
891 { return(false); }
892 return(true); // Looks OK.
893 }
894
895 /**Remove cached-file details, adjusting cache data in situ; does nothing if not present.
896 * The argument must not be null.
897 * Private to this instance.
898 * <p>
899 * Does not check the read/write status; should be checked
900 * before this is called.
901 * <p>
902 * Cache write lock must be held by current thread.
903 */
904 private synchronized void _remove(final ReentrantReadWriteLock rwl,
905 final Name.ExhibitFull name)
906 {
907 assert (rwl != null) || (name != null);
908
909 // Write lock must be held by current thread.
910 assert(rwl.isWriteLockedByCurrentThread());
911
912 // Data should be consistent when we start.
913 assert (exhibitsLRU != null) && (exhibitsLRU.size() == cachedExhibits.size()) && (totalBytes >= 0);
914
915 final CachedFile oldCf = exhibitGetInfo(name);
916 if(oldCf != null)
917 {
918 // If old data exists, carefully remove it
919 // from all the places it affects.
920 _setTotalBytes(getTotalBytesCurrentlyUsedByCache() - oldCf.cachedLength);
921 exhibitsLRU.remove(oldCf);
922 cachedExhibits.remove(oldCf.name);
923
924 // Note need for meta-data save.
925 setNeedsSave();
926
927 // Data should be consistent after removing old metadata.
928 assert (exhibitsLRU != null) && (exhibitsLRU.size() == cachedExhibits.size()) && (totalBytes >= 0);
929 }
930 }
931
932 /**Update or add new cached-file details, adjusting cache data in situ.
933 * The argument must not be null.
934 * Private to this instance.
935 * <p>
936 * Does not check the read/write status; should be checked
937 * before this is called.
938 * <p>
939 * Marks the meta-data as needing to be saved.
940 * <p>
941 * Cache write lock must be held by current thread.
942 */
943 private synchronized void _update(final ReentrantReadWriteLock rwl,
944 final CachedFile newCf,
945 final SimpleLoggerIF logger)
946 {
947 assert (rwl != null) && (newCf != null);
948
949 // Write lock must be held by current thread.
950 assert(rwl.isWriteLockedByCurrentThread());
951
952 // Data should be consistent when we start.
953 assert (exhibitsLRU != null) && (exhibitsLRU.size() == cachedExhibits.size()) && (totalBytes >= 0);
954
955 // First, zap any extant entry.
956 _remove(rwl, newCf.name);
957
958 // Now add new data incrementally.
959 cachedExhibits.put(newCf.name, newCf);
960 exhibitsLRU.add(newCf);
961 _setTotalBytes(getTotalBytesCurrentlyUsedByCache() + newCf.cachedLength);
962
963 // Check for internal consistency...
964 if(cachedExhibits.size() != exhibitsLRU.size())
965 {
966 logger.log("ExhibitDataSimpleCache.MetaData._update(): ERROR: internal data structures incosistent: repairing...");
967
968 // Chuck away old values; insert new ones...
969 exhibitsLRU.clear();
970 exhibitsLRU.addAll(cachedExhibits.values());
971 _recomputeDerivedValues();
972 }
973
974 // Note need for meta-data save.
975 setNeedsSave();
976
977 // Data should be consistent when we finish.
978 assert (exhibitsLRU != null) && (exhibitsLRU.size() == cachedExhibits.size()) && (totalBytes >= 0);
979 }
980
981 /**Returns true if this metadata is essentially equivalent to another one.
982 * This has a sufficiently loose notion to allow for discrepancies
983 * in (for example) last-accessed timestamps which cannot be completely
984 * reconstructed from the disc copy, but vital differences are noted.
985 * <p>
986 * The result is false if the argument is null.
987 */
988 synchronized boolean isEquivalent(final MetaData other)
989 {
990 if(other == null) { return(false); }
991
992 // Clearly not equivalent if number of cached exhibits differ...
993 if(size() != other.size()) { return(false); }
994
995 // Check that each cached entry matches,
996 // except in the last-accessed timestamp values.
997 for(final Name.ExhibitFull name : cachedExhibits.keySet())
998 {
999 final CachedFile cf1 = exhibitGetInfo(name);
1000 final CachedFile cf2 = other.exhibitGetInfo(name);
1001
1002 // If we find a significant discrepancy,
1003 // these MetaData sets are not equivalent.
1004 if(!cf1.isEquivalent(cf2)) { return(false); }
1005 }
1006
1007 return(true); // Seem to be essentially the same.
1008 }
1009
1010 /**Install info from new cache object in this one.
1011 * This would allow us to incorporate the results
1012 * of a disc scan to see the actual state of the disc.
1013 * (Usually the argument is the best-efforts reconstruction
1014 * from the disc of the cache state, with the timestamps
1015 * arbitrarily too old.)
1016 * <p>
1017 * This essentially throws away the old state and
1018 * replaces it with that passed in,
1019 * though this may do things such as retain the best
1020 * notion of last-accessed timestamp from both.
1021 * <p>
1022 * Both this and the newData objects are locked for
1023 * the duration of this operation.
1024 * <p>
1025 * The readWrite status is not altered or updated
1026 * by this operation but needsSave may be;
1027 * therefore both objects must be read-only before this is called.
1028 * <p>
1029 * Only the cache-checking routines should call this at all.
1030 */
1031 synchronized void mergeWithNewMetaData(final MetaData newData,
1032 final SimpleLoggerIF logger)
1033 throws IllegalStateException
1034 {
1035 synchronized(newData)
1036 {
1037 if(readWrite || newData.readWrite)
1038 { throw new IllegalStateException("must both be read-only"); }
1039
1040 // By default, assume that this data will need saving.
1041 setNeedsSave();
1042
1043 // Save the old data away.
1044 final Map<Name.ExhibitFull,CachedFile> oldCachedExhibits = new HashMap<Name.ExhibitFull, CachedFile>(cachedExhibits);
1045
1046 // Replace old data with new.
1047 // Don't preserve any data from the old set.
1048 cachedExhibits.clear();
1049 cachedExhibits.putAll(newData.cachedExhibits);
1050
1051 // Now for every new entry,
1052 // if we have a usable old CachedFile entry
1053 // use it but fix it up from disc.
1054 for(final Iterator<Name.ExhibitFull> it = cachedExhibits.keySet().iterator(); it.hasNext(); )
1055 {
1056 final Name.ExhibitFull name = it.next();
1057 final CachedFile newCf = cachedExhibits.get(name);
1058 final CachedFile oldCf = oldCachedExhibits.get(name);
1059 if(!newCf.isEquivalent(oldCf)) { continue; } // Old value not usable.
1060
1061 // Restore old value, fixed-up.
1062 cachedExhibits.put(name, oldCf.fixup(newCf, logger));
1063 }
1064
1065 // Update derived data.
1066 _recomputeDerivedValues();
1067 }
1068 }
1069
1070 /**Mark exhibit as accessed (`touch' it) and returns the possibly-modified CachedFile record.
1071 * Returns null if the named exhibit does not exist in the cache at all,
1072 * else never returns null.
1073 * <p>
1074 * This call does not access disc (ie affects in-memory status only),
1075 * and silently does nothing if there is no entry for the named
1076 * exhibit.
1077 * <p>
1078 * If this metadata not read-write, this action is silently vetoed
1079 * as it is assumed not to be of vital importance.
1080 * <p>
1081 * This marks the last-accessed time in the MetaData as now,
1082 * to be preserved on disc at some point in the future.
1083 * <p>
1084 * This MetaData object is marked as needing to be saved if
1085 * an exhibit was `touched' successfully.
1086 * <p>
1087 * This is preferably used to:
1088 * <ul>
1089 * <li>Mark end-user access to an exhibit's data.
1090 * <li>Mark some expensive operation (such as computing thumbnails)
1091 * which we would like to avoid throwing away too soon.
1092 * </ul>
1093 * <p>
1094 * This holds a write lock on the cache while updating state.
1095 *
1096 * @param cacheDir if not null
1097 * then we force an update of a timestamp on disc
1098 * as a backup in case the serialised metadata is lost;
1099 * this causes extra disc traffic though not necessarily synchronous
1100 */
1101 CachedFile exhibitMarkAsAccessed(final ReentrantReadWriteLock rwl,
1102 final Name.ExhibitFull name,
1103 final SimpleLoggerIF logger,
1104 final File cacheDir)
1105 throws IOException
1106 {
1107 _getWriteLock(rwl, "exhibitMarkAsAccessed()", logger);
1108 try
1109 {
1110 final CachedFile cf = exhibitGetInfo(name);
1111 if(cf == null) { return(null); }
1112
1113 // Ignore `touch' if read-only.
1114 if(!isReadWrite()) { return(cf); }
1115
1116 // Make new touched entry.
1117 final long now = System.currentTimeMillis();
1118 final CachedFile touched = cf.touchedEntry(now);
1119
1120 // Update with touched entry.
1121 _update(rwl, touched, logger);
1122
1123 // Force secondary timestamp backup to disc if requested.
1124 if(cacheDir != null)
1125 {
1126 final File dataFile = new File(new File(cacheDir, CACHE_EXDATA_DIR), name.toString());
1127 dataFile.setLastModified(now); // Enforce our clock's `now'.
1128 }
1129
1130 // Return new entry.
1131 return(touched);
1132 }
1133 finally { rwl.writeLock().unlock(); }
1134 }
1135
1136 /**If true, when creating cache entry if data found on disc use it rather than refuse to build meta-data entry.
1137 * This is useful for co-existing with old cache mechanism.
1138 */
1139 private static final boolean USE_EXTANT_DISC_ENTRY_ON_CREATE = true;
1140
1141 /**Remove the least-recently-used exhibit cache item, if any.
1142 * We won't remove the given named item if non-null;
1143 * this may prevent us removing anything if it is the only cached item.
1144 * <p>
1145 * If the named item is the oldest then we'll remove the next oldest,
1146 * if there is one.
1147 *
1148 * @param justData if true then we try to retain metadata/thumbnails
1149 *
1150 * @return true if we removed something
1151 */
1152 private boolean _exhibitRemoveLRUCacheEntry(final ReentrantReadWriteLock rwl,
1153 final File cacheDir,
1154 final Name.ExhibitFull dontRemoveName,
1155 final boolean justData,
1156 final SimpleLoggerIF logger,
1157 final StatsLogger.StatsConfig statsIDSCGEN)
1158 throws IOException
1159 {
1160 // Grab both locks in correct order.
1161 _getWriteLock(rwl, "_exhibitRemoveLRUCacheEntry()", logger);
1162 try
1163 {
1164 synchronized(this)
1165 {
1166 final Iterator<CachedFile> it = exhibitsLRU.iterator();
1167
1168 // Get oldest entry.
1169 if(!it.hasNext()) { return(false); } // Cache is empty.
1170 final CachedFile cf = it.next();
1171 if(!cf.name.equals(dontRemoveName))
1172 {
1173 // Good, we can try this one...
1174 if(!exhibitRemoveCacheEntry(rwl, cacheDir, cf.name, justData, logger, statsIDSCGEN))
1175 { return(false); }
1176 }
1177 else if(it.hasNext())
1178 {
1179 // Get second-oldest entry;
1180 // this can't be the named exhibit,
1181 // since we only get here if the oldest entry was
1182 // the special named exhibit...
1183 final CachedFile cf2 = it.next();
1184 // Good, we can try this one...
1185 if(!exhibitRemoveCacheEntry(rwl, cacheDir, cf2.name, justData, logger, statsIDSCGEN))
1186 { return(false); }
1187 }
1188 else
1189 { return(false); /* Nothing removed from the cache. */ }
1190 }
1191 }
1192 finally { rwl.writeLock().unlock(); }
1193
1194 // Note eviction of old exhibit from cache...
1195 StatsLogger.captureDataPoint(statsIDSCGEN, SCGNAME_CACHEEVICTLRU);
1196 return(true);
1197 }
1198
1199 /**Remove an exhibit entry from cache, possibly including all its data and metadata.
1200 * Returns true if successful, false if not.
1201 * <p>
1202 * This will fail if the metaData is read only,
1203 * or if an entry for the exhibit does not exist
1204 * in the metadata and on disc.
1205 * <p>
1206 * Since these failures will be by returning false
1207 * rather than by causing an exception then this
1208 * can be used as a gentle unconditional way to make sure that
1209 * an entry does not exist whether or not one did before.
1210 * <p>
1211 * This needs to access disc and so will need to
1212 * hold the main write lock while it does so, grabbing
1213 * the metaData instance lock inside the main lock
1214 * where both need to be held simultaneously.
1215 * <p>
1216 * Can optionally attempt to just remove the exhibit data,
1217 * leaving metadata and any thumbnails intact.
1218 *
1219 * @param rwl the main cache lock object; must not be null
1220 * @param justData if true then we try to retain metadata/thumbnails
1221 */
1222 boolean exhibitRemoveCacheEntry(final ReentrantReadWriteLock rwl,
1223 final File cacheDir,
1224 final Name.ExhibitFull name,
1225 final boolean justData,
1226 final SimpleLoggerIF logger,
1227 final StatsLogger.StatsConfig statsIDSCGEN)
1228 throws IOException
1229 {
1230 assert (rwl != null) && (name != null);
1231
1232 // Fail gently if read-only.
1233 if(!isReadWrite()) { return(false); }
1234
1235 _getWriteLock(rwl, "MetaData.exhibitRemoveCacheEntry()", logger);
1236 try
1237 {
1238 synchronized(this)
1239 {
1240 // Double-check that we haven't gone read-only...
1241 if(!isReadWrite()) { return(false); }
1242
1243 // Does not exist in metaData; fail gently.
1244 final CachedFile cf = exhibitGetInfo(name);
1245 if(cf == null) { return(false); }
1246
1247 /* if(ORG.hd.d.IsDebug.isDebug) */ { logger.log("[MetaData.exhibitRemoveCacheEntry(): INFO: request to remove cache "+(justData?"data":"data and metadata")+" (with extant meta-data): " + name + ", last access: "+(new Date(cf.getLastAccessed()))+", cached size: "+cf.cachedLength+".]"); /* Thread.dumpStack(); */ }
1248
1249 // Only attempt removal of any extant data components.
1250 //
1251 // Update the exhibit timestamp regardless
1252 // to try to ensure progress during purge() etc...
1253 //
1254 // This is "clock daemon" working-set management.
1255 if(justData)
1256 {
1257 // To avoid leaving this exhibit in the LRU slot,
1258 // the timestamp is updated
1259 // not quite to "now" (less good than a real access)
1260 // but enough to avoid being seen again immediately.
1261 final long newStamp = (cf.lastAccessed + System.currentTimeMillis())/2 + 1;
1262
1263 // Explicitly get the file deleted if we have some data cached
1264 // or if the file seems to exist anyway (eg is zero length).
1265 if((cf.cachedLength > 0) ||
1266 (new File(new File(cacheDir, CACHE_EXDATA_DIR), name.toString()).exists()))
1267 {
1268 final CachedFile newCf = cf.zapData(cacheDir);
1269 _update(rwl, newCf.touchedEntry(newStamp), logger);
1270 return(true);
1271 }
1272
1273 _update(rwl, cf.touchedEntry(newStamp), logger);
1274 return(false); // No exhibit data to remove.
1275 }
1276
1277 // Does exist: remove metadata...
1278 _remove(rwl, name);
1279 // Does exist: remove file(s)/data...
1280 cf.zapMe(cacheDir);
1281 }
1282 }
1283 finally { rwl.writeLock().unlock(); }
1284
1285 // Note removal of exhibit from cache...
1286 StatsLogger.captureDataPoint(statsIDSCGEN, SCGNAME_CACHEREM);
1287
1288 return(true); // Done!
1289 }
1290
1291 /**Create a new exhibit entry.
1292 * Returns true if successful, false if not.
1293 * <p>
1294 * This will fail if the metaData is read only,
1295 * or if an entry for the exhibit already exists
1296 * in the metaData or on disc.
1297 * <p>
1298 * Since these failures will be by returning false
1299 * rather than by causing an exception then this
1300 * can be used as a gentle unconditional way to make sure that
1301 * an entry does exist whether or not one did before.
1302 * <p>
1303 * This needs to access disc and so will need to
1304 * grab a write lock on the main cache lock while it does so, grabbing
1305 * the metaData instance lock inside the main lock
1306 * where both need to be held simultaneously.
1307 * <p>
1308 * Marks the meta-data as needing to be saved
1309 * if a new meta-data entry had to be created...
1310 *
1311 * @param rwl the main lock object; must not be null
1312 * @param esa the basic info on the exhibit to have an entry created
1313 *
1314 * @throws java.io.IOException in case of serious problems
1315 * accessing the on-disc cache state
1316 */
1317 boolean exhibitCreateNewCacheEntry(final ReentrantReadWriteLock rwl,
1318 final File cacheDir,
1319 final ExhibitStaticAttr esa,
1320 final SimpleLoggerIF logger,
1321 final StatsLogger.StatsConfig statsIDSCGEN)
1322 throws IOException
1323 {
1324 assert (rwl != null) && (esa != null);
1325
1326 // Fail gently if read-only.
1327 if(!isReadWrite()) { return(false); }
1328
1329 //if(ORG.hd.d.IsDebug.isDebug) { System.out.println("MetaData.exhibitCreateNewCacheEntry(): requested to create cache file: " + esa.filePath); }
1330
1331 // Path to (possibly partially-) cached exhibit file.
1332 final File dataFile = new File(new File(cacheDir, CACHE_EXDATA_DIR), esa.getCharSequence().toString());
1333
1334 // Grab both locks in correct order.
1335 _getWriteLock(rwl, "exhibitCreateNewCacheEntry()", logger);
1336 try
1337 {
1338 synchronized(this)
1339 {
1340 // Already exists in metaData; fail gently.
1341 if(exhibitGetInfo(esa.getExhibitFullName()) != null) { return(false); }
1342
1343 CachedFile newCf; // = null;
1344 if(dataFile.exists())
1345 {
1346 // Already exists on disc; fail gently.
1347 if(!USE_EXTANT_DISC_ENTRY_ON_CREATE) { return(false); }
1348
1349 // MORE PERMISSIVE STYLE...
1350 // Already exists on disc; try to use it!
1351 try
1352 {
1353 newCf = CachedFile.recoverExtantCachedFileDetails(
1354 cacheDir, esa.getExhibitFullName());
1355 }
1356 catch(final IOException e)
1357 {
1358 newCf = CachedFile.makeNewDiscCacheFile(
1359 cacheDir, esa.getExhibitFullName(), esa.timestamp);
1360
1361 // Data on disc may be damaged,
1362 // so ignore it and start again...
1363 e.printStackTrace();
1364 }
1365 }
1366 else
1367 {
1368 // OK, create new entry on disc ready for meta-data.
1369 newCf = CachedFile.makeNewDiscCacheFile(
1370 cacheDir, esa.getExhibitFullName(), esa.timestamp);
1371 }
1372
1373 // Post updated entry to metaData.
1374 assert(newCf != null);
1375 _update(rwl, newCf, logger);
1376
1377 if(FORCE_IMMEDIATE_SAVE_ON_EXPANDED_METADATA)
1378 { saveToDisc(cacheDir, logger, statsIDSCGEN); }
1379 }
1380 }
1381 finally { rwl.writeLock().unlock(); }
1382
1383 // Note addition of exhibit to cache...
1384 StatsLogger.captureDataPoint(statsIDSCGEN, SCGNAME_CACHEADD);
1385
1386 return(true); // Done!
1387 }
1388
/**Minimum percentage free in cache filesystem for us to expand the cache; non-negative, in range [0,100[.
1390 * Set to prevent accidental exhaustion of space in a shared filesystem
1391 * (eg inducing transient write errors for us or other storage users, etc)
1392 * due to incorrect usage estimates by us or unexpected use by others,
1393 * and to avoid very poor performance from a nearly-full filesystem.
1394 * <p>
1395 * A value in the range 1-10 is probably good.
1396 */
1397 private static final int MIN_FS_PERCENT_FREE = 3;
1398
1399 /**Computes the target high-water mark (target maximum cache size); strictly positive.
1400 * If we currently only have a default (zero-timestamp) genProps
1401 * then we use the initial size of the cache as our high-water mark,
1402 * ie do not allow the cache to expand.
1403 * <p>
1404 * We may further restict cache size to leave a given percentage free
1405 * in the underlying filesystem.
1406 */
1407 long computeTargetHighWaterMark()
1408 {
1409 final long lpWMECB = LocalProps.getWEBSVR_MAX_EX_CACHE_BYTES();
1410
1411 // We don't necessarily trust the props value, eg if zero.
1412 final boolean weTrustProps = (lpWMECB > 0);
1413
1414 // If we trust genProps then use its high-water mark
1415 // (which may have a local override),
1416 // else we use the actual high-water mark so far
1417 // (or the fallback if larger).
1418 final long highWaterMark = weTrustProps ? lpWMECB :
1419 Math.max(FALLBACK_MIN_CACHE_SIZE, getTotalBytesHighWaterMark());
1420
1421 return(highWaterMark);
1422 }
1423
1424 /**Compute current definite free-space in cache.
1425 * This returns the amount of space that we have below the low-water mark;
1426 * if this returns a positive number then the cache is within
1427 * that low water mark and can be considered nowhere near full.
1428 * <p>
1429 * If we don't trust the GenProps value then the low-water mark
1430 * is the current actual high-water mark for the cache, ie
1431 * we try to maintain its size approximately.
1432 * <p>
1433 * Is synchronized to maintain consistency while working.
1434 */
1435 synchronized long computeFreeSpaceBelowLowWaterMark(final File cacheDir)
1436 {
1437 final long highWaterMark = computeTargetHighWaterMark();
1438 // final long lowWaterMark = Math.max(0, (long) (highWaterMark * LOW_WATER_FRACTION));
1439 // How much below the high-water mark is the low-water mark,
1440 // ie how much more space has to be available to be below the low-water mark?
1441 final long lhWaterGap = Math.max(0, (long) (highWaterMark * (1-LOW_WATER_FRACTION)));
1442 assert(lhWaterGap >= 0);
1443 return(computeFreeSpaceBelowHighWaterMark(cacheDir) - lhWaterGap);
1444 }
1445
1446 /**Returns true if we have lots of disc space free.
1447 * This means that usage is well below the low-water mark.
1448 * <p>
1449 * If we don't trust GenProps this always returns false.
1450 */
1451 boolean lotsFree(final File cacheDir)
1452 {
1453 final long max_cache_bytes = LocalProps.getWEBSVR_MAX_EX_CACHE_BYTES();
1454 // We don't necessarily trust the props value, eg if zero.
1455 final boolean weTrustProps = (max_cache_bytes > 0);
1456
1457 // Conservative fall-back; maintain cache size for now.
1458 if(!weTrustProps) { return(false); } // Can't be sure.
1459
1460 // Lots of empty room below the low-water mark,
1461 // eg enough to run lots of concurrent cache-expanding threads
1462 // without then finding ourselves to be out of space.
1463 // (We leave a small %age fraction free too.)
1464 return(computeFreeSpaceBelowLowWaterMark(cacheDir) > Math.max(16*MAX_EXTD_TRANSFER_CHUNK_SIZE,
1465 (max_cache_bytes >>> 8) /* ~1% */ ));
1466 }
1467
1468 /**Returns true if we have some (not lots of) disc space free.
1469 * This means that usage is below the low-water mark.
1470 * <p>
1471 * If we don't trust GenProps this always returns false.
1472 */
1473 boolean someFree(final File cacheDir)
1474 {
1475 // We don't necessarily trust the props value, eg if zero.
1476 final boolean weTrustProps = (LocalProps.getWEBSVR_MAX_EX_CACHE_BYTES() > 0);
1477
1478 // Conservative fall-back; maintain cache size for now.
1479 if(!weTrustProps) { return(false); } // Can't be sure.
1480
1481 return(computeFreeSpaceBelowLowWaterMark(cacheDir) >= MAX_TRANSFER_CHUNK_SIZE);
1482 }
1483
1484 /**Cache of estimated usable free space in the underlying cache filesystem; initially null.
1485 * Notes the cacheDir and time and free space of the last request.
1486 * <p>
1487 * It is in fact assumed that in general:
1488 * <ul>
 * <li>The presented cacheDir will be the same each time.</li>
1490 * <li>There will be relatively little thread-racing in any case.</li>
1491 * <li>That filesystem usable status can reasonably be cached for a reasonable time.</li>
1492 * </ul>
1493 * <p>
1494 * Marked volatile for thread-safe unlocked access.
1495 */
1496 private transient volatile Tuple.Triple<File, Long, Long> _cFSBHWM_cache;
1497
/**Maximum time in ms that we may retain a cached 'usable filespace' estimate; strictly positive.
 * If the file system in question doesn't contain much activity/data other than our cache,
 * and we are checking mainly to deal gracefully with internal usage-estimation errors
 * and from slowly-accumulated crud from our (and third-party) logs etc,
 * then we could probably cache the value for hours at a time without great loss,
 * but probably cacheing for tens of seconds would in practice eliminate much of the cost.
 * <p>
 * Pick a prime-ish value to minimise clashes with other activity...
 * (The current value is a little over one minute.)
 */
private static final int MAX_FS_USABLE_SPACE_CACHE_MS = 61001;
1508
1509 /**Compute available free space in cache (bytes); zero or negative if none.
1510 * This ignores the purge state of the system and simply reports what
1511 * space is available before the cache overflows.
1512 * <p>
1513 * This is thread-safe though may be expensive.
1514 */
1515 long computeFreeSpaceBelowHighWaterMark(final File cacheDir)
1516 {
1517 // Compute current free space.
1518 final long freeSpace =
1519 (computeTargetHighWaterMark() - getTotalBytesCurrentlyUsedByCache());
1520 if(freeSpace <= 0) { return(freeSpace); }
1521
1522 // Cap estimate of remaining space if filesystem is (getting) full.
1523 final File dataBaseDir = new File(cacheDir, CACHE_EXDATA_DIR);
1524 final long fsSpaceUsable;
1525 // See if we have a suitable non-stale cached usable-free-space estimate...
1526 final Tuple.Triple<File, Long, Long> cached = _cFSBHWM_cache;
1527 if((null != cached) &&
1528 (cacheDir.equals(cached.first)) &&
1529 (cached.second + MAX_FS_USABLE_SPACE_CACHE_MS >= System.currentTimeMillis()))
1530 { fsSpaceUsable = cached.third; }
1531 else
1532 {
1533 fsSpaceUsable = FileTools.estimatedFreeSpaceBelowReserve(dataBaseDir, MIN_FS_PERCENT_FREE);
1534 // Cache the new usable-space estimate.
1535 _cFSBHWM_cache = new Tuple.Triple<File, Long, Long>(cacheDir, System.currentTimeMillis(), fsSpaceUsable);
1536 }
1537 // If we *cannot* estimate the space free in the filesystem (-ve result)
1538 // then just use our internally-computed value.
1539 // We *do* want to use a zero result to suppress further data cacheing.
1540 if(fsSpaceUsable < 0) { return(freeSpace); }
1541
1542 return(Math.min(freeSpace, fsSpaceUsable));
1543 }
1544
1545 /**If true, we can pre-cache data blocks for (new or existing) exhibits.
1546 * POLICY: only true if we have (lots of) free space
1547 * and the cache is read/write,
1548 * and the server-slowdown factor is no greater than 2 (ie moderate or absent).
1549 */
1550 boolean canPrecacheExhibitData(final File cacheDir)
1551 {
1552 if(!isReadWrite()) { return(false); }
1553 if(LocalProps.getServerSlowdownFactor() > 2) { return(false); }
1554 return(lotsFree(cacheDir));
1555 }
1556
1557 /**If true, we can pre-cache thumbnails for (existing) exhibits.
1558 * POLICY: only true if
1559 * the free space is enough to store another thumbnail
1560 * and fetch the data to do so first if necessary
1561 * and the cache is read/write.
1562 * <p>
1563 * Basically we're prepared to go right up to the wire to generate
1564 * thumbnails because they are so useful to the user.
1565 * <p>
1566 * We leave a little bit of wiggle room on space.
1567 */
1568 boolean canPrecacheThumbnails(final File cacheDir)
1569 { return(isReadWrite() &&
1570 (computeFreeSpaceBelowHighWaterMark(cacheDir) >
1571 2 * (MAX_REMOTE_FETCH_TO_MAKE_THUMBNAIL + MAX_TRANSFER_CHUNK_SIZE))); }
1572
1573 /**If true, some precaching may be possible, going by our generally-least-strict limit.
1574 * We will only allow precaching if we have a valid LocalProps value.
1575 */
1576 boolean canPrecache(final File cacheDir)
1577 {
1578 // We don't necessarily trust the props value, eg if zero.
1579 final boolean weTrustProps = (LocalProps.getWEBSVR_MAX_EX_CACHE_BYTES() > 0);
1580 if(!weTrustProps) { return(false); }
1581 return(canPrecacheThumbnails(cacheDir));
1582 }
1583
1584
/**If true then try to retain metadata and thumbnails of exhibits we purge.
 * Thumbnails in particular may be expensive to regenerate/fetch,
 * and do not take much space.
 * <p>
 * purge() honours this only until it becomes "desperate"
 * (eg it has run out of time or examined every entry),
 * after which it stops asking for data-only removal.
 */
private static final boolean TRY_TO_RETAIN_TN_IN_PURGE = true;
1590
1591 /**Purges the cache of old entries to make space for new, if needed.
1592 * This will do nothing unless the space used by the cache is
1593 * above the low-water mark. We compute the low-water mark
1594 * from GenProps, or from the actual high-water mark in
1595 * the current use of the cache if the passed GenProps is
1596 * the default (has a zero timestamp). (We attempt to
1597 * roughly maintain the cache or possibly run it down slightly
1598 * while waiting for GenProps to arrive.)
1599 * <p>
1600 * This will avoid purging the cache of the item passed to it
1601 * by name in case it was a candidate for removal.
1602 * <p>
1603 * If the argument is zero this tries to ensure that space
1604 * for at least one new (empty) exhibit cache entry to be created.
1605 * <p>
1606 * If greater than zero this tries to ensure that an existing cache
1607 * entry can be extended by at least the given number of bytes
1608 * (either for the exhibit data or for thumbnail data).
1609 * <p>
1610 * This grabs the cache write lock and instance lock.
1611 * <p>
1612 * This will fail if the cache is marked read-only.
1613 *
1614 * @param howMuch howMuch space we need to ensure is free; positive
1615 */
1616 void purge(final ReentrantReadWriteLock rwl,
1617 final GenProps gp,
1618 final File cacheDir,
1619 long howMuch,
1620 final Name.ExhibitFull dontPurgeName,
1621 final SimpleLoggerIF logger,
1622 final StatsLogger.StatsConfig statsIDSCGEN)
1623 throws IOException
1624 {
1625 assert (rwl != null) && (gp != null) && (cacheDir != null);
1626
1627 if(howMuch < 0)
1628 { throw new IllegalArgumentException(); }
1629
1630 // If lots of space free then return immediately.
1631 if(lotsFree(cacheDir)) { return; }
1632
1633 // We convert a zero-length request
1634 // (which is a request to make space for a new empty entry)
1635 // into the estimated space required for such.
1636 if(howMuch == 0)
1637 { howMuch = CachedFile.MAX_EMPTY_ENTRY_BYTES_ON_DISC; }
1638
1639 assert howMuch > 0;
1640
1641 // We round up the (positive) request
1642 // to a disc block multiple.
1643 howMuch = FileTools.roundUpToFSBlockSize(howMuch);
1644
1645 // Grab both locks in correct order.
1646 _getWriteLock(rwl, "MetaData.purge()", logger);
1647 try
1648 {
1649 synchronized(this)
1650 {
1651 if(!isReadWrite())
1652 { throw new IllegalStateException("Cannot purge() read-only cache"); }
1653
1654 // We don't want to spend very long on this
1655 // as we are probably blocking a user.
1656 // However, as it probably involved real disc I/O,
1657 // then we should not make this too short.
1658 final long stopBy = System.currentTimeMillis() + 1001 +
1659 Math.max(Rnd.fastRnd.nextInt(3000),
1660 3*CoreConsts.MAX_INTERACTIVE_DELAY_MS);
1661
1662 // When this gets set true
1663 // we're getting desperate, eg due to lack of time.
1664 boolean gettingDesperate = false;
1665
1666 // Zap files until we have made enough space
1667 // (or we can't make any more because there are no files).
1668 // We'll allow that the last entry might be the
1669 // exhibit that we are trying to make (more) space for
1670 // so we would have to exit the loop with (possibly us)
1671 // left and rely on other mechanisms to zap this last item.
1672 for(int itemsExamined = 0; size() > 1; ++itemsExamined)
1673 {
1674 // Recompute each time to see how far we've gotten...
1675 final long freeSpace = computeFreeSpaceBelowHighWaterMark(cacheDir);
1676
1677 // Quit immediately if we have enough space...
1678 if(freeSpace >= howMuch)
1679 { break; }
1680
1681 // We may attempt to retain thumbnail/metadata
1682 // unless running out of time.
1683 final boolean justPurgeData =
1684 TRY_TO_RETAIN_TN_IN_PURGE &&
1685 !gettingDesperate;
1686
1687 // Purge the oldest extant cache entry if we can...
1688 if(!_exhibitRemoveLRUCacheEntry(rwl,
1689 cacheDir,
1690 dontPurgeName,
1691 justPurgeData,
1692 logger, statsIDSCGEN))
1693 { logger.log("INFO: ExhibitDataSimpleCache.purge() failed to remove exhibit entry on iteration #"+(1+itemsExamined)); }
1694
1695 // We were not desperate, but maybe we are now, due to:
1696 // * Having examined all the exhibits at least once.
1697 // * Having taken too long searching for a victim.
1698 // * Other threads queueing for the cache.
1699 if(!gettingDesperate)
1700 {
1701 final int cacheSize = size();
1702 if((itemsExamined > cacheSize) ||
1703 (System.currentTimeMillis() > stopBy) ||
1704 ((itemsExamined > cacheSize/8) && rwl.hasQueuedThreads()))
1705 {
1706 // We've spent quite long enough
1707 // trying to gently free up space,
1708 // so it's time to put our big boots on...
1709 gettingDesperate = true;
1710 }
1711 }
1712 }
1713 }
1714 }
1715 finally { rwl.writeLock().unlock(); }
1716 }
1717
/**If true, we NEVER ask upstream to cache for us.
 * If true, we try to avoid churning the master's cache
 * even if it has lots of space,
 * and we are assuming that our local caching is as good as it gets.
 * <p>
 * If false, we just ask it not to cache our precache requests
 * which do not reflect explicit user requests,
 * and assume that there will still be shared locality
 * between different slaves or that it might be expensive
 * for the master to fetch data.
 * <p>
 * Note that in either case, if data can be satisfied from the
 * local cache then it will be, and if we have lots of space we might
 * cache locally regardless.
 * <p>
 * Consulted by exhibitRead() when computing the "don't cache" flag
 * that it passes on with upstream fetches.
 */
private static final boolean NEVER_REQUEST_UPSTREAM_CACHEING = false;
1734
/**Approximate maximum time to wait for another thread to extend an exhibit cache entry (ms); strictly positive.
 * Fixed once at class initialisation as a 25001ms base
 * plus a random component drawn with Rnd.fastRnd.nextInt(10101).
 * <p>
 * exhibitRead() logs a message once a wait exceeds half of this,
 * and stops waiting once the whole of it has been exceeded.
 */
private static final int MAX_CONC_EXT_WAIT_TIME_MS = 25001 + Rnd.fastRnd.nextInt(10101);
1737
1738 /**Reads data for an exhibit into the given buffer.
1739 * This will complain with an IOException if an
1740 * attempt is made to read beyond the bounds of the
1741 * exhibit, or if data is unavailable for any reason,
1742 * or if the read is far too long to be sensibly handled,
1743 * else the requested chunk of data is read into the
1744 * caller's buffer.
1745 * <p>
1746 * This will extend the underlying cache entry if necessary
1747 * (and possible) to satisfy the read, and may also
1748 * apply read-ahead to maximise user-perceived performance.
1749 * <p>
1750 * If the metaData is read-only, then any (portion of the) read that
1751 * cannot be satisfied from the cache is passed back up the pipeline.
1752 * <p>
1753 * This needs to access disc and so will need to
1754 * hold the rwl while it does so, grabbing
1755 * the metaData instance lock inside the rwl
1756 * where both need to be held simultaneously.
1757 * <p>
1758 * Note that we control caching to suit our precaching and any
1759 * downstream precache/random activity,
1760 * so at this level we have a 3-way choice.
1761 * <p>
1762 * This may only mark a cache entry as accessed
1763 * if dontCache is FALSE
1764 * and we actually read from or add data to the cache,
1765 * though we may mark it as updated at other times too.
1766 * <p>
1767 * Whenever we (easily) know that we can satisfy the request from cache,
1768 * we only take a read lock to improve concurrency.
1769 * <p>
1770 * We return as soon as we have satisfied some part of the request
1771 * so as minimise internal copying of data, etc.
1772 *
1773 * @param rwl the main cache lock object; must not be null
1774 * @param fetchFromPeer if non-null and we need to extend the cache
1775 * then we will try to do so from the mirror with the given tag
1776 * @param dontCache if FALSE, then cache here if possible and
1777 * ask upstream to cache;
1778 * if null then cache locally but not upstream
1779 * (this is our precaching activity);
1780 * if TRUE then don't cache here or upstream
1781 * (this is downstream precaching or random activity);
1782 * note therefore that we ask upstream to cache
1783 * unless dontCache is FALSE
1784 *
1785 * @throws java.io.IOException in case of serious problems
1786 * accessing the on-disc cache state
1787 * or if request is out of bounds of underlying exhibit
1788 * or too big to handle
1789 */
1790 void exhibitRead(final ReentrantReadWriteLock rwl,
1791 final String fetchFromPeer,
1792 final File cacheDir,
1793 final Name.ExhibitFull name,
1794 final SimpleExhibitPipelineIF upstream,
1795 final AllExhibitImmutableData aeid,
1796 final GenProps gp,
1797 final int dataStart,
1798 final ByteBuffer buf,
1799 final Boolean dontCache,
1800 final SimpleLoggerIF logger,
1801 final StatsLogger.StatsConfig statsIDSCGEN)
1802 throws IOException
1803 {
1804 assert (rwl != null) && (cacheDir != null) && (name != null);
1805 assert (upstream != null) && (aeid != null) && (gp != null);
1806 assert (buf != null);
1807
1808 // Veto out-of-bounds request.
1809 if(dataStart < 0) // Current practical limit.
1810 { throw new IllegalArgumentException(); }
1811
1812 final ExhibitStaticAttr esa = aeid.getStaticAttr(name);
1813 // Immediately deflect requests for bogus exhibits.
1814 if(esa == null)
1815 { throw new FileNotFoundException("exhibit " + name + " does not exist"); }
1816
1817 // Maximum length of request implied by buffer size.
1818 final int maxLen = buf.remaining();
1819 assert(maxLen >= 0);
1820
1821 // Compute actual implicit (max) request length; non-negative.
1822 final int len = Math.min(maxLen,
1823 (int) Math.min(MAX_USER_READ_SIZE,
1824 esa.length - dataStart));
1825 assert(dataStart + len <= esa.length);
1826
1827 // Immediately deal with trivial zero-length reads.
1828 // They don't even mark the cache entry as used.
1829 if(len == 0) { return; }
1830
1831 // Decide whether we will ask upstream to cache for us.
1832 final boolean cacheEverywhere = Boolean.FALSE.equals(dontCache);
1833 final boolean dontCacheUpstream = NEVER_REQUEST_UPSTREAM_CACHEING ||
1834 !cacheEverywhere;
1835 final boolean dontCacheLocally = Boolean.TRUE.equals(dontCache);
1836
1837 // Is this a read of the last block/byte of the exhibit?
1838 final boolean finalByteRead = (dataStart + len == esa.length);
1839
1840
1841 // FAST PATH... (read-lock only)
1842 // Optimistically try for a read lock only to achieve good concurrency,
1843 // hoping that we don't have to go upstream.
1844 // Note that we'll have to take a write lock
1845 // for at least one read/access of the data if we want
1846 // to update timestamps to maintain the LRU cache;
1847 // we use the read of the last byte(s) to do this.
1848 //
1849 // We can use this read-only if ANY of the following holds:
1850 // * we are not reading the last block/byte of the exhibit
1851 // * we don't care about cacheing the file locally anyway
1852 // * the metadata is currently read-only
1853 // * enough of the exhibit is loaded to satisfy the request
1854 // * the cache looks busy so it is worth taking this short-cut
1855 // * this same exhibit is being extended by another thread
1856 //
1857 // If any "fast-path" condition is not met,
1858 // then simply fall through to the usual handler.
1859 //
1860 // Note: if the cache looks to be busy
1861 // (ie it might take us a long time to acquire a write lock)
1862 // then we'll not bother trying to update timestamps.
1863 // Our goal is to give fast responses to users.
1864 // This should particularly help performance for small exhibits
1865 // that may be used as their own thumbnails.
1866 do
1867 {
1868 if(!finalByteRead ||
1869 dontCacheLocally ||
1870 !isReadWrite() ||
1871 rwl.hasQueuedThreads() || (rwl.getReadLockCount() > 0) || // Cache seems busy...
1872 (_beingExtended.get(esa.getExhibitFullName()) != null)) // Being extended by another thread.
1873 {
1874 // Satisfy as much of the read from the cache as possible.
1875 final int n = _readRawDataStartFromCache(rwl, name, dataStart, cacheDir, buf, logger, statsIDSCGEN);
1876 // If we read anything then return it immediately.
1877 if(n > 0) { return; }
1878 }
1879 else { break; /* No conditions met so drop out of "fast path"... */ }
1880
1881 // // Return immediately if request now completely satisfied...
1882 // if(len == 0) { return; }
1883
1884 // If another thread is (now) extending this exhibit
1885 // then we read/sleep again until they are done,
1886 // or until we have been waiting a long time.
1887 final Long extendedSince = _beingExtended.get(esa.getExhibitFullName());
1888 if(extendedSince == null) { break; }
1889
1890 final long waitTime = Math.max(0,
1891 System.currentTimeMillis() - extendedSince.longValue());
1892 if(waitTime > MAX_CONC_EXT_WAIT_TIME_MS/2) { logger.log("[ExhibitDataSimpleCache: waiting (@"+dataStart+", for "+len+"bytes) while another thread extends ("+waitTime+"ms so far) exhibit "+esa+".]"); }
1893 // Ensure that we don't block indefinitely...
1894 if(waitTime > MAX_CONC_EXT_WAIT_TIME_MS)
1895 {
1896 logger.log("[ExhibitDataSimpleCache: WARNING: waited too long for another thread to finish, so dropping though to extend data concurrently...]");
1897 break;
1898 }
1899 synchronized(extendedSince)
1900 {
1901 // Wait a little while (bearable for interactive users)
1902 // or until we are signalled that the extending is done.
1903 // Use a random wait to avoid multiple threads colliding.
1904 // Wait in approximate proportion to time already waited
1905 // to avoid wasting too much time spinning in this loop.
1906 try { extendedSince.wait(CoreConsts.MAX_INTERACTIVE_DELAY_MS/2 + Rnd.fastRnd.nextInt(((int)(waitTime/4)) + CoreConsts.MAX_INTERACTIVE_DELAY_MS)); }
1907 catch(final InterruptedException e) { throw new InterruptedIOException(e.getMessage()); }
1908 }
1909 } while(true);
1910
1911 // // Return if request now completely satisfied...
1912 // if(len == 0) { return; }
1913
1914 // Create the cache entry or update the timestamp on the extant one,
1915 // and purge enough stale data to make room for the new data.
1916 _getWriteLock(rwl, "exhibitRead()", logger); // Grab both locks in correct order.
1917 try
1918 {
1919 synchronized(this)
1920 {
1921 do
1922 {
1923 // Do we know about this exhibit?
1924 // If so mark then it as accessed too,
1925 // providing that this was not a "dontCache" request,
1926 // ie probably not from a local client.
1927 CachedFile cf = dontCacheLocally ? exhibitGetInfo(name) :
1928 exhibitMarkAsAccessed(rwl, name, logger, finalByteRead ? cacheDir : null);
1929 // If we don't, then we want to create it if possible.
1930 if(cf == null)
1931 {
1932 // If the cache is not read/write,
1933 // then prepare to duck through to upstream source.
1934 if(!isReadWrite()) // Can't alter local cache.
1935 { break; }
1936
1937 // Make space for new cache entry if necessary.
1938 purge(rwl, gp, cacheDir, 0, name, logger, statsIDSCGEN);
1939
1940 // Cache is read-write, so create a new entry.
1941 if(!exhibitCreateNewCacheEntry(rwl, cacheDir, esa, logger, statsIDSCGEN))
1942 { throw new IOException("cannot create cache entry"); }
1943 cf = exhibitGetInfo(name);
1944 assert cf != null;
1945 }
1946
1947 // Verify that the in-memory record of the cached length is correct.
1948 // Fix-up the in-memory copy if wrong.
1949 final long actualLength = CachedFile.getCachedDataLength(
1950 cacheDir, name);
1951 if(actualLength != cf.cachedLength)
1952 {
1953 final CachedFile rec =
1954 CachedFile.recoverExtantCachedFileDetails(
1955 cacheDir, name);
1956 final CachedFile fixedUp = cf.fixup(rec, logger);
1957
1958 // If we actually had to fix-up,
1959 // then replace the in-memory copy.
1960 if(fixedUp != cf)
1961 {
1962 cf = fixedUp;
1963 _update(rwl, cf, logger);
1964 }
1965 }
1966
1967 } while(false);
1968 } // Drop out of metaData lock...
1969 } // Drop cache write lock.
1970 finally { rwl.writeLock().unlock(); }
1971
1972 // // Return if request completely satisfied...
1973 // if(len == 0) { return; }
1974
1975 // Can we (now) satisfy any initial portion of the
1976 // request with data that we have in our cache?
1977 // This might be the case now because we just did a fix-up
1978 // or because the cache was extended while we were waiting
1979 // to acquire a write lock.
1980 // (This could happen with the cache read-only.)
1981 final int canDo = _readRawDataStartFromCache(rwl,
1982 name,
1983 dataStart,
1984 cacheDir,
1985 buf,
1986 logger,
1987 statsIDSCGEN);
1988 // If we managed to read from the "found" data
1989 // then we can return what we have immediately!
1990 // This avoids an upstream fetch for now
1991 // and might end up being all that is required.
1992 if(canDo > 0) { return; }
1993
1994 // As long as:
1995 // * the cache is not read-only,
1996 // * we have an entry for this file in the cache,
1997 // * the current request follows on from the cached data
1998 // (or we can extend the request backwards a little to follow on from cached data),
1999 // * we can cache locally (or there is lots of space free),
2000 // then we can fetch the next portion of the data from upstream
2001 // (NOT within the scope of any lock, ie NOT blocking other access)
2002 // and then write it into our cache (and return it to the caller too).
2003 CachedFile cf;
2004 if(isReadWrite() &&
2005 ((cf = exhibitGetInfo(name)) != null) &&
2006 (dataStart <= cf.cachedLength+(MAX_EXTD_TRANSFER_CHUNK_SIZE-MAX_TRANSFER_CHUNK_SIZE)) &&
2007 (!dontCacheLocally || lotsFree(cacheDir)))
2008 {
2009 final long fetchStart = cf.cachedLength;
2010 assert(fetchStart <= Integer.MAX_VALUE);
2011
2012 // Compute what initial portion we are prepared to cache of any one exhibit.
2013 // Limited to a smallish fraction of total cache size,
2014 // but a decent minimum chunk if no explicit local cache size has been set.
2015 final int max_cacheable_bytes_per_exhibit = _getMaximumCacheableBytesForOneExhibit(gp);
2016
2017 // We will extend the cache entry
2018 // by at most MAX_USER_READ_SIZE bytes at a time
2019 // and we will round up small residues to
2020 // MAX_TRANSFER_CHUNK_SIZE for efficiency if possible.
2021
2022 // We allow a small hole just before the current request
2023 // to be backfilled so that we can cache the data fetched.
2024
2025 // Now impose the relevant limits...
2026 // We impose the lowest of several potential limits.
2027 final long maxExtendTo =
2028 // No more than larger of a max-transfer-size block or the request length,
2029 // but capped by the maximum user transfer size,
2030 // starting from the caller's requested start.
2031 Math.min(dataStart + Math.min(Math.max(len, MAX_TRANSFER_CHUNK_SIZE),
2032 SimpleExhibitPipelineIF.MAX_USER_READ_SIZE),
2033
2034 // No more than the max-extended transfer from the current cache start.
2035 Math.min(fetchStart + MAX_EXTD_TRANSFER_CHUNK_SIZE,
2036
2037 // Limit to a valid +ve int size.
2038 Math.min(Integer.MAX_VALUE,
2039
2040 // Limit to no more than the actual exhibit length
2041 // or the maximum initial portion of any exhibit that can be cached.
2042 Math.min(esa.length,
2043 max_cacheable_bytes_per_exhibit))));
2044 // How much would we extend the cache entry by?
2045 final int extendBy = (int) (maxExtendTo - fetchStart);
2046 assert(extendBy >= 0);
2047
2048 // Fetch data from upstream to cache locally
2049 // if we have enough space for the data...
2050 // Indicate to other threads that we are extending this exhibit.
2051 final Object exhibitExtendKey = new String(esa.getCharSequence().toString()); // Create new String instance as a unique key.
2052 if(_beingExtended.put(exhibitExtendKey, new Long(System.currentTimeMillis())) != null)
2053 {
2054 logger.log("[ExhibitDataSimpleCache: WARNING: exhibit already being extended by another thread: "+esa+".]"); // Probably should not happen often.
2055 }
2056 try
2057 {
2058 // Briefly grab a cache lock and purge some old data
2059 // if we don't seem to have enough space to grow the exhibit.
2060 if((extendBy > 0) && (computeFreeSpaceBelowHighWaterMark(cacheDir) < extendBy))
2061 {
2062 _getWriteLock(rwl, "exhibitRead()-purge", logger); // Grab both locks in right order...
2063 try
2064 {
2065 synchronized(this)
2066 { purge(rwl, gp, cacheDir, extendBy, esa.getExhibitFullName(), logger, statsIDSCGEN); }
2067 }
2068 finally { rwl.writeLock().unlock(); }
2069 }
2070
2071 if((extendBy > 0) && (computeFreeSpaceBelowHighWaterMark(cacheDir) >= extendBy))
2072 {
2073 // Note raw data cache miss...
2074 StatsLogger.captureDataPoint(statsIDSCGEN, SCGNAME_CACHERAWDATAMISS);
2075
2076 // Get the data from upstream
2077 // WITHOUT ANY CACHE LOCK HELD.
2078
2079 // Create a short-lived buffer (as non-direct).
2080 final ByteBuffer tmpbuf = ByteBuffer.allocate(extendBy);
2081
2082 // We may allow a direct fetch from our peers
2083 // rather than from upstream
2084 // so as to lighten the load on the master.
2085 if(ALLOW_DATA_FETCH_FROM_PEERS &&
2086 (fetchFromPeer != null))
2087 {
2088 // Create temporary tunnel to the selected peer.
2089 final String url = "http://" +
2090 LoadBalancingUtils.makeMirrorNameFromTag(fetchFromPeer) +
2091 CoreConsts.TUNNEL_URI;
2092 // Suppress logging by these short-lived tunnels...
2093 final SimpleLoggerIF fakeLogger = GenUtils.nullLogger;
2094 final ExhibitDataHTTPTunnelSource tempTunnel =
2095 new ExhibitDataHTTPTunnelSource(url, fetchFromPeer, fakeLogger);
2096 // Get the data or abort with an IOException...
2097 try
2098 {
2099 tempTunnel.getRawFile(tmpbuf,
2100 esa.getExhibitFullName(),
2101 (int) fetchStart,
2102 dontCacheUpstream);
2103 // Log the (non-aborted) fetch.
2104 StatsLogger.captureDataPoint(statsIDSCGEN, SCGNAME_DATAFETCHFROMPEER_PREFIX+fetchFromPeer);
2105 }
2106 finally
2107 { tempTunnel.destroy(); }
2108 }
2109 else // Fetch the data from our upstream source...
2110 { upstream.getRawFile(tmpbuf, esa.getExhibitFullName(), (int) fetchStart, dontCacheUpstream); }
2111
2112 if(tmpbuf.position() == 0)
2113 {
2114 // We failed to fetch anything...
2115 return;
2116 }
2117
2118 // Prepare to read the data fetched from upstream.
2119 tmpbuf.flip();
2120
2121 // Grab a write lock and cache the data fetched from upstream.
2122 // Re-acquire both locks in the right order.
2123 _getWriteLock(rwl, "exhibitRead()-data-save", logger);
2124 try
2125 {
2126 synchronized(this)
2127 {
2128 // Check that the cache has not changed
2129 // too radically for this fetched data to be usable.
2130 final CachedFile newEntry = exhibitGetInfo(name);
2131 if((newEntry != null) &&
2132 (newEntry.cachedLength >= fetchStart) &&
2133 (newEntry.cachedLength < fetchStart + tmpbuf.remaining()) &&
2134 isReadWrite())
2135 {
2136 cf = newEntry.extendCacheFile(cacheDir,
2137 fetchStart,
2138 tmpbuf);
2139 // Update our records.
2140 _update(rwl, cf, logger);
2141 }
2142 }
2143 }
2144 finally { rwl.writeLock().unlock(); }
2145
2146 // Now read what we can from the extended on-disc cache.
2147 // We do this under the read lock.
2148 _readRawDataStartFromCache(rwl,
2149 name,
2150 dataStart,
2151 cacheDir,
2152 buf,
2153 logger,
2154 statsIDSCGEN);
2155
2156 // Return with whatever we got (if anything).
2157 return;
2158 }
2159 }
2160 finally
2161 {
2162 synchronized(exhibitExtendKey)
2163 {
2164 // Wake up at any thread waiting for us to finish
2165 // extending this exhibit.
2166 // If this notification is lost then any waiting threads
2167 // will soon wake/continue anyway.
2168 exhibitExtendKey.notifyAll();
2169
2170 // Allow someone else to extend this exhibit...
2171 _beingExtended.remove(exhibitExtendKey);
2172 }
2173 }
2174 }
2175
2176 // // Return if request completely satisfied...
2177 // if(len == 0) { return; }
2178
2179 // Fall-through catch-all case:
2180 // If we have not managed to satisfy the request,
2181 // then note a raw-data cache miss,
2182 // and out of the scope of any lock,
2183 // directly satisfy the request from upstream.
2184 //
2185 // This should only happen for the tail of huge exhibits,
2186 // or if the cache is read-only and we can't extend/expand it,
2187 // or if requests have been made in a non-sequential order from 0,
2188 // or when a client needs to (re)fetch the tail of something we now don't have cached.
2189 // This is more likely to happen on the master than on slaves.
2190 StatsLogger.captureDataPoint(statsIDSCGEN, SCGNAME_CACHERAWDATAMISS);
2191 //if(cacheEverywhere && isReadWrite()) { logger.log("[ExhibitDataCache: WARNING: having to go upstream to complete request: dataStart/len/name=" + dataStart+"/"+len+"/"+name+"]"); }
2192 assert(buf.remaining() == maxLen); // No data read yet...
2193 upstream.getRawFile(buf, name, dataStart, dontCacheUpstream);
2194 assert(buf.remaining() <= maxLen); // Maybe we have fetched some data...
2195 }
2196
2197 /**Fill as much as possible of the read request from disc cache, returning the number of bytes read; non-negative.
2198 * This is done grabbing a read lock on the entire cache,
2199 * but no exclusive lock so that we can do multiple concurrent reads,
2200 * ie so that we can queue concurrent reads at the OS/disc level
2201 * for maximum throughput.
2202 * <p>
2203 * Likely to be heavily used, so efficiency is important here.
2204 */
2205 private int _readRawDataStartFromCache(final ReentrantReadWriteLock rwl,
2206 final Name.ExhibitFull name,
2207 final int dataStart,
2208 final File cacheDir,
2209 final ByteBuffer buf,
2210 final SimpleLoggerIF logger,
2211 final StatsLogger.StatsConfig statsIDSCGEN)
2212 throws IOException
2213 {
2214 assert(dataStart >= 0);
2215
2216 _getReadLock(rwl, "_readRawDataStartFromCache()", logger);
2217 try
2218 {
2219 // With cache locked read-only with main rwl lock
2220 // then we don't need the MetaData instance lock,
2221 // and not taking this allows concurrent disc reads.
2222 final CachedFile cf = exhibitGetInfo(name);
2223
2224 // Can we satisfy some of the request from cache?
2225 // This can happen with the cache read-only.
2226 if((cf != null) && (dataStart < cf.cachedLength))
2227 {
2228 //System.out.println("Satisfying leading part of exhibitRead() from cache r/o ["+dataStart+","+canDo+"] for " + name);
2229 // Note raw data cache hit...
2230 StatsLogger.captureDataPoint(statsIDSCGEN, SCGNAME_CACHERAWDATAHIT);
2231 // Do the read as fast as possible; no timestamps...
2232 final int posBefore = buf.position();
2233 cf.getCachedData(cacheDir, dataStart, buf, true);
2234 return(buf.position() - posBefore); // # bytes read.
2235 }
2236 }
2237 finally { rwl.readLock().unlock(); }
2238 return(0); // Couldn't read anything.
2239 }
2240
/**Thread-safe Map of which exhibits we are currently extending and when we started extending.
 * Used to help reduce the probability of redundant concurrent fetches
 * of the same data by multiple threads.
 * <p>
 * The key is a (new) String of the full exhibit name;
 * the value is the System.currentTimeMillis() time at which the extension was registered.
 * <p>
 * NOTE(review): since the keys are Strings, a look-up only matches
 * when made with an equal String; a key of any other type never matches.
 * <p>
 * The extending thread in exhibitRead() calls notifyAll() as it removes its entry;
 * waiting threads use timed waits, so a missed notification only costs a delay.
 * <p>
 * Not final, presumably so that it can be re-created for this transient field
 * after deserialisation -- TODO confirm.
 */
private transient /*final*/ Map<Object, Long> _beingExtended =
    new Hashtable<Object, Long>();
2251
2252
2253 /**Get the cached thumbnails for an exhibit, or null if none cached.
2254 * This needs to access disc and so will need to
2255 * hold the main cache while it does so, grabbing
2256 * the metaData instance lock inside the main lock
2257 * where both need to be held simultaneously.
2258 * <p>
2259 * If a problem is encountered deserialising thumbnails,
2260 * the cache is not-read-only, and AUTO_REPAIR_DURING_IO is true,
2261 * we may zap the offending serialised file to try
2262 * to recreate it later.
2263 * <p>
2264 * We will need to hold a write lock if updating timestamps
2265 * when accessing the thumbnails; this may greatly reduce concurrency.
2266 *
2267 * @param rwl the main cache lock object; must not be null
2268 *
2269 * @throws java.io.IOException in case of serious problems
2270 * accessing the on-disc cache state
2271 */
2272 ExhibitThumbnails exhibitGetThumbnails(final ReentrantReadWriteLock rwl,
2273 final File cacheDir,
2274 final Name.ExhibitFull name,
2275 final SimpleLoggerIF logger)
2276 throws IOException
2277 {
2278 //System.out.println("MetaData.exhibitGetThumbnails(): requested to get thumbnail for: " + name);
2279
2280 // Start by grabbing the right flavour of main cache lock.
2281 // We don't need a lock on this metadata instance,
2282 // and not taking one allows concurrent disc reads.
2283 if(THUMBNAIL_ACCESS_UPDATES_ACCESS_TIMESTAMP) { _getWriteLock(rwl, "exhibitGetThumbnails()", logger); }
2284 else { _getReadLock(rwl, "exhibitGetThumbnails()", logger); }
2285 try
2286 {
2287 // Do we know about this exhibit?
2288 // Also mark exhibit entry as accessed
2289 // if that is our policy.
2290 final CachedFile cf = THUMBNAIL_ACCESS_UPDATES_ACCESS_TIMESTAMP ?
2291 exhibitMarkAsAccessed(rwl, name, logger, cacheDir) : exhibitGetInfo(name);
2292
2293 if((cf == null) || (cf.tnBytes == 0))
2294 { return(null); } // None cached.
2295
2296 // OK, try to get extant thumbnail,
2297 return(cf.getThumbnails(cacheDir));
2298 }
2299 finally
2300 {
2301 if(THUMBNAIL_ACCESS_UPDATES_ACCESS_TIMESTAMP) { rwl.writeLock().unlock(); }
2302 else { rwl.readLock().unlock(); }
2303 }
2304 }
2305
/**Saves (or purges) the thumbnails for an exhibit.
 * Returns true if successful, false if not.
 * <p>
 * This will fail (returning false) if the metaData is read only.
 * <p>
 * If the thumbnails argument is null, this will purge
 * any extant thumbnails for this exhibit;
 * if there is no cache entry at all for the exhibit
 * then there is nothing to purge and true is returned immediately.
 * <p>
 * This will create a new zero-length exhibit entry if
 * necessary.
 * <p>
 * Since these failures will be by returning false
 * rather than by causing an exception then this
 * can be used as a gentle unconditional way to make sure that
 * an entry does exist whether or not one did before.
 * <p>
 * This needs to access disc and so will need to
 * hold the main lock while it does so, grabbing
 * the metaData instance lock inside the main lock
 * where both need to be held simultaneously.
 * <p>
 * Note that this will have to grab a write lock.
 *
 * @param rwl  the main lock object; must not be null
 * @param gp  general properties, passed through to purge(); must not be null
 * @param cacheDir  the cache directory; must not be null
 * @param esa  the basic info on the exhibit to have an entry created; must not be null
 * @param tns  the thumbnails to save, or null to purge any extant thumbnails
 * @param logger  destination for trace and lock-wait messages
 * @param statsIDSCGEN  stats configuration passed through to purge() and entry creation
 *
 * @return true on success (including a purge request for an unknown exhibit);
 *     false if the metadata is read-only or a new cache entry could not be made
 *
 * @throws java.io.IOException  in case of serious problems
 *     accessing the on-disc cache state
 */
boolean exhibitSaveThumbnails(final ReentrantReadWriteLock rwl,
                              final GenProps gp,
                              final File cacheDir,
                              final ExhibitStaticAttr esa,
                              final ExhibitThumbnails tns,
                              final SimpleLoggerIF logger,
                              final StatsLogger.StatsConfig statsIDSCGEN)
    throws IOException
{
    // Note: tns may legitimately be null (purge request) so is not asserted.
    assert (rwl != null) && (gp != null) && (cacheDir != null) && (esa != null);

    // Fail gently if read-only.
    if(!isReadWrite()) { return(false); }

    // Optional diagnostics only; no effect on the save itself.
    if(TRACE_THUMBNAIL_ACTIVITY && ExhibitThumbnails.NO_THUMBNAILS.equals(tns)) { (new Throwable("asked to save NO_THUMBNAILS")).printStackTrace(); }
    if(TRACE_THUMBNAIL_ACTIVITY_ALL) { logger.log("MetaData.exhibitSaveThumbnails(): requested to "+(tns==null?"remove":(ExhibitThumbnails.NO_THUMBNAILS.equals(tns)?"save NO_THUMBNAILS":"save"))+" thumbnails for: " + esa.getCharSequence() + ": "+tns); }

    // Grab both locks in correct order:
    // the main cache write lock first, then this instance's monitor.
    _getWriteLock(rwl, "exhibitSaveThumbnails()", logger);
    try
    {
        synchronized(this)
        {
            // Do we know about this exhibit?
            CachedFile cf = exhibitGetInfo(esa.getExhibitFullName());

            if(cf == null)
            {
                // Apparently would be a new exhibit...

                // If asked to purge thumbnails, nothing to do!
                if(tns == null) { return(true); } // Easy!

                // Make space for new cache entry...
                purge(rwl,
                      gp,
                      cacheDir,
                      0,
                      esa.getExhibitFullName(),
                      logger, statsIDSCGEN);

                // If asked to save thumbnails
                // then we have to make a new cache entry.
                if(!exhibitCreateNewCacheEntry(rwl,
                                               cacheDir,
                                               esa,
                                               logger,
                                               statsIDSCGEN))
                    { return(false); } // Could not make new entry.
            }

            // Mark the cache entry as accessed
            // (since we want to avoid discarding what we just saved!)
            // This also (re)fetches the entry, which covers the just-created case.
            cf = exhibitMarkAsAccessed(rwl, esa.getExhibitFullName(), logger, THUMBNAIL_ACCESS_UPDATES_ACCESS_TIMESTAMP ? cacheDir : null);
            assert(cf != null);

            // If zapping thumbnails,
            // or if they already exist,
            // zap them now.
            // The zap hands back the entry to carry forward, so cf is reassigned.
            if((tns == null) || cf.hasThumbnails())
                { cf = cf.zapThumbnails(cacheDir); }

            // If saving new thumbnails...
            if(tns != null)
            {
                // Make space for new thumbnails,
                // using worst-case size estimate.
                purge(rwl,
                      gp,
                      cacheDir,
                      Math.max(1, ExhibitThumbnails.MAX_BYTES_EST),
                      esa.getExhibitFullName(),
                      logger, statsIDSCGEN);

                // Now actually do the save.
                // As with the zap, the returned entry replaces cf.
                cf = cf.saveThumbnails(cacheDir, tns);
            }

            // Save updated metadata...
            _update(rwl, cf, logger);

            return(true); // Done!
        }
    }
    finally { rwl.writeLock().unlock(); }
}
2421
2422
2423
2424
/**Serialisation version ID for this class's serialised form. */
private static final long serialVersionUID = -7596195262594968694L;
2427
2428 /**Write out a less-redundant form of our internal information.
2429 * Prevent (lazy-evaluation-caused) state changes while serialising
2430 * by being synchronized.
2431 * <p>
2432 * The more-efficient on-the-wire format also makes defensive
2433 * reading easier.
2434 */
2435 private synchronized void writeObject(final ObjectOutputStream oos)
2436 throws IOException
2437 {
2438 // Write the fields that we are not trying to optimise.
2439 // Note that this includes our length field.
2440 oos.defaultWriteObject();
2441
2442 // Send the primary map as a count and then the CachedFile objects
2443 // in name-sorted order to attempt to enhance compression
2444 // (by improving locality, with similar items close to one another).
2445 final int size = cachedExhibits.size();
2446 final CachedFile outValues[] = new CachedFile[size];
2447 cachedExhibits.values().toArray(outValues);
2448 Arrays.sort(outValues, (new Comparator<CachedFile>(){
2449 public final int compare(final CachedFile o1, final CachedFile o2)
2450 { return(TextUtils.compare(o1.name, o2.name)); }
2451 }));
2452 oos.writeInt(size);
2453 for(int i = size; --i >= 0; )
2454 { oos.writeObject(outValues[i]); }
2455 }
2456
2457 /**Deserialise. */
2458 private synchronized void readObject(final ObjectInputStream in)
2459 throws IOException, ClassNotFoundException
2460 {
2461 in.defaultReadObject();
2462
2463 // Defensively (and efficiently) read our primary map.
2464 final int size = in.readInt();
2465 if(size < 0)
2466 { throw new InvalidObjectException("bad stream: cachedExhibits.size() < 0"); }
2467 // Make suitably-sized new map.
2468 cachedExhibits = new Hashtable<Name.ExhibitFull, CachedFile>(size * 2 + 1);
2469 // Read in entries from stream.
2470 for(int i = size; --i >= 0; )
2471 {
2472 final CachedFile cf = (CachedFile) in.readObject();
2473 cachedExhibits.put(cf.name, cf);
2474 }
2475 assert(size == cachedExhibits.size()); // Should be no duplicates.
2476
2477 // Update derived data.
2478 _recomputeDerivedValues();
2479
2480 // Nothing currently being extended.
2481 _beingExtended = new Hashtable<Object, Long>();
2482
2483 validateObject(); // Validate state immediately.
2484 }
2485
2486 /**Validate fields/state.
2487 * Called in the constructor and possibly after de-serialising.
2488 * <p>
2489 * Barf if something bad is found.
2490 * (Maybe allow some extra info in debug version.)
2491 * <p>
2492 * Assumes _recomputeDerivedValues() has been called
2493 * before this is called since deserialisation or construction.
2494 */
2495 public synchronized void validateObject()
2496 throws InvalidObjectException
2497 {
2498 // Check that all components are sane and safe.
2499
2500 if(_beingExtended == null)
2501 { throw new InvalidObjectException("bad object: _beingExtended == null"); }
2502
2503 // cachedExhibits must be a Map from String to CachedFile.
2504 if(cachedExhibits == null)
2505 { throw new InvalidObjectException("bad object: cachedExhibits == null"); }
2506 // Check that the Map entries are correct and consistent.
2507 for(final Name.ExhibitFull name : cachedExhibits.keySet())
2508 {
2509 // if(!ExhibitName.validNameSyntax(name))
2510 // { throw new InvalidObjectException("bad object: cachedExhibits has invalid name as key"); }
2511 final CachedFile cf = cachedExhibits.get(name);
2512 if((cf == null) || !name.equals(cf.name))
2513 { throw new InvalidObjectException("bad object: cachedExhibits has invalid value"); }
2514 }
2515
2516 // Do some simple validation of derived (transient) values.
2517 if(totalBytes < 0)
2518 { throw new InvalidObjectException("bad object: totalBytes < 0"); }
2519 if(totalBytesHighWaterMark < 0)
2520 { throw new InvalidObjectException("bad object: totalBytesHighWaterMark < 0"); }
2521 if((exhibitsLRU == null) || (exhibitsLRU.size() != cachedExhibits.size()))
2522 { throw new InvalidObjectException("bad object: exhibitsLRU is invalid"); }
2523 }
2524
/**Cached estimate of the number of fully-cached exhibits; non-negative.
 * Private to getFullyCachedCount().
 * <p>
 * Marked volatile to allow thread-safe lock-free access.
 * <p>
 * Initially zero (the default for an int field) to force recomputation.
 * <p>
 * Not serialised (transient), so zero again after deserialisation.
 */
private transient volatile int _fullyCachedCount;
2535
2536 /**Return estimated count of known-fully-cached exhibits; non-negative.
2537 * It would be wise to force recomputation when a new AEP is loaded,
2538 * or after a precacheing round is complete,
2539 * or after any other major change in cache status.
2540 * <p>
2541 * Note that since the cached count is not serialised with the metadata
2542 * then it will need to be recomputed when metadata is loaded/deserialised.
2543 * <p>
2544 * Even without a recomputation being forced,
2545 * we may decide to do one if it seems that the value might be stale.
2546 * <p>
2547 * This will take time O(n) for an n-exhibit AEP to (re)compute its result,
2548 * but does minimal or no locking, and none for the duration of the method,
2549 * so is safe to launch in a background thread for example.
2550 * <p>
2551 * May be expensive (continually recomputing) if/while no exhibit
2552 * is fully locally cached, but this is assumed to be unlikely in practice.
2553 *
2554 * @param aep if non-null, allow recomputation against this AEP
2555 * @param force if true, force immediate recomputation against this AEP if non-null
2556 *
2557 * @return the estimate of exhibits whose data is entirely cached locally;
2558 * the count does not take account of locally-recreatable data
2559 * such as thumbnails,
2560 * and may be too low or too high
2561 */
2562 int getFullyCachedCount(final AllExhibitProperties aep,
2563 final boolean force)
2564 {
2565 final int fCC = _fullyCachedCount;
2566
2567 // Recompute if an AEP is available (non-null) and:
2568 // * the 'force' parameter is true, OR
2569 // * the cached value is zero or clearly invalid/stale.
2570 if(aep != null)
2571 {
2572 if(force || (fCC < 1) || (fCC > aep.aeid.length))
2573 {
2574 int newFCC = 0;
2575 for(final Name.ExhibitFull name : aep.aeid.getAllExhibitNamesSorted())
2576 {
2577 final CachedFile info = this.exhibitGetInfo(name);
2578 if(info == null) { continue; }
2579 final ExhibitStaticAttr staticAttr = aep.aeid.getStaticAttr(name);
2580 if(staticAttr == null) { continue; }
2581 if(info.cachedLength != staticAttr.length) { continue; }
2582 ++newFCC;
2583 }
2584 // Save the newly-computed value.
2585 _fullyCachedCount = newFCC;
2586 // Return the new value.
2587 if(IsDebug.isDebug) { System.out.println("INFO: fully-cached exhibits: " + newFCC); }
2588 return(newFCC);
2589 }
2590 }
2591
2592 // Return the cached answer.
2593 return(fCC);
2594 }
2595 }
2596
/**In-memory copy of whole-cache meta-data; never null.
 * Note that the read/write status may change at any time.
 * <p>
 * The instance is never replaced (the field is final and set once in the constructor);
 * the state is replaced in-situ if need be to ensure that we never have two
 * instances of this that believe they control the disc cache.
 */
private final MetaData metaData;
2605
2606 /**Return directly the number of partly- or fully- cached exhibits; never negative.
2607 * This may be more than the number of exhibits,
2608 * for example before deleted/renamed exhibits are removed.
2609 */
2610 public int getLiveCachedExhibitCount()
2611 {
2612 return(metaData.size());
2613 }
2614
2615
2616 /**Get the an instance copy of this class; may be a singleton.
2617 * If operating as a singleton then this
2618 * creates an instance on the first call; all subsequent requests/calls
2619 * are vetoed (at least in this servlet context and thus namespace)
2620 * unless the cacheDir matches that for the extant instance
2621 * in which case the new dataSource is ignored and the extant cache
2622 * instance is returned.
2623 * <p>
2624 * If the upstream source is an ExhibitDataFileSource
2625 * then this instance may assume that data access from the ExhibitDataFileSource
2626 * is only slightly more expensive than accessing its own local cache
2627 * (accessing the file source may involve powering-up bulk storage).
2628 * This will typically be the case on the master for example.
2629 *
2630 * @throws java.lang.IllegalStateException if this is a singleton,
2631 * and a request to create with a different cache dir to an
2632 * extant instance is made
2633 *
2634 * @throws IOException if cache directory does not exist
2635 * and/or cannot be created
2636 * (the containing directory passed in must always exist)
2637 */
2638 public static ExhibitDataSimpleCache cacheFactory(
2639 final SimpleExhibitPipelineIF dataSource,
2640 final File cacheDir,
2641 final SimpleLoggerIF logger)
2642 throws IOException,
2643 IllegalStateException
2644 { return(new ExhibitDataSimpleCache(dataSource, cacheDir, logger)); }
2645
2646
2647 /**Remove a persistent cache.
2648 * Pass in the same _cacheDir as for a call to cacheFactory(),
2649 * but this must not be called if a cache instance is using the cache.
2650 * <p>
2651 * Will not remove entries in cacheDir unrelated to the cache.
2652 * <p>
2653 * Do not use this lightly; it may discard gigabytes of useful state.
2654 *
2655 * @throws IOException in case of I/O error
2656 */
2657 public static void rmCache(final File cacheDir)
2658 throws IOException
2659 {
2660 if(cacheDir == null)
2661 { throw new IllegalArgumentException(); }
2662
2663 // Potentially very expensive/significant event,
2664 // so always report it.
2665 System.out.println("ExhibitDataSimpleCache: removing cache below: " + cacheDir);
2666
2667 final File baseDir = new File(cacheDir, CACHE_BASE_DIR);
2668 FileTools.rmRecursively(baseDir);
2669 }
2670
2671
2672
/**The upstream data source; never null.
 * Set once by the constructor, which rejects a null source.
 */
private final SimpleExhibitPipelineIF source;

/**The cache dir; never null once construction has succeeded.
 * The constructor throws rather than storing anything here
 * if the supplied path is null or is not an existing directory.
 */
private final File cacheDir;

/**The read/write lock for the whole cache except system variables; never null.
 * Any access that may update the cache state in memory or on disc
 * must be protected by a write lock.
 * <p>
 * Any access that may just read the cache need only have a read lock.
 * <p>
 * Most accesses may have to start by taking a write lock
 * (for example because they may have to fetch data from upstream
 * and insert it into the cache)
 * but can downgrade it to a read lock as soon as they know that they
 * will not be altering the cache at all or any further.
 * <p>
 * The main exception is any state component held in volatile fields.
 * <p>
 * Note that the variable store is internally thread-safe and does
 * not require protection by this lock.
 * <p>
 * Ideally we want performance (ie best throughput) rather than fairness,
 * but starvation of some users is not good;
 * the lock is constructed in fair mode.
 * <p>
 * We generally do not want lock attempts to block forever,
 * which means that we give up attempting to obtain a lock after
 * a given number of attempts (with a maximum time per attempt).
 * While this is intended to limit delays in the face of I/O problems,
 * this <em>may</em> rescue us from logic errors in extremis.
 */
private final ReentrantReadWriteLock rwl = new ReentrantReadWriteLock(true);

/**Maximum number of consecutive attempts to obtain lock for read or write; strictly positive.
 * Note that, as currently coded, _getWriteLock() and _getReadLock()
 * decrement their remaining-attempts counter after each timed-out attempt
 * and abort only when it goes negative,
 * so they give up after one more timed-out attempt than this value.
 */
private static final int MAX_LOCK_ATTEMPTS = 10;

/**Stack offset for _getXXXXLock() to find caller's stack frame.
 * Used as an index into Thread.getStackTrace() when logging a slow lock acquisition.
 * NOTE(review): presumably assumes that frame 0 is getStackTrace() itself
 * and frame 1 is the _getXXXXLock() method, which would make index 3
 * the caller of the method that asked for the lock -- confirm on the target JVM.
 */
private static final int _gXL_offset = 3;
2715
2716 /**Get the cache write lock, complaining/aborting if we have to wait for a long time.
2717 * @param rwl cache lock; never null
2718 * @throws InterruptedIOException if the thread is interrupted or locking is aborted
2719 */
2720 private static void _getWriteLock(final ReentrantReadWriteLock rwl,
2721 final String detail,
2722 final SimpleLoggerIF logger)
2723 throws InterruptedIOException
2724 {
2725 int lockAttemptsLeft = MAX_LOCK_ATTEMPTS;
2726 try
2727 {
2728 while(!rwl.writeLock().tryLock(30, TimeUnit.SECONDS))
2729 {
2730 final StackTraceElement[] stackTrace = Thread.currentThread().getStackTrace();
2731 final String caller =
2732 ((stackTrace == null) || (stackTrace.length <= _gXL_offset) || (stackTrace[_gXL_offset] == null)) ? "[no trace]" :
2733 (stackTrace[_gXL_offset].toString());
2734 logger.log("[ExhibitDataSimpleCache: waiting to obtain write lock: "+rwl.writeLock()+" "+detail+": "+caller+" isWriteLocked()="+rwl.isWriteLocked()+" getReadLockCount()="+rwl.getReadLockCount()+"]");
2735 if(--lockAttemptsLeft < 0) { throw new InterruptedIOException("too write many lock attempts"); }
2736 }
2737 }
2738 catch(final InterruptedException x)
2739 {
2740 x.printStackTrace();
2741 throw new InterruptedIOException("ExhibitDataSimpleCache: interrupted while waiting to obtain write lock..." +detail+": " + x.getMessage());
2742 }
2743 }
2744
2745 /**Get a cache read lock, complaining/aborting if we have to wait for a long time.
2746 * We complain sooner waiting for a read lock rather than the write lock,
2747 * since read locks are expected to be easier/quicker to obtain.
2748 *
2749 * @param rwl cache lock; never null
2750 * @throws InterruptedIOException if the thread is interrupted or locking is aborted
2751 */
2752 private static void _getReadLock(final ReentrantReadWriteLock rwl,
2753 final String detail,
2754 final SimpleLoggerIF logger)
2755 throws InterruptedIOException
2756 {
2757 int lockAttemptsLeft = MAX_LOCK_ATTEMPTS;
2758 try
2759 {
2760 while(!rwl.readLock().tryLock(11, TimeUnit.SECONDS))
2761 {
2762 final StackTraceElement[] stackTrace = Thread.currentThread().getStackTrace();
2763 final String caller =
2764 ((stackTrace == null) || (stackTrace.length <= _gXL_offset) || (stackTrace[_gXL_offset] == null)) ? "[no trace]" :
2765 (stackTrace[_gXL_offset].toString());
2766 logger.log("[ExhibitDataSimpleCache: waiting to obtain read lock: "+rwl.writeLock()+" "+detail+": "+caller+"]");
2767 if(--lockAttemptsLeft < 0) { throw new InterruptedIOException("too many read lock attempts"); }
2768 }
2769 }
2770 catch(final InterruptedException x)
2771 {
2772 x.printStackTrace();
2773 throw new InterruptedIOException("ExhibitDataSimpleCache: interrupted while waiting to obtain read lock..." +detail+": " + x.getMessage());
2774 }
2775 }
2776
2777
/**Wrap a new cache instance around a data source.
 * This is private so that we can enforce a singleton pattern
 * and avoid multiple simultaneous users of the underlying
 * file-based cache.
 * <p>
 * We try to load the cache meta-data and exhibit properties
 * from persisted copies. We can survive without the exhibit
 * properties, but if we can't load our meta data the default
 * value we use is read-only so that we don't trust it until
 * it's been checked against disc, presumably in the background.
 * <p>
 * The exhibit properties are reloaded asynchronously on a pool thread
 * and that load may still be running when this constructor returns.
 *
 * @param dataSource  the upstream data source; must not be null
 * @param cacheDir  existing directory in which the cache lives; must not be null
 * @param logger  the logger for this cache; must not be null
 *
 * @throws IllegalArgumentException  if any argument is null
 * @throws IOException  if cache directory does not exist
 *     and/or cannot be created
 *     (the containing directory passed in must always exist)
 */
private ExhibitDataSimpleCache(final SimpleExhibitPipelineIF dataSource,
                               final File cacheDir,
                               final SimpleLoggerIF logger)
    throws IOException
{
    if(dataSource == null)
        { throw new IllegalArgumentException("null source"); }
    if(cacheDir == null)
        { throw new IllegalArgumentException("null cacheDir"); }
    if(logger == null)
        { throw new IllegalArgumentException("null logger"); }

    // Used only for the debug timing output below.
    final long startTime = System.currentTimeMillis();

    source = dataSource;

    this.logger = logger;
    statsIDSCGEN =
        new StatsLogger.StatsConfig("SIMPLECACHE-GENERAL",
                                    logger,
                                    false, // Only dump summaries...
                                    8 * 3600, // About every 8 hours.
                                    true); // Adaptive.

    // Set up our variable manager.
    varMgr = new PipelineVarMgr(dataSource, false);
    if(IsDebug.isDebug) { System.out.println("[ExhibitDataSimpleCache: cons [1] "+(System.currentTimeMillis()-startTime)+".]"); }

    // Use the cache dir only if it looks reasonably plausible,
    // ie it must already exist as a directory.
    final String message1 = "[ExhibitDataSimpleCache: cache dir = " + cacheDir + ", max cache size = " + TextUtils.sizeAsText(LocalProps.getWEBSVR_MAX_EX_CACHE_BYTES(), true) + "]";
    logger.log(message1);
    if(IsDebug.isDebug) { System.out.println(message1); }
    if(!cacheDir.isDirectory())
        { throw new IOException("missing cache directory " + cacheDir); }
    this.cacheDir = cacheDir;

    if(IsDebug.isDebug) { System.out.println("[ExhibitDataSimpleCache: cons [2] "+(System.currentTimeMillis()-startTime)+".]"); }

    // If things are still looking rosy then try to load the
    // exhibit properties and cache meta-data immediately...
    // We quietly give up if we have any problems with loading,
    // eg due to a corrupt file or a changed class definition.

    // Attempt to make our base cache directory if necessary.
    // The mkdirs() result is not checked directly;
    // the isDirectory() re-test that follows catches any failure.
    final File baseDir = new File(this.cacheDir, CACHE_BASE_DIR);
    if(!baseDir.isDirectory()) { baseDir.mkdirs(); }
    if(!baseDir.isDirectory())
        { throw new IOException("cannot make cache data store directory: " + baseDir); }


    // Load any persisted/cached event history data that we can find
    // *iff* our upstream source is not already 'local' and thus fast to access
    // which would make any cacheing here redundant.
    // Any such cached state may be too stale to be usable,
    // but if so then that will be dealt with automatically.
    // Absorb but report any errors encountered...
    // We may have to make the history dir, eg if this is the first run.
    if(!upstreamSourceIsLocal())
    {
        final File evhd = new File(cacheDir, EVENT_HISTORY_DIR);
        if(!evhd.isDirectory()) { evhd.mkdirs(); }
        // We need this done before we can do much else...
        varMgr.loadEventHistories(evhd, true);
    }
    if(IsDebug.isDebug) { System.out.println("[ExhibitDataSimpleCache: cons [3] "+(System.currentTimeMillis()-startTime)+".]"); }


    // Initialise cache metadata state.
    MetaData md = new MetaData(); // Read-only empty cache info.
    // Try to load cached meta-data...
    // Any failure here is reported but otherwise absorbed,
    // leaving us with the empty read-only default above.
    try {
        final File f = new File(this.cacheDir, CACHE_METADATA_FILENAME);
        if(f.isFile() && f.canRead())
        {
            final MetaData tmpMd = (MetaData) FileTools.deserialiseFromFile(f, STORE_EXPROPS_GZIPED);
            if(tmpMd != null)
            {
                md = tmpMd;
                logger.log("[ExhibitDataSimpleCache: loaded metaData, exhibit count: "+tmpMd.size()+".]");
            }
        }
    }
    catch(final Exception e) { e.printStackTrace(); } // Complain!
    metaData = md; // Capture cache meta-data.
    if(IsDebug.isDebug) { System.out.println("[ExhibitDataSimpleCache: cons [4] "+(System.currentTimeMillis()-startTime)+".]"); }


    // Try to load cached exhibit properties ASAP
    // (after everything else is set up and ready).
    // This may take significant time so is worth doing asynchronously.
    // We allow this to continue asynchronously after the constructor returns
    // to overlap with other work,
    // and we attempt to exclude polling upstream for a new AEP until it is finished.
    // We assume that this is significantly CPU-bound.
    // FIXME : exposes this not-completely-constructed instance to the pool thread, so move out of the thread to the factory method.
    ThreadUtils.computeIntensiveThreadPool.submit(new Runnable(){
        public final void run()
        {
            try {
                final File f = new File(cacheDir, CACHE_EXPROPS_FILENAME);
                if(f.isFile() && f.canRead())
                {
                    logger.log("[ExhibitDataSimpleCache: reloading cached AEP from disc...]");
                    // Hold the AEP lock across the whole load and in-memory update.
                    _gAEP_lock.lock();
                    try
                    {
                        final AllExhibitProperties aep =
                            (AllExhibitProperties) FileTools.deserialiseFromFile(f, STORE_EXPROPS_GZIPED);
                        if(aep != null)
                        {
                            // Now cache in memory.
                            _getAllExhibitProperties_postUpdate(aep, true);
                            logger.log("[ExhibitDataSimpleCache: loaded AEP, exhibit count: "+aep.aeid.size()+".]");
                        }
                    }
                    finally { _gAEP_lock.unlock(); }
                }
            }
            catch(final Exception e) { e.printStackTrace(); } // Complain!
        }
    });
    if(IsDebug.isDebug) { System.out.println("[ExhibitDataSimpleCache: cons [5] "+(System.currentTimeMillis()-startTime)+".]"); }


    // If we loaded some exhibit properties but no metadata,
    // then this may indicate a nasty problem...
    // We may not be able to tell if the AEP has not yet completed loading,
    // so we do this test as late as possible.
    if(md.isEmpty() && (_AEP.aeid.size() > 0))
    {
        logger.log("WARNING: ExhibitDataSimpleCache: NOT loaded metaData.");
        // Leave metadata read-only, and check it ASAP...
    }
    // Else, if we are being optimistic, mark meta-data read/write immediately.
    else if(ASSUME_LOADED_METADATA_OK)
    {
        md.setReadWrite(true);
        // Put off first metadata check until after warm-up as all is probably OK,
        // and there's no point doing this for a short-lived process anyway.
        // (Earliest check time is 15 minutes from now plus a random extra of up to 30 minutes.)
        _checkMetaData_notBefore = System.currentTimeMillis() +
            (15 * 60 * 1000) + Rnd.fastRnd.nextInt(30 * 60 * 1000);
    }

    final long endTime = System.currentTimeMillis();
    if(IsDebug.isDebug) { System.out.println("[ExhibitDataSimpleCache: cons time "+(endTime-startTime)+".]"); }

    // A little logging of cache/filesystem state.
    final long fsSpaceUsable = FileTools.estimatedFreeSpaceBelowReserve(cacheDir, MetaData.MIN_FS_PERCENT_FREE);
    logger.log("INFO: ExhibitDataSimpleCache: cache filesystem ["+cacheDir+"] usable space (bytes): " + fsSpaceUsable);
    if(IsDebug.isDebug) { System.out.println("ExhibitDataSimpleCache: cache filesystem ["+cacheDir+"] usable space (bytes): " + fsSpaceUsable); }
    // A negative value here is treated as the cache being over-full by that many bytes.
    final long freeSpace = md.computeFreeSpaceBelowHighWaterMark(cacheDir);
    if(freeSpace < 0) { logger.log("WARNING: ExhibitDataSimpleCache: cache (over)full (bytes): " + (-freeSpace)); }
    else { logger.log("INFO: ExhibitDataSimpleCache: free space in cache (bytes): " + freeSpace); }
    if(freeSpace <= 0) { System.err.println("WARNING: ExhibitDataSimpleCache: cache full: " + freeSpace); }
    if(IsDebug.isDebug) { System.out.println("ExhibitDataSimpleCache: cache space free (bytes): " + freeSpace); }
    logger.log("INFO: ExhibitDataSimpleCache: estimated cache size (bytes): " + md.getTotalBytesCurrentlyUsedByCache());
    if(IsDebug.isDebug) { System.out.println("ExhibitDataSimpleCache: estimated cache size (bytes): " + md.getTotalBytesCurrentlyUsedByCache()); }
}
2951
2952 /**An AllExhibitProperties.ExhibitDataSource wrapping ourselves; never null. */
2953 private final AllExhibitProperties.ExhibitDataSource exhibitDataSource =
2954 (new AllExhibitProperties.ExhibitDataSource(){
2955 @Override public final void getRawFile(final ByteBuffer buf, final ExhibitFull exhibitName, final int position)
2956 throws IOException
2957 { ExhibitDataSimpleCache.this.getRawFile(buf, exhibitName, position, false); }
2958 /**Fully loaded if all in cache.
2959 * False if exhibit not present or not full length.
2960 */
2961 @Override public final boolean isExhibitFullyLoaded(final ExhibitStaticAttr esa)
2962 //throws IOException
2963 {
2964 assert(ExhibitDataSimpleCache.this != null);
2965 assert(metaData != null);
2966 return(metaData.exhibitIsFullyLoaded(esa));
2967 }
2968 });
2969
2970
/**Maximum number of threads that may run in _asyncTNFetch() and other local discardable data read-ahead tasks; strictly positive.
 * We limit the amount of threading by:
 * <ul>
 * <li>The number of available CPUs (as sometimes this work may be CPU intensive).
 * <li>The likely limit on back-end connectivity (eg HTTP-connection-count limited)
 *     (given that this cache will often be upstream of an HTTP tunnel).
 * <li>Likely limits on local and upstream disc throughput.
 * <li>The likely strain on other resources, such as memory.
 * </ul>
 * This limit/count/cap should generally be &gt;1 since the work is mainly I/O bound
 * and may be subject to significant latency,
 * but should generally be not much more than (say)
 * half the maximum simultaneous outbound tunnel HTTP connection count
 * since overuse of concurrency for such connections may be vetoed anyway.
 * <p>
 * Currently a fixed value; the commented-out expression alongside
 * records a computed alternative that is not in use.
 */
private static final int MAX_THREADS_aTWQ = 2; // Math.max(2, Math.min((ExhibitDataHTTPTunnelSource.MAX_CONCURRENT_CONNECTIONS/2), ThreadUtils.AVAILABLE_PROCESSORS));

/**Maximum number of async thumbnail fetches to queue; strictly positive.
 * Enough to allow all of (say) one of the 'new' or 'best' pages' thumbnails to be queued.
 * Used as the capacity of the read-ahead thread pool's bounded work queue.
 */
private static final int MAX_QUEUED_TN_FETCHES = 128;
2992
/**Thread pool for discardable, mainly-I/O-bound read-ahead work (eg thumbnail fetching).
 * The thread count is capped at MAX_THREADS_aTWQ,
 * which also protects upstream servers from excess load.
 * <p>
 * Up to MAX_QUEUED_TN_FETCHES tasks can be queued;
 * any excess is handled by silently discarding the oldest queued item.
 * <p>
 * Threads come from a ThreadUtils.DaemonThreadFactory,
 * so (being daemon threads) will not prevent the JVM from exiting.
 * <p>
 * All threads, core ones included, time out after 2 minutes idle
 * and thus release their resources.
 */
private final ThreadPoolExecutor discardableReadAheadTaskThreadPool;
{
    final ThreadPoolExecutor pool = new ThreadPoolExecutor(
        Math.min(2, MAX_THREADS_aTWQ), // Core pool size.
        MAX_THREADS_aTWQ, // Maximum pool size.
        120L, TimeUnit.SECONDS, // Keep worker threads alive for 2 minutes...
        new ArrayBlockingQueue<Runnable>(MAX_QUEUED_TN_FETCHES), // Allow some work to be queued.
        new ThreadUtils.DaemonThreadFactory("ExhibitDataSimpleCache.discardableReadAheadTaskThreadPool", false),
        new ThreadPoolExecutor.DiscardOldestPolicy()); // Drop the oldest queued task on overflow.
    // Let even the core threads expire when idle.
    pool.allowCoreThreadTimeOut(true);
    discardableReadAheadTaskThreadPool = pool;
}
3012
3013
3014 /**Get a chunk of the raw exhibit binary.
3015 * The call may return less than the the buffer capacity,
3016 * though will block until it has read at least one byte unless at EOF or for a zero-byte request;
3017 * this will be clear from the state of the buffer.
3018 * <p>
3019 * The name, start byte offset/position and a buffer to fill are supplied.
3020 *
3021 * @param position must be non-negative and less than the exhibit size in bytes
3022 * @param dontCache if true do not cache locally, unless we have lots of free space
3023 */
3024 public void getRawFile(final ByteBuffer buf,
3025 final Name.ExhibitFull exhibitName, final int position,
3026 final boolean dontCache)
3027 throws IOException
3028 {
3029 // Note inbound request for raw exhibit data.
3030 StatsLogger.captureDataPoint(statsIDSCGEN, SCGNAME_EXDATAREQIN);
3031 if(dontCache)
3032 { StatsLogger.captureDataPoint(statsIDSCGEN, SCGNAME_EXDATAREQINDC); }
3033
3034 // try
3035 // {
3036 final AllExhibitImmutableData aeid = _AEP.aeid;
3037 boolean succeeded = false;
3038 try
3039 {
3040 metaData.exhibitRead(rwl,
3041 null, // From upstream/master.
3042 cacheDir,
3043 exhibitName,
3044 source,
3045 aeid,
3046 genProps,
3047 position,
3048 buf,
3049 dontCache ? Boolean.TRUE : Boolean.FALSE,
3050 logger, statsIDSCGEN);
3051 succeeded = true;
3052
3053 // Any (successful) activity against this cache instance spurs us into life.
3054 if(!_userRequestedDataFromCache)
3055 {
3056 _userRequestedDataFromCache = true;
3057 logger.log("INFO: user requested data from cache: precacheing may start");
3058 }
3059 }
3060 finally
3061 {
3062 // Be prepared to spin off a read-ahead thread for any uncached tail portion.
3063 // Only do this if precacheing is currently allowed,
3064 // and if this appears to be a normal data request from an end user,
3065 // eg via the exhibit servlet.
3066 // This prevents precache requests (ie not directly human-requested)
3067 // from being wastefully routed through many intermediate servers
3068 // and/or causing a huge avalanche of unnecessary activity.
3069 //
3070 // IF THE NORMAL FETCH FAILED (eg disc trouble, or master down)
3071 // THEN RETRY THIS FETCH FROM A PEER TO ATTEMPT RECOVERY OF THE DATA.
3072 // (This recovery attempt happens in the background, ie asynchronously.)
3073 if(ALLOW_DATA_FETCH_FROM_PEERS && (!dontCache) &&
3074 ((!succeeded) || metaData.canPrecacheExhibitData(cacheDir)))
3075 {
3076 final boolean peerFetchForRecovery = !succeeded;
3077
3078 final ExhibitStaticAttr esa = aeid.getStaticAttr(exhibitName);
3079 final CachedFile cf = metaData.exhibitGetInfo(exhibitName);
3080
3081 // Only proceed if it looks like there is something useful to be done.
3082 if((esa != null) && (cf != null))
3083 {
3084 // Compute maximum number of tail bytes of exhibit we could logically cache.
3085 final int tailBytesStillCacheable =
3086 (int) Math.max(0, Math.min(esa.length, _getMaximumCacheableBytesForOneExhibit(genProps)) - cf.cachedLength);
3087 // Only actually do the precacheing if the end of user's request
3088 // (and thus where we anticipate that the next one may start)
3089 // is close to (or beyond) the end of what we have already cached.
3090 // (Or if we are recovering from an error.)
3091 if((tailBytesStillCacheable > 0) &&
3092 (!succeeded || (position + 4*MAX_TRANSFER_CHUNK_SIZE >= cf.cachedLength)))
3093 {
3094 // Never block.
3095 // Don't do this speculative work at all if the pool is full.
3096 ThreadUtils.nonCPUThreadPoolDiscardable.submit(new Runnable(){
3097 public final void run()
3098 {
3099 try
3100 {
3101 // Recheck just before running that this is still appropriate.
3102 if(!peerFetchForRecovery && !metaData.canPrecacheExhibitData(cacheDir)) { return; /* Abort. */ }
3103 final CachedFile cf2 = metaData.exhibitGetInfo(exhibitName);
3104 if((cf2 == null) || (cf2.cachedLength != cf.cachedLength)) { return; /* Abort. */ }
3105
3106 // Cache from the end of what we have already cached,
3107 // regardless of the caller's start position.
3108 _getExhibitDataFromUpstreamToPrecache(esa,
3109 aeid,
3110 genProps,
3111 cf.cachedLength,
3112 Math.min(tailBytesStillCacheable, MAX_TRANSFER_CHUNK_SIZE),
3113 peerFetchForRecovery); // Is this an error-recovery attempt?
3114 }
3115 catch(final IOException e)
3116 { /* Silently discard errors from this speculative work. */ }
3117 }
3118 });
3119 }
3120 }
3121 }
3122 }
3123 // }
3124 // finally
3125 // {
3126 // // We must not be holding a (write) lock on exit.
3127 // // We log any held lock and break it and propagate an Error.
3128 // // Of course, this should NEVER happen,
3129 // // but the cost of checking for it is low, so we'll always check.
3130 // if(rwl.isWriteLockedByCurrentThread())
3131 // {
3132 // rwl.writeLock().unlock();
3133 // System.err.println("SEVERE ERROR: getRawFile(): holding write lock on exit from cache");
3134 // throw new Error("SEVERE ERROR: getRawFile(): holding write lock on exit from cache");
3135 // }
3136 // }
3137 }
3138
3139 /**Computes the maximum number of bytes to cache from (the start of) any one exhibit; strictly positive.
3140 * Ensures that no one exhibit can monopolise the entire cache,
3141 * but also that at least a small chunk of the start of any exhibit
3142 * <em>is</em> logically permitted,
3143 * <p>
3144 * No one exhibit is allowed to grow to more than a few percent of the cache space,
3145 * though this limit may only be checked at each point that an exhibit might be extended in cache.
3146 */
3147 static int _getMaximumCacheableBytesForOneExhibit(final GenProps gp)
3148 {
3149 assert(gp != null);
3150 return(Math.min(gp.getWEBSVR_MAX_CACHEABLE_EX_BYTES(), Math.max(MAX_EXTD_TRANSFER_CHUNK_SIZE,
3151 (int) Math.min(LocalProps.getWEBSVR_MAX_EX_CACHE_BYTES() >> 5, Integer.MAX_VALUE))));
3152 }
3153
3154 /**Get the static attributes for a given exhibit; null if no such exhibit.
3155 * We get this from our cache of the immutable data rather than
3156 * going to the source directly.
3157 * We don't block or hold any locks to fetch this.
3158 * <p>
3159 * Returns null if the named exhibit does not exist.
3160 */
3161 public final ExhibitStaticAttr getStaticAttr(final ExhibitFull name)
3162 //throws IOException
3163 {
3164 // Return value from cache without blocking...
3165 return(_AEP.aeid.getStaticAttr(name));
3166 }
3167
3168 /**Gets all static exhibit data if its timestamp is not that specified.
3169 * If the time specified is negative the object will be returned unconditionally.
3170 * <p>
3171 * If no exhibits are currently installed then a default set with a zero
3172 * timestamp is returned.
3173 * <p>
3174 * If the caller's copy appears to be up-to-date (eg the oldStamp
3175 * matches that that would have been returned) null is returned.
3176 * <p>
3177 * We get this from our cache of the immutable data rather than
3178 * going to the source directly.
3179 * We don't block or hold any locks to fetch this.
3180 */
3181 public final AllExhibitImmutableData getAllExhibitImmutableData(final long oldStamp)
3182 //throws IOException
3183 {
3184 final AllExhibitImmutableData cached = _AEP.aeid;
3185
3186 if((oldStamp < 0) || (oldStamp != cached.timestamp))
3187 { return(cached); }
3188
3189 // Caller seems to have up-to-date copy already.
3190 return(null);
3191 }
3192
3193 /**Gets set of all exhibit properties if its hash is not that specified.
3194 * If the hash specified is negative the object will be returned unconditionally.
3195 * <p>
3196 * If no exhibits are currently installed a default set with a zero
3197 * timestamp is returned.
3198 * <p>
3199 * If the caller's copy appears to be up-to-date (eg the oldHash
3200 * matches that that would have been returned) null is returned.
3201 * <p>
3202 * We get this from our cache rather than
3203 * going to the source directly.
3204 * We don't block or hold any locks to fetch this.
3205 */
3206 public AllExhibitProperties getAllExhibitProperties(final long oldHash)
3207 //throws IOException
3208 {
3209 final AllExhibitProperties cached = _AEP;
3210
3211 if((oldHash < 0) || (oldHash != cached.longHash))
3212 { return(cached); }
3213
3214 // Caller seems to have up-to-date copy already.
3215 return(null);
3216 }
3217
3218 /**Private lock for _getAllExhibitProperties()/constructor to prevent re-entry and multiple concurrent AEP fetches. */
3219 private final ReentrantLock _gAEP_lock = new ReentrantLock();
3220
3221 /**Minimum time before attempting to poll again for AEP while we don't have a real one loaded (ms). */
3222 private static final int MIN_AEP_POLL_TIME_UNTIL_LOADED_MS = LocalProps.fastStartMode() ? 9991 : 41291;
3223
3224 /**Attempts to get all exhibit properties if our cached copy may be stale.
3225 * Because fetching/computing this value can take a very long time
3226 * (upwards of several tens of minutes)
3227 * we attempt to split the activity into two parts, and
3228 * have the actual computation/fetch done in the background,
3229 * and then an atomic post of the results back to the cache proper.
3230 * <p>
3231 * We also adaptively attempt to use an AEP diff fetch if one is available
3232 * (ie if the underlying connection is a tunnel).
3233 */
3234 private void _getAllExhibitProperties()
3235 {
3236 // Quit immediately if we're actually busy doing a fetch...
3237 if(_gAEP_lock.isLocked())
3238 { return; } // Still busy...
3239
3240 // Normally we try polling for the AEP much more frequently
3241 // while we don't yet have a non-empty instance.
3242 final boolean aepNotYetLoaded = (_AEP.aeid.length == 0);
3243
3244 final int targetMaxAEPAge =
3245 LocalProps.getServerSlowdownFactor() * genProps.getWEBSVR_MIN_EX_IMATTR_RECHECK_MS();
3246 // Absolute max time we are prepared to postpone recheck/poll.
3247 // Should be plenty of time to warm up (many minutes).
3248 final int absMaxAge = Math.max(21*60*1000, CoreConsts.DEFAULT_TEMPORAL_SLACKNESS_S/2);
3249 // Postpone somewhat if not wallowing in free memory or if (temporarily) conserving power.
3250 // TODO: postpone also if CPU is busy.
3251 final int maxAge = (MemoryTools.lotsFree() && !upstreamStratum.isUpstreamConserving() && !GenUtils.mustConservePower()) ?
3252 targetMaxAEPAge : Math.max(targetMaxAEPAge, absMaxAge);
3253 // If we haven't yet loaded any AEP, poll quickly until we get one...
3254 if(System.currentTimeMillis() - _lastPollAEP <=
3255 (aepNotYetLoaded ? MIN_AEP_POLL_TIME_UNTIL_LOADED_MS : maxAge))
3256 { return; }
3257
3258 // Use low-priority background thread to get AEP...
3259 // Use 'discardable' pool to avoid starting poll if we're busy with other work.
3260 // The poll is memory and , and CPU intensive at least in parts.
3261 ThreadUtils.lowPriorityThreadPoolDiscardable.submit(new Runnable(){
3262 public final void run()
3263 {
3264 final long startTime = System.currentTimeMillis();
3265
3266 // If we can't get the lock immediately
3267 // then give up immediately to try again later.
3268 if(!_gAEP_lock.tryLock()) { return; }
3269 try
3270 {
3271 // Postpone next fetch now just in case we fail to start new thread
3272 // or the old one is still running, ie very slow.
3273 _lastPollAEP = System.currentTimeMillis();
3274
3275 // Capture current AEP.
3276 final AllExhibitProperties curAEP = _AEP;
3277
3278 try
3279 {
3280 final AllExhibitProperties new_AEP;
3281 // Try to fetch new value; this might take a while or even fail...
3282 logger.log("Polling for new AEP: current hash " + curAEP.longHash);
3283 //System.out.println("*** EDSC: POLLING FOR NEW AEP: current hash " + curAEP.longHash);
3284 if(source instanceof ExhibitDataTunnelSource)
3285 {
3286 // New, potentially faster way to fetch AEP.
3287 new_AEP = ((ExhibitDataTunnelSource) source).getAllExhibitProperties(curAEP, true);
3288 }
3289 else // Use generic mechanism.
3290 {
3291 new_AEP = source.getAllExhibitProperties(curAEP.longHash);
3292 }
3293 // Post the update back to the cache...
3294 // even if the value is null
3295 // so as to let us the save current values to disc.
3296 _getAllExhibitProperties_postUpdate(new_AEP, false);
3297 }
3298 // If a transient error occurs then we won't post an update
3299 // of the AEP value and will have to try again later.
3300 catch(final InterruptedIOException e) // We'll note that an error occurred, but otherwise ignore it.
3301 {
3302 logger.log("[ExhibitDataSimpleCache: transient error trying to fetch and update AEP in background: " + e.getMessage() + ".]");
3303 }
3304 // If an error occurs then we won't post an update
3305 // of the AEP value and will have to try again later.
3306 catch(final Throwable e) // We'll note that an error occurred, with full trace, but otherwise ignore it.
3307 {
3308 logger.log("[ExhibitDataSimpleCache: error trying to fetch and update AEP in background: " + e.getMessage() + ".]");
3309 e.printStackTrace();
3310 }
3311 finally
3312 {
3313 final long endTime = System.currentTimeMillis();
3314 _lastPollAEP = endTime; // Schedule next poll.
3315
3316 // For the record note how long we took to get AEP
3317 // unless it took very little time at all.
3318 final long ms = endTime - startTime;
3319 if(ms >= 10001) { logger.log("[ExhibitDataSimpleCache: fetch/update of AEP in background took "+ms+"ms.]"); }
3320 }
3321
3322 }
3323 finally { _gAEP_lock.unlock(); }
3324 }
3325 });
3326 }
3327
3328 /**Accepts a (new) AEP value posted from a background thread.
3329 * May be called at initialisation to reload cached state,
3330 * and when a poll of the upstream source returns a new AEP.
3331 * <p>
3332 * Must grab a write lock to (potentially) update/change the cache.
3333 * <p>
3334 * If the static exhibit data was stale then we also clear our in-memory
3335 * raw-exhibit data cache entirely, to be refilled by slower means.
3336 *
3337 * @param inCons if true then this was called from the constructor
3338 * so we don't save the AEP nor do some other expensive things
3339 * that may rely on external mechanisms not yet set up
3340 */
3341 private void _getAllExhibitProperties_postUpdate(final AllExhibitProperties new_AEP,
3342 final boolean inCons)
3343 throws IOException
3344 {
3345 // If the size of the new exhibit list is zero
3346 // then discard it on the grounds that it is almost certainly an error.
3347 if((new_AEP != null) && (new_AEP.aeid.size() == 0))
3348 {
3349 logger.log("[ExhibitDataSimpleCache: rejecting null/zero-size AEP update at "+(new Date())+".]");
3350 return;
3351 }
3352
3353 // We only need to consider doing an update if we got
3354 // a non-null response.
3355 if(new_AEP != null)
3356 {
3357 // OK, we got an updated value.
3358
3359 // Log the receipt of a new exhibit set.
3360 final String message = "[ExhibitDataSimpleCache: new AEP: " +
3361 "old/new timestamp|count|hash: " +
3362 (new Date(_AEP.aeid.timestamp)) + "|" + _AEP.aeid.length + "|" + _AEP.longHash +
3363 " / " +
3364 (new Date(new_AEP.aeid.timestamp)) + "|" + new_AEP.aeid.length + "|" + new_AEP.longHash +
3365 " at " +(new Date()) + ".]";
3366 logger.log(message);
3367
3368 // Just in case we've somehow been sent a duplicate of what we already have,
3369 // whinge loudly and discard it to avoid lots of unnecessary work downstream.
3370 if(new_AEP.equals(_AEP))
3371 {
3372 logger.log("ERROR: duplicate AEP received and discarding...");
3373 System.err.println("ERROR: duplicate AEP received and discarded: " + message);
3374 return;
3375 }
3376
3377 // We attempt to recover some expensive-to-recompute data...
3378 // This has to be done before any other AEP activity
3379 // else it may be blocked for security reasons
3380 // and any extant expensive-to-compute old EPCM may be lost.
3381 new_AEP.recoverOldExhibitPropsComputableMutableData(_AEP, logger);
3382
3383 // Register the new AEP to be automatically compacted when memory is stressed.
3384 MemoryTools.registerCompactable(new_AEP);
3385
3386 // OK, post the new cache-visible AEP value (atomically)...
3387 // This should cause some AEP-linked downstream caches to be cleared soon.
3388 _AEP = new_AEP;
3389 logger.log("[ExhibitDataSimpleCache: posted new AEP.]");
3390
3391 // Compact the AEP now to try to free some memory before saving
3392 // and other work that will consume resources...
3393 new_AEP.compact();
3394 logger.log("[ExhibitDataSimpleCache: compacted AEP.]");
3395
3396 // Do some speculative warm-up work if we have spare resources...
3397 // Always do this warm-up work if in fast-start mode
3398 // since this may be an especially-important customer-facing site
3399 // for which we want to minimise time to see the first page
3400 // after any AEP update.
3401 if(LocalProps.fastStartMode() || !GenUtils.mustConservePower())
3402 {
3403 // Try to bring the vote cache up-to-date
3404 // using a pool thread to allow other activities to proceed.
3405 // This is likely to be I/O bound and may take seconds-->minutes.
3406 // If this doesn't run it's not fatal, just annoying to whoever waits for it.
3407 ThreadUtils.nonCPUThreadPoolDiscardable.submit(new Runnable(){
3408 public final void run()
3409 { try { new_AEP.updateVoteCache(varMgr, true); } catch(final Throwable t) { t.printStackTrace(); } }
3410 });
3411
3412 // In a pool thread precompute/cache some often-used values.
3413 // This should result in better performance seen by users.
3414 // But if this doesn't run it's not fatal, just annoying to whoever waits for it.
3415 ThreadUtils.lowPriorityThreadPoolDiscardable.submit(new Runnable(){
3416 public final void run()
3417 {
3418 new_AEP.getTotalExhibitBytes(); // Used on all HTML catalogue pages: high priority.
3419 new_AEP.getCategoryExhibitCounts(); // Used on the front page: lower priority.
3420 // Force recompute of 'fully-loaded' measure for this new AEP.
3421 metaData.getFullyCachedCount(new_AEP, true);
3422 }
3423 });
3424
3425 logger.log("[ExhibitDataSimpleCache: warming up AEP...]");
3426 }
3427
3428 // Save new AEP to disc after posting to memory
3429 // so that even if the save causes an OOM we still see the new AEP!
3430 if(!inCons)
3431 {
3432 _save_AEP();
3433 logger.log("[ExhibitDataSimpleCache: saved new AEP to disc, hash "+new_AEP.longHash+".]");
3434 }
3435 }
3436 }
3437
3438 /**Save AllExhibitProperties to disc.
3439 * We should do this when we receive a new set from downstream
3440 * so that we can restart with the appropriate set,
3441 * and periodically to save any cached state that we have
3442 * accumulated.
3443 * <p>
3444 * In principle this needs a write lock to alter state on disc.
3445 * In practice at most one thread at once will ever try to call this,
3446 * and the serialiseToFile() routine attempts to atomically replace the file,
3447 * and this can take a long time and thus needlessly block cache activity,
3448 * so we do not take a cache lock here.
3449 */
3450 private void _save_AEP()
3451 throws IOException
3452 {
3453 // _getWriteLock(rwl, "_save_AEP()");
3454 // try
3455 {
3456 // Replace any extant file atomically if possible.
3457 final AllExhibitProperties aep = _AEP;
3458 if((aep != null) && (aep.aeid.length > 0) &&
3459 (cacheDir != null) &&
3460 (cacheDir.isDirectory()) &&
3461 (cacheDir.canWrite()))
3462 {
3463 // Abort at last moment if cache instance shut down.
3464 if(destroyed) { return; }
3465
3466 final File f = new File(cacheDir, CACHE_EXPROPS_FILENAME);
3467 if(ORG.hd.d.IsDebug.isDebug) { logger.log("[_save_AEP() start: "+ System.currentTimeMillis() +".]"); }
3468 FileTools.serialiseToFile(aep, f, STORE_EXPROPS_GZIPED, true);
3469 if(ORG.hd.d.IsDebug.isDebug) { logger.log("[_save_AEP() end: "+ System.currentTimeMillis() +".]"); }
3470 }
3471 }
3472 // finally { rwl.writeLock().unlock(); }
3473 }
3474
3475 /**Cached AllExhibitProperties; never null.
3476 * Volatile so that it can be safely accessed without a lock.
3477 */
3478 private volatile AllExhibitProperties _AEP = new AllExhibitProperties();
3479
3480 /**Last time we polled for AllExhibitImmutableData; initially 'now' to postpone first poll.
3481 * Private to _getAllExhibitProperties().
3482 */
3483 private volatile transient long _lastPollAEP = System.currentTimeMillis();
3484
3485
3486 /**Time before which next _checkMetaData() call should not be initiated.
3487 * The initial check is usually put off a few minutes
3488 * since the system is often very busy on start-up,
3489 * and we don't expect significant problems anyway most of the time.
3490 * <p>
3491 * Volatile for thread-safe access without a lock.
3492 * <p>
3493 * Private to _checkMetaData() and _checkMetaData_postResults().
3494 */
3495 private volatile transient long _checkMetaData_notBefore;
3496
3497 /**Private lock for _checkMetaData() to avoid starting more than one thread; non-null. */
3498 private final ReentrantLock _cMD_lock_ = new ReentrantLock();
3499
3500 /**Initiates, in the background, a check of the in-memory cache meta data against disc.
3501 * Grabs a write lock on the cache and makes the cache read-only
3502 * while it works.
3503 * <p>
3504 * Refuses to do the check if it is too soon since the last one
3505 * or if the cache seems busy.
3506 * <p>
3507 * May postpone a check if the system is short of power or otherwise stressed.
3508 */
3509 private void _checkMetaData()
3510 {
3511 // Don't try to check the cache if there isn't one!
3512 if(cacheDir == null) { return; }
3513
3514 // Check that it is not too soon for another check;
3515 // if so, quit for another try later.
3516 // Compute how many ms too early this call is...
3517 // This value will be positive when it is too early for another check
3518 // and then progressively more negative as the check is more and more overdue.
3519 final long tooEarlyMs = _checkMetaData_notBefore - System.currentTimeMillis();
3520 if(tooEarlyMs > 0)
3521 { return; } // Too soon; try again later!
3522
3523 // We can postpone the check a long time (maybe several days)
3524 // if the system is (temporarily) short of power,
3525 // eg if running on solar PV then we may have to wait for the sun to come out...
3526 // This gives us a chance to run as soon as power is available.
3527 if(GenUtils.mustConservePower() &&
3528 (-tooEarlyMs < Math.max(CoreConsts.MAX_EXPECTED_LOW_POWER_RUN_MS, 2*DISC_RECHECK_INTERVAL_MS)))
3529 { return; } // Try again later...
3530
3531 // Don't try to start the check now if the cache seems busy.
3532 if((rwl.getReadLockCount() > 0) ||
3533 (rwl.isWriteLocked()))
3534 { return; }
3535
3536 // If a run already seems to be in progress
3537 // then don't try to start a new one...
3538 if(_cMD_lock_.isLocked())
3539 { return; }
3540
3541 // Start (I/O-bound) background thread for immediate execution if possible,
3542 // discardable so as never to block the calling poll() thread.
3543 // This starts unencumbered by locks.
3544 ThreadUtils.nonCPUThreadPoolDiscardable.submit(new Runnable(){
3545 public final void run()
3546 {
3547 if(!_cMD_lock_.tryLock()) { return; } // Quit immediately if already in progress.
3548 try
3549 {
3550 // Note when we start...
3551 final long startTime = System.currentTimeMillis();
3552 logger.log("[ExhibitDataSimpleCache: cache metadata check starting at "+(new Date(startTime))+".]");
3553
3554 try
3555 {
3556 // Compute the base dir of the exhibit data cache.
3557 final File dataBaseDir = new File(cacheDir, CACHE_EXDATA_DIR);
3558 // Try to make sure that the cache directory actually exists
3559 // and is a directory and is writable;
3560 // else zap it and try to remake it.
3561 if(!dataBaseDir.isDirectory() || !dataBaseDir.canWrite())
3562 {
3563 dataBaseDir.delete();
3564 dataBaseDir.mkdirs();
3565 }
3566
3567 // Go and compute meta-data from what is on disc
3568 // (and knowing what current exhibits to look for),
3569 final MetaData newMd = new MetaData(_AEP, cacheDir, logger);
3570
3571 // Abort if cache is being shut down.
3572 if(destroyed) { return; }
3573
3574 // Lock out other cache activity while we make sure
3575 // that the meta-data is correct,
3576 // or fix it if not.
3577 // Grab both locks in correct order...
3578 _getWriteLock(rwl, "_checkMetaData() thread", logger);
3579 try
3580 {
3581 synchronized(metaData)
3582 {
3583 try
3584 {
3585 // Set working meta-data read-only just before we before
3586 // we cross-check in-memory and reconstructed versions.
3587 metaData.setReadWrite(false);
3588
3589 // If the in-memory meta-data is empty,
3590 // mergeWithNewMetaData the new one if it is not empty.
3591 if(metaData.isEmpty())
3592 {
3593 if(!newMd.isEmpty())
3594 {
3595 logger.log("[ExhibitDataSimpleCache: loading in-memory cache metadata: exhibits/bytes = "+newMd.size()+"/"+newMd.getTotalBytesCurrentlyUsedByCache()+".]");
3596 metaData.mergeWithNewMetaData(newMd, logger);
3597 }
3598 }
3599 // ELSE if there is a serious inconsistency,
3600 // (and it's not just that the in-memory copy is empty)
3601 // complain and try to fix the in-memory copy.
3602 else if(!newMd.isEquivalent(metaData))
3603 {
3604 logger.log("[ExhibitDataSimpleCache: WARNING: on-disc and in-memory cache metadata inconsistent: attempting to fix.]");
3605 metaData.mergeWithNewMetaData(newMd, logger);
3606 }
3607 }
3608 finally
3609 {
3610 // OK, the cache should be sane now,
3611 // so make it read/write,
3612 // and allow normal cache operations to resume.
3613 metaData.setReadWrite(true);
3614 }
3615 }
3616 }
3617 finally { rwl.writeLock().unlock(); }
3618
3619 // Note when we finish main work...
3620 final long endTimeMainWork = System.currentTimeMillis();
3621 final long mainWorkTime = endTimeMainWork - startTime;
3622
3623 logger.log("[ExhibitDataSimpleCache: cache metadata check ended at "+(new Date(endTimeMainWork))+".]");
3624
3625 // NOW NORMAL CACHE OPERATION CAN RESUME WHILE WE DO SOME OTHER TIDY-UP...
3626
3627 // Spend a proportion of the time that we spent checking the metadata
3628 // now actually checking the data (if the AEP is not empty).
3629 // Give up if we find that the cache has become write locked,
3630 // ie if we may not be able to get in.
3631 // Reduce this time considerably if (temporarily) conserving power,
3632 // but don't entirely eliminate it to ensure that it gets done.
3633 final long finishDataCheckBy = System.currentTimeMillis() + 1 +
3634 (GenUtils.mustConservePower() ? (mainWorkTime/2) : (mainWorkTime*3));
3635 final Set<Name.ExhibitFull> done = new HashSet<Name.ExhibitFull>();
3636 if(!metaData.isEmpty())
3637 {
3638 do
3639 {
3640 // Check one more cache entry for data corruption.
3641 _incrCheckMRUExhibitEntries(done);
3642 } while((System.currentTimeMillis() < finishDataCheckBy) &&
3643 !rwl.isWriteLocked() &&
3644 (done.size() < _AEP.aeid.length) /* Give up when no more to check. */ );
3645 }
3646 logger.log("[ExhibitDataSimpleCache: cached exhibit files fully/partially validated: "+done.size()+"]");
3647
3648 // Remove orphaned temporary files
3649 // and empty directories.
3650 // Anything not modified for over a month is fair game.
3651 // TODO: Zap any other crud that builds up.
3652 final long old = System.currentTimeMillis() - (32 * 24 * 3600 * 1000L);
3653 FileTools.rmRecursively(new File(cacheDir, CACHE_EXDATA_DIR),
3654 new FileFilter(){
3655 /**Tests whether or not the file with the specified abstract pathname should be removed.
3656 * If a directory then everything within it
3657 * is dealt with (eg possibly removed) first.
3658 * <p>
3659 * Only remove plain files that appear to be old
3660 * temporary files (starting with the temp prefix),
3661 * and empty dirs (can be recreated on demand).
3662 *
3663 * @param pathname The abstract pathname to be tested
3664 * @return true iff <code>pathname</code>
3665 * should be deleted
3666 */
3667 public final boolean accept(final File pathname)
3668 {
3669 // Must be "old" (not modified for ~1M).
3670 // This avoids races with other components
3671 // actively creating/using temporary files
3672 // and allows some grace time to allow for
3673 // glitches in the exhibit set, etc.
3674 if(pathname.lastModified() >= old)
3675 { return(false); }
3676
3677 // Empty directories can be zapped.
3678 if(pathname.isDirectory())
3679 {
3680 final String[] files = pathname.list();
3681 if((files != null) && (files.length == 0))
3682 {
3683 logger.log("INFO: ExhibitDataSimpleCache: found empty cache directory for removal: " + pathname);
3684 return(true);
3685 }
3686 return(false); // Cannot zap this dir.
3687 }
3688
3689 // Anything not a dir and not a plain file
3690 // is skipped/rejected.
3691 if(!pathname.isFile())
3692 { return(false); }
3693
3694 // Temporary files start with a known prefix.
3695 if(pathname.getName().startsWith(FileTools.F_tmpPrefix))
3696 {
3697 logger.log("WARNING: ExhibitDataSimpleCache: found orphaned/foreign cached temporary file for removal: " + pathname);
3698 return(true);
3699 }
3700
3701 // Whatever this is, we can't zap it...
3702 return(false);
3703 }
3704 });
3705
3706 // Note when we finish all the work...
3707 final long endTime = System.currentTimeMillis();
3708
3709 if(IsDebug.isDebug) { logger.log("[ExhibitDataSimpleCache._checkMetaData(): took "+(endTime - startTime)+"ms.]"); }
3710
3711 // Compute a postponement long enough
3712 // that we don't spend more than about
3713 // 1% of system (CPU/IO) time in the scan,
3714 // though bounded from above and below
3715 // in case of unusual events.
3716 final long nextDiscCacheRecheckDue =
3717 endTime +
3718 Math.min(DISC_RECHECK_INTERVAL_MS * 3L,
3719 Math.max(((endTime - startTime) * 100), // Max 1% of wall-clock time.
3720 DISC_RECHECK_INTERVAL_MS / 3L)) +
3721 Rnd.fastRnd.nextInt(0xfff | (DISC_RECHECK_INTERVAL_MS / 8));
3722
3723 // Set the (volatile) 'do-not-try-again-before' value...
3724 _checkMetaData_notBefore = nextDiscCacheRecheckDue;
3725
3726 logger.log("[ExhibitDataSimpleCache: next cache metadata check due not before "+(new Date(nextDiscCacheRecheckDue))+".]");
3727 }
3728 catch(final IOException e)
3729 {
3730 // Complain lots if we could not even reload the cache.
3731 e.printStackTrace();
3732 }
3733 }
3734 // catch(final IllegalStateException e)
3735 // {
3736 // e.printStackTrace();
3737 // }
3738 finally { _cMD_lock_.unlock(); }
3739 }
3740 });
3741 }
3742
3743
3744 /**Time before which not to to save metaData again; private to _cleanAndSaveMetaData(). */
3745 private transient long _saveMetaData_notBefore; // Initially zero for immediate save.
3746
3747 /**Lock to prevent concurrent attempts to save metadata; non-null. */
3748 private final ReentrantLock _metadataSave_lock_ = new ReentrantLock();
3749
3750 /**Saves the cache metadata if needed.
3751 * Grabs a write lock to update disc (and memory) state.
3752 * <p>
3753 * Aims to avoid saving the metaData more than once every
3754 * METADATA_MIN_SAVE_INTERVAL_MS, though if no save has taken
3755 * place for a while then the next save will happen on the
3756 * next call.
3757 * <p>
3758 * If saving the meta-data is taking a long time this aims
3759 * to postpone the next save at least a reasonable multiple of that
3760 * to avoid wasting too much system/CPU/disc bandwidth,
3761 * though we do put a cap on the maximum delay in case of weirdness...
3762 * <p>
3763 * This may also incrementally purge stale meta-data and data
3764 * just before the save to avoid the need for an extra meta-data save
3765 * to account for the purge-induced changes themselves.
3766 * <p>
3767 * This may also pick one or more exhibits at random to spot-check
3768 * for consistency with the master copy (eg looking for data corruption).
3769 *
3770 * @param force if true, force an immediate save to disc before return,
3771 * else run asynchronously (if possible, else discard) and never block
3772 */
3773 private void _cleanAndSaveMetaData(final boolean force)
3774 {
3775 // If no save is needed then return immediately.
3776 if(!metaData.getNeedsSave()) { return; }
3777
3778 // Too soon to consider another save, so return.
3779 if(!force && (_saveMetaData_notBefore >= System.currentTimeMillis())) { return; }
3780
3781 // Abort if apparently already in progress even if forcing...
3782 if(_metadataSave_lock_.isLocked()) { return; }
3783
3784 // Attempt to run this (I/O-bound thread) immediately
3785 // or if not forcing a synchronous immediate save
3786 // then if need be discard the attempt so as never to block the poll() thread.
3787 final Runnable r = (new Runnable() { public void run() {
3788 if(!_metadataSave_lock_.tryLock()) { return; } // Save already in progress.
3789 try
3790 {
3791 logger.log("Starting ExhibitDataSimpleCache metadata cleanup and save...");
3792
3793 // Attempt to clean the metadata before saving.
3794 // If this produces an error,
3795 // don't let it stop us doing the save anyway.
3796 final long start = System.currentTimeMillis();
3797 try
3798 {
3799 // Incrementally purge any orphaned entries...
3800 _incrPurgeOrphanedExhibits();
3801 // Incrementally test integrity of most-recently-used entries...
3802 _incrCheckMRUExhibitEntries(null);
3803 }
3804 catch(final Exception e)
3805 { e.printStackTrace(); } // Unexpected error, so log, but don't abort the save.
3806
3807 // Abort at last moment if cache instance shut down.
3808 if(destroyed) { return; }
3809
3810 // Now actually save the cleaned-up metadata to disc.
3811 try { metaData.saveToDisc(cacheDir, logger, statsIDSCGEN); }
3812 catch(final IOException e) { e.printStackTrace(); } // Log error but don't abort.
3813
3814 // Since the metadata needed saving,
3815 // then the fully-cached count may be stale.
3816 metaData.getFullyCachedCount(_AEP, true);
3817
3818 // Postpone the next metadata save.
3819 // Limit from above and below the time before the next save.
3820 //
3821 // Push the next save back a little longer if (temporarily) economising on power,
3822 // though since this is semi-critical data
3823 // (we can only partially reconstruct it if absent/corrupt)
3824 // we save it more often than non-critical info.
3825 final long end = System.currentTimeMillis();
3826 final long took = end - start;
3827 final long notBefore = System.currentTimeMillis() +
3828 METADATA_MIN_SAVE_INTERVAL_MS +
3829 Rnd.fastRnd.nextInt(1 + METADATA_MIN_SAVE_INTERVAL_MS/3) +
3830 (GenUtils.mustConservePower() ? Math.max(CoreConsts.ASYNC_MIN_POWER_SAVE_NON_CRITICAL_DATA_FLUSH_MS/4, METADATA_MIN_SAVE_INTERVAL_MS) : 0) +
3831 Math.min(79*took, DISC_RECHECK_INTERVAL_MS/11);
3832 _saveMetaData_notBefore = notBefore;
3833
3834 logger.log("Finished ExhibitDataSimpleCache metadata cleanup and save in "+took+"ms; next not before: "+(new Date(notBefore)));
3835 }
3836 finally { _metadataSave_lock_.unlock(); }
3837 } });
3838
3839 // Do the save blocking (when forced) or non-blocking.
3840 if(force) { r.run(); } // Blocking.
3841 else { ThreadUtils.nonCPUThreadPoolDiscardable.submit(r); } // Non-blocking.
3842 }
3843
3844 /**Private iterator over all cached full exhibit names for _incrCheckMRUExhibitEntries().
3845 * Must only be accessed under a lock on the metadata object
3846 * to prevent concurrent/unsafe access to the iterator object.
3847 * The underlying data being iterated over is guaranteed not to change,
3848 * though may become stale wrt the metadata and cache,
3849 * so some items returned by the iterator may no longer be relevant.
3850 * <p>
3851 * May be null.
3852 * <p>
3853 * Marked transient to avoid being serialised.
3854 */
3855 private transient Iterator<Name.ExhibitFull> _iCMEE_iterator;
3856
3857 /**Incrementally check cached exhibits for integrity.
3858 * This will attempt to remove any entry it finds that is corrupt.
3859 * <p>
3860 * This concentrates on the most-recently-used cache entries
3861 * as data corruption in these would probably be the most serious,
3862 * though may also attempt to systematically scan all cache entries.
3863 * <p>
3864 * This may examine any cached entry.
3865 * <p>
3866 * This may not examine any entry at all if the cache seems to be busy.
3867 * <p>
3868 * This will stop after removing at most one corrupt entry.
3869 *
3870 * @param done if not null then this routine adds to this Set
3871 * the full name any exhibit that it checks (just before checking)
3872 * and avoid checking any exhibit in this Set;
3873 * this need not be thread-safe for one unshared instance
3874 *
3875 * @throws IOException
3876 */
3877 private void _incrCheckMRUExhibitEntries(final Set<Name.ExhibitFull> done)
3878 throws IOException
3879 {
3880 final AllExhibitProperties aep = _AEP;
3881 final ExhibitStaticAttr esa;
3882
3883 // We need only a cache read lock to find an exhibit to test.
3884 // We may then later need to grab a different lock to test data
3885 // or to remove corrupt data.
3886
3887 // Grab both locks in correct order.
3888 // But give up if we can't get a cache lock immediately...
3889 if(!rwl.readLock().tryLock()) { return; }
3890 try
3891 {
3892 synchronized(metaData)
3893 {
3894 // We directly access the LRU list to get the most-recently-used
3895 // (and thus, probably, the most-frequently-used)
3896 // cached exhibits.
3897 //
3898 // We must hold the metaData instance lock
3899 // while reading this LRU data,
3900 // but we do *not* directly update it.
3901 //
3902 // The LRU data may change if we update/remove a corrupt entry.
3903 final SortedSet<CachedFile> lru = metaData.exhibitsLRU;
3904 int size = lru.size();
3905 // If nothing to do then return immediately.
3906 if(size < 1) { return; }
3907
3908 // Half the time get the next exhibit name from a comprehensive list.
3909 if(Rnd.fastRnd.nextBoolean())
3910 {
3911 Iterator<Name.ExhibitFull> sit = _iCMEE_iterator;
3912 // If the iterator is not live then attempt to create a new one.
3913 if((sit == null) || !sit.hasNext())
3914 {
3915 // If the AEP is empty (no work to do) then return immediately.
3916 if(aep.aeid.isEmpty()) { return; }
3917 // Set iterator up with ALL the (full) exhibit names
3918 // as a working cache should have some data for ALL exhibits.
3919 // The iterator is over an immutable view to avoid accidents.
3920 // The ordering is likely to be semi-random
3921 // which should help avoid systematic failures to check thoroughly.
3922 _iCMEE_iterator = sit = aep.aeid.getAllExhibitNamesSorted().iterator();
3923 }
3924
3925 assert(sit.hasNext());
3926 // If exhibit has gone away since iterator was created,
3927 // then esa will be null and we'll skip any test attempt below...
3928 esa = aep.aeid.getStaticAttr(sit.next());
3929 }
3930 // Pick amongst all exhibits, weighted towards MRU.
3931 else
3932 {
3933 // Get as list of all cached exhibits, MRU last.
3934 final List<CachedFile> cfLRU = new ArrayList<CachedFile>(lru);
3935 // Remove any already inspected.
3936 if((done != null) && !done.isEmpty())
3937 {
3938 cfLRU.removeAll(done);
3939 // Recompute the size...
3940 size = lru.size();
3941 // If nothing to do then return immediately.
3942 if(size < 1) { return; }
3943 }
3944
3945 // Pick an entry, heavily weighted towards the MRU/popular,
3946 // since the most recently/frequently used entries
3947 // are assumed to be the most important to keep healthy.
3948 //
3949 // We work hard to distribute these checks well.
3950 final int entry = size - 1 -
3951 Rnd.goodRnd.nextInt(
3952 Rnd.goodRnd.nextInt(
3953 Rnd.goodRnd.nextInt(size) + 1) + 1);
3954 final CachedFile cf = cfLRU.get(entry);
3955 esa = aep.aeid.getStaticAttr(cf.name);
3956 }
3957 }
3958 }
3959 finally
3960 { rwl.readLock().unlock(); }
3961
3962 // If we have been handed a legitimate exhibit to inspect
3963 // then test it for validity
3964 // outside any locks as far as possible.
3965 if(esa != null)
3966 {
3967 if(done != null)
3968 {
3969 // Note that we are about to check this exhibit,
3970 // and abort (return immediately) if it is already checked.
3971 if(!done.add(esa.getExhibitFullName())) { return; }
3972 }
3973
3974 // Test validity of entry.
3975 _doCacheDataValidityTest(aep, esa);
3976 }
3977 }
3978
3979 /**Do incremental purge of orphaned cache entries conditions are right.
3980 * Tries to grab a write lock to do its work;
3981 * if it can't get one immediately (ie the cache is busy)
3982 * then it returns immediately.
3983 * <p>
3984 * Doesn't attempt any purging if there is an empty exhibit set
3985 * or if there is no cache size currently set
3986 * since this cache may not even be properly initialised yet...
3987 * <p>
3988 * We clear at most one orphaned entry on each call.
3989 */
3990 private void _incrPurgeOrphanedExhibits()
3991 throws IOException
3992 {
3993 if(ORPHANED_EXHIBIT_EXPIRY_ALLOWED &&
3994 (_AEP.aeid.length > 0) &&
3995 (LocalProps.getWEBSVR_MAX_EX_CACHE_BYTES() > 0))
3996 {
3997 if(!rwl.writeLock().tryLock()) { return; }
3998 try
3999 {
4000 // We have run out of space to freely precache (new) exhibits,
4001 // so a preemptive orphaned-exhibit purge may well be worthwhile.
4002 if(!metaData.someFree(cacheDir))
4003 {
4004 // Now look for exhibits in the metadata not in the current exhibit set.
4005 final Set<Name.ExhibitFull> orphaned = metaData.getKnownExhibits();
4006 orphaned.removeAll(_AEP.aeid.getAllExhibitNamesSorted());
4007
4008 // There are some candidate orphaned exhibits.
4009 if(orphaned.size() > 0)
4010 {
4011 logger.log("[ExhibitDataSimpleCache: WARNING: orphaned exhibits in cache: " + orphaned.size() + ".]");
4012
4013 // Work out minimum time since last use that we will purge.
4014 // Note that for a small cache with a fast turnover of exhibits,
4015 // we may have to purge sooner than we'd consider ideal.
4016 final long thresholdAccessTime;
4017 synchronized(metaData)
4018 {
4019 final CachedFile leastRecentlyUsed = metaData.exhibitsLRU.first();
4020 final CachedFile mostRecentlyUsed = metaData.exhibitsLRU.last();
4021 // Make threshold for purge much closer to now than to LRU exhibit.
4022 thresholdAccessTime = (leastRecentlyUsed.timestamp + 3*mostRecentlyUsed.timestamp) / 4;
4023 }
4024 final long purgeThreshold = Math.max(thresholdAccessTime,
4025 System.currentTimeMillis() - ORPHANED_EXHIBIT_MIN_UNUSED_TIME_MS);
4026
4027 // Only consider removing any that have not been used/updated in a while.
4028 for(final Name.ExhibitFull name : orphaned)
4029 {
4030 // Get info on cache entry, if any.
4031 final CachedFile info = metaData.exhibitGetInfo(name);
4032
4033 // Found unloved orphan, so purge it.
4034 if((info != null) && (info.getLastAccessed() < purgeThreshold) &&
4035 (info.getLastAccessed() != 0)) // Ensure that this is not broken/partial meta-data.
4036 {
4037 // Only remove one orphaned exhibit each time
4038 // so as to avoid taking too long,
4039 // thus stop if successful in removing this one.
4040 if(metaData.exhibitRemoveCacheEntry(rwl, cacheDir, name, false, logger, statsIDSCGEN))
4041 {
4042 logger.log("[ExhibitDataSimpleCache: completely removed orphaned exhibit from cache: " + name + ".]");
4043 break;
4044 }
4045 }
4046 }
4047 }
4048 }
4049 }
4050 finally { rwl.writeLock().unlock(); }
4051 }
4052 }
4053
4054
4055 /**Gets the general properties as a GenProps object if its timestamp is not that specified.
4056 * If the time specified is negative the object will be returned unconditionally.
4057 * <p>
4058 * If no fresh props have yet been fetched
4059 * then a default set with a zero timestamp is returned.
4060 * <p>
4061 * If the caller's copy appears to be up-to-date (eg the oldStamp
4062 * matches that that we would have been returned) null is returned.
4063 * <p>
4064 * We get this from our cache of the immutable data rather than
4065 * going to the source directly.
4066 * So we don't block or grab any lock to fetch this value.
4067 * <p>
4068 * We do not attempt to persist this data since carrying old GenProps values
4069 * across a restart may be a very poor idea.
4070 */
4071 public org.hd.d.pg2k.svrCore.props.GenProps getGenProps(final long oldStamp)
4072 /* throws IOException */
4073 {
4074 final GenProps cached = genProps;
4075
4076 if((oldStamp < 0) || (oldStamp != cached.timestamp))
4077 { return(cached); }
4078
4079 // Caller seems to have up-to-date copy already.
4080 return(null);
4081 }
4082
4083 /**Last time we polled for genProps.
4084 * Private to _getGenProps() and is volatile to avoid needing locked access.
4085 */
4086 private transient volatile long _lastPollGp;
4087
4088 /**Attempts to get sysprops if our cached copy may be stale.
4089 * Slightly strange is that we use our cached sys props value
4090 * to determine the frequency at which we recheck the cache; the
4091 * default value is short so we should initially poll quickly until
4092 * we get a kosher value.
4093 * <p>
4094 * A special case here: if we have a GenProps object with a non-zero
4095 * timestamp (presumably pulled over from a running master) and then
4096 * we get one with a zero timestamp, we ignore the new, zero, instance
4097 * since it probably means that the master has just been restarted
4098 * and has not yet loaded new GenProps.
4099 * <p>
4100 * This does not need to hold any locks since all the values it touches
4101 * are volatile.
4102 */
4103 private void _getGenProps()
4104 throws IOException
4105 {
4106 final GenProps currentGenProps = genProps;
4107
4108 // We return immediately if no need to poll,
4109 // because last poll was recent enough.
4110 if(System.currentTimeMillis() - _lastPollGp <=
4111 (currentGenProps.getWEBSVR_SYSPROPS_RECHECK_MS() * LocalProps.getServerSlowdownFactor()))
4112 { return; }
4113
4114 // Postpone next poll, whether this succeeds or not.
4115 _lastPollGp = System.currentTimeMillis();
4116
4117 // Spin off work into new thread if possible to avoid delaying poll() unduly.
4118 ThreadUtils.nonCPUThreadPool.submit(new Runnable(){
4119 public final void run()
4120 {
4121 // Capture old props timestamp.
4122 final long currentGPTimestamp = currentGenProps.timestamp;
4123
4124 // Try to fetch new value; this might take a while or even fail...
4125 final GenProps newGp;
4126 try { newGp = source.getGenProps(currentGPTimestamp); }
4127 catch(final IOException e) { e.printStackTrace(); return; }
4128 // ...but don't let an uninitialised master GenProps displace
4129 // an initialised one that we already hold.
4130 if((newGp != null) &&
4131 ((currentGPTimestamp == 0) || (newGp.timestamp != 0)))
4132 {
4133 // OK, we got an updated value.
4134 // No lock required to update this volatile value.
4135 genProps = newGp;
4136
4137 // Log the receipt of a changed props set.
4138 logger.log("[ExhibitDataSimpleCache: new GenProps: " +
4139 "old/new timestamp: " +
4140 (new Date(currentGPTimestamp)) + " / " +
4141 (new Date(newGp.timestamp)) +
4142 " at " +(new Date()) + ".]");
4143 }
4144 _lastPollGp = System.currentTimeMillis(); // Schedule next poll.
4145 }
4146 });
4147 }
4148
4149
4150
4151
4152
4153
4154
4155
4156
4157
4158
4159 /**Gets the security properties as a Properties object if its timestamp is not that specified.
4160 * If the time specified is negative the object will be returned unconditionally.
4161 * <p>
4162 * If no props are currently installed/available a default set with a zero
4163 * timestamp is returned.
4164 * <p>
4165 * If the caller's copy appears to be up-to-date (eg the oldStamp
4166 * matches that that would have been returned) null is returned.
4167 * <p>
4168 * We get this from our cache of the immutable data rather than
4169 * going to the source directly.
4170 * We don't block or grab any locks to fetch this.
4171 * <p>
4172 * We do not attempt to persist this data since carrying old values
4173 * across a restart may be a very poor idea.
4174 * <p>
4175 * We wrap this as the defaults to a new Properties object
4176 * to protect our copy against accidental alteration.
4177 */
4178 public java.util.Properties getGenSecProps(final long oldStamp)
4179 /* throws IOException */
4180 {
4181 final Properties cached = genSecProps;
4182
4183 if((oldStamp < 0) || (oldStamp != Long.parseLong(cached.getProperty(SecurityProps.PNAME_GENSECPROPS_TIMESTAMP))))
4184 { return(new Properties(cached)); }
4185
4186 // Caller seems to have up-to-date copy already.
4187 return(null);
4188 }
4189
4190 /**Last time we polled for genSecProps.
4191 * Private to _getGenSecProps(); is volatile to avoid the need for locking.
4192 */
4193 private volatile transient long _lastPollGSp;
4194
4195 /**Our record of the current GenProps; never null.
4196 * Maintained by poll(); is volatile to avoid the need for locking.
4197 */
4198 private volatile Properties genSecProps = (new SecurityProps()).getGenSecProps(); // Start with defaults.
4199
4200 /**Attempts to get gensecprops if our cached copy may be stale.
4201 * This does not need any locks since the state is mainatined
4202 * in volatile values.
4203 */
4204 private void _getGenSecProps()
4205 throws IOException
4206 {
4207 // We return immediately if no need to poll,
4208 // because last poll was very recent.
4209 if(System.currentTimeMillis() - _lastPollGSp <=
4210 (genProps.getWEBSVR_SYSPROPS_RECHECK_MS() * LocalProps.getServerSlowdownFactor()))
4211 { return; }
4212
4213 // Put off next poll for a while, even if this one fails...
4214 _lastPollGSp = System.currentTimeMillis(); // Schedule/postpone next poll.
4215
4216 ThreadUtils.nonCPUThreadPool.submit(new Runnable(){
4217 public final void run()
4218 {
4219 try
4220 {
4221 // Capture old props timestamp.
4222 final Properties currentGSP = genSecProps;
4223 final String timestamp = currentGSP.getProperty(SecurityProps.PNAME_GENSECPROPS_TIMESTAMP, "0");
4224 final long currentGSPTimestamp = Long.parseLong(timestamp);
4225
4226 // Try to fetch new value; this might take a while or even fail...
4227 final Properties newGSp = source.getGenSecProps(currentGSPTimestamp);
4228 // ...but don't let an uninitialised master GenSecProps displace
4229 // any initialised one that we already hold.
4230 if((newGSp != null) &&
4231 ((currentGSPTimestamp == 0) || (Long.parseLong(newGSp.getProperty(SecurityProps.PNAME_GENSECPROPS_TIMESTAMP, "0")) != 0)))
4232 {
4233 // OK, we got an updated value.
4234 // No lock is needed to update this volatile value.
4235 genSecProps = newGSp;
4236 }
4237 _lastPollGSp = System.currentTimeMillis(); // Schedule/postpone next poll.
4238 }
4239 catch(final Exception e)
4240 {
4241 e.printStackTrace(); // Whinge and stop in case of error.
4242 }
4243 }
4244 });
4245 }
4246
4247 /**Maximum wait time between attempts to fetch or generate thumbnails (ms) by long-running cache; strictly positive.
4248 * We have this in order to avoid pestering a master server unnecessarily
4249 * or wasting CPU cycles attempting to build a thumbnail.
4250 * <p>
4251 * A value of several times the allowed system latency/skew
4252 * up to of the order of a day in the expectation of a daily
4253 * exhibit-accession and thumbnail-build cycle on the server
4254 * is probably reasonable.
4255 * <p>
4256 * We randomise the value so that different clients will not conflict
4257 * with one another unduly.
4258 * <p>
4259 * We may wait longer than this when resource-constrained.
4260 * <p>
4261 * We may wait less than this when the cache is relatively young.
4262 */
4263 private static final long MAX_WAIT_BETWEEN_THUMBNAIL_REPEAT_FETCHES_MS =
4264 Math.min(25 * 3600 * 1000L, // Retries no more than ~1 day apart...
4265 1013L * (3*CoreConsts.DEFAULT_TEMPORAL_SLACKNESS_S +
4266 Rnd.fastRnd.nextInt(4*CoreConsts.DEFAULT_TEMPORAL_SLACKNESS_S)));
4267
4268 /**Normal wait time between attempts to fetch or generate thumbnails (ms) by long-running cache; strictly positive.
4269 * We have this in order to avoid pestering a master server unnecessarily
4270 * or wasting CPU cycles attempting to build a thumbnail;
4271 * this is used where the upstream server doesn't seem too busy
4272 * or in case of apparent transient network error.
4273 * <p>
4274 * A value of a few minutes is good for this purpose.
4275 * <p>
4276 * We randomise the value so that different clients will not conflict
4277 * with one another unduly.
4278 * <p>
4279 * We may wait longer than this when resource-constrained.
4280 * <p>
4281 * We may wait less than this when the cache is relatively young.
4282 */
4283 private static final long NORMAL_WAIT_BETWEEN_THUMBNAIL_REPEAT_FETCHES_MS =
4284 (5 * 60 * 1000L) + Rnd.fastRnd.nextInt(1 * 60 * 1000);
4285
4286 // /**Enqueueable SoftReference for cached-in-memory thumbnails. */
4287 // private final static class TIMSoftReference extends SoftReference<ExhibitThumbnails>
4288 // {
4289 // /**Factory method that also enqueues automatically... */
4290 // static TIMSoftReference create(final Name.ExhibitFull name, final ExhibitThumbnails tns, final ReferenceQueue<ExhibitThumbnails> refQ)
4291 // {
4292 // final TIMSoftReference result = new TIMSoftReference(name, tns, refQ);
4293 // return(result);
4294 // }
4295 // /**Create an instance with the thumbnail name and thumbnails (all non-null); only call from factory method. */
4296 // private TIMSoftReference(final Name.ExhibitFull name, final ExhibitThumbnails tns, final ReferenceQueue<ExhibitThumbnails> refQ)
4297 // {
4298 // super(tns, refQ);
4299 // this.name = name;
4300 // assert(name != null);
4301 // assert(tns != null);
4302 // assert(refQ != null);
4303 // }
4304 // /**Valid full exhibit name; never null. */
4305 // final Name.ExhibitFull name;
4306 // }
4307
/**Private in-memory cache of deserialised thumbnails; never null.
 * Created via MemoryTools.SoftReferenceMap.create()
 * (NOTE(review): presumably values are held by soft reference
 * and the map is safe for concurrent use; confirm against SoftReferenceMap).
 * <p>
 * Holding a lock on this object <em>will not</em> prevent updates to it.
 * <p>
 * Each key is a full exhibit name, and each value is one of:
 * <ul>
 * <li>an ExhibitThumbnails instance (the cached thumbnails), or
 * <li>a Long holding the time at which a fetch/generate attempt was last made,
 *     used to postpone further attempts for a while.
 * </ul>
 * If we run very low on space then the thumbnails are discarded
 * and only the do-not-retry Long timestamps are kept.
 */
private final MemoryTools.SoftReferenceMap<Name.ExhibitFull,Object> _thumbnailsInMemory =
    MemoryTools.SoftReferenceMap.create(Math.max(256, MAX_QUEUED_TN_FETCHES), true, "_thumbnailsInMemory");

/**Emergency-free handler for the in-memory thumbnail cache.
 * When run, it removes every entry whose value is not a Long,
 * ie it drops the thumbnails themselves but keeps the do-not-retry timestamps.
 * This should free the bulk of the memory while continuing to save lots of nugatory effort.
 * <p>
 * We have to hold a reference to the handle to prevent it expiring.
 */
private final RecurrentEmergencyFreeHandle _timREFH = new RecurrentEmergencyFreeHandle()
    {
    public void run()
        {
        int thumbnailsCleared = 0;
        for(final Name.ExhibitFull key : _thumbnailsInMemory.keySet())
            {
            final Object value = _thumbnailsInMemory.get(key);
            // Leave alone entries that have vanished and the (small) Long timestamps;
            // everything else is a thumbnail set to be dropped.
            if((value != null) && !(value instanceof Long))
                {
                _thumbnailsInMemory.remove(key);
                ++thumbnailsCleared;
                }
            }
        System.err.println("WARNING: emergency-freed thumbnails: " + thumbnailsCleared);
        }
    };
// Register the handler when each instance is constructed.
{ MemoryTools.registerRecurrentEmergencyFreeHandle(_timREFH); }
4345
/**If true then allow missing thumbnails to be fetched synchronously at the risk of blocking for extended periods. */
4347 private static final boolean ALLOW_SYNC_TN_FETCH = false;
4348
4349 /**Gets the thumbnails for an exhibit.
4350 * A data source is at liberty to refuse to compute thumbnails
4351 * in which case it may return null, else it returns a
4352 * non-null value which may include the `could-not-compute'
4353 * value to indicate that a thumbnail/sample can never be made
4354 * for this exhibit and no attempt need be made again.
4355 * <p>
4356 * This retains a private in-memory cache of
4357 * deserialised thumbnails held by SoftReference,
4358 * and it tries first to recover them from there.
4359 * <p>
4360 * This tries to retrieve thumbnails from the cache,
4361 * and returns them if they are there.
4362 * <p>
4363 * Else, if create is true, this tries to create the thumbnails,
4364 * cache them, and return the value. But we won't bother unless
4365 * the main image is fully loaded.
4366 * <p>
4367 * Note that only the read and write of cache is done under lock;
4368 * the thumbnail generation is unlocked and concurrency is restricted,
4369 * if at all, by the handler routine(s).
4370 * <p>
4371 * Partly because this routine is called by our own
4372 * precache routines, we do not regard reading a thumbnail
4373 * as proving user access to the cache
4374 * (exhibit data has to be read for that).
4375 *
4376 * @param create if true, and no thumbnail yet exists, try to
4377 * create one if possible; else if create is false
4378 * only return an existing one and return null if none is to hand
4379 * (or possibly allow fetch of pre-built remote one)
4380 */
4381 public ExhibitThumbnails getThumbnails(final ExhibitFull name,
4382 final boolean create)
4383 {
4384 // Note inbound request for exhibit thumbnails.
4385 StatsLogger.captureDataPoint(statsIDSCGEN, SCGNAME_EXTHUREQIN);
4386 if(!create)
4387 { StatsLogger.captureDataPoint(statsIDSCGEN, SCGNAME_EXTHUREQINDC); }
4388
4389 if(TRACE_THUMBNAIL_ACTIVITY_ALL) { logger.log("getThumbnails("+name+", "+create+")..."); }
4390
4391 // try
4392 // {
4393 // Call our internal fetcher/generator.
4394 final ExhibitThumbnails exhibitThumbnails = _getThumbnails(name,
4395 create, ALLOW_SYNC_TN_FETCH);
4396
4397 // If we could not get the requested thumbnails
4398 // (or any firm indication that they do not exist)
4399 // then try again in the background as they may still be useful.
4400 if(exhibitThumbnails == null)
4401 { _asyncTNFetch(name); }
4402
4403 if(TRACE_THUMBNAIL_ACTIVITY_ALL) { logger.log("getThumbnails("+name+", "+create+") = "+exhibitThumbnails); }
4404
4405 return(exhibitThumbnails);
4406 // }
4407 // finally
4408 // {
4409 // // We must not be holding a (write) lock on exit.
4410 // // We log any held lock and break it and propagate an Error.
4411 // // Of course, this should NEVER happen,
4412 // // but the cost of checking for it is low, so we'll always check.
4413 // if(rwl.isWriteLockedByCurrentThread())
4414 // {
4415 // rwl.writeLock().unlock();
4416 // System.err.println("SEVERE ERROR: getThumbnails(): holding write lock on exit from cache");
4417 // throw new Error("SEVERE ERROR: getThumbnails(): holding write lock on exit from cache");
4418 // }
4419 // }
4420 }
4421
4422
4423 /**Get the thumbnails for an exhibit; null if not available.
4424 * A data source is at liberty to refuse to compute thumbnails
4425 * in which case it may return null, else it returns a
4426 * non-null value which may include the `could-not-compute'
4427 * value to indicate that a thumbnail/sample cannot be made
4428 * for this exhibit and no attempt need be made in future.
4429 * <p>
4430 * This retains a private in-memory cache of
4431 * deserialised thumbnails held by SoftReference,
4432 * and it tries first to recover them from there.
4433 * This is very important to fast delivery of thumbnails
4434 * for building pages referencing many thumbnails.
4435 * <p>
4436 * This tries to retrieve thumbnails from the cache,
4437 * and returns them if they are there.
4438 * <p>
4439 * Else, if create is true, this tries to create the thumbnails,
4440 * cache them, and return the value. But we won't bother unless
4441 * the main image is fully loaded.
4442 * <p>
4443 * Note that only the read and write of our tn cache is done under lock;
4444 * the thumbnail generation is unlocked and concurrency is restricted,
4445 * if at all, by the handler routine(s).
4446 * <p>
4447 * Partly because this routine is called by our own
4448 * precache routines, we do not regard reading a thumbnail
4449 * as proving user access to the cache
4450 * (exhibit data has to be read for that).
4451 *
4452 * @param create if true, and no thumbnail yet exists, try to
4453 * create one if possible; else if create is false
4454 * only return an existing one and return null if none is to hand
4455 * (or possibly allow fetch of pre-built remote one)
4456 * @param allowSyncFetch if they then allow a synchronous fetch from upstream
4457 *
4458 * @return null if no such exhibit or a transient problem,
4459 * NO_THUMBNAILS if this exhibit type can never have thumbnails
4460 * or it appears impossible for this particular exhibit,
4461 * or else a non-null non-NO_THUMBNAILS value
4462 */
4463 private ExhibitThumbnails _getThumbnails(final Name.ExhibitFull name,
4464 final boolean create,
4465 final boolean allowSyncFetch)
4466 {
4467 // In the simple case where we know that thumbnails
4468 // definitely can't be created (from the file type)
4469 // return the NO_THUMBNAILS value immediately,
4470 // ignoring the create parameter.
4471 // We never cache this (negative) value.
4472 //
4473 // NOTE: in the special case of an empty AEP then return null
4474 // for a non-existent AEP since the empty AEP may be transient.
4475 final ExhibitMIME.ExhibitTypeParameters type = (ExhibitMIME.getInputFileType(name));
4476 if((type == null) || (type.handler == null) ||
4477 !type.canPossiblyCreateThumbnailOfSameMIMEType())
4478 { return(ExhibitThumbnails.NO_THUMBNAILS); }
4479 final AllExhibitProperties aep = _AEP;
4480 final ExhibitStaticAttr esa = _AEP.aeid.getStaticAttr(name);
4481 if(esa == null)
4482 { return(null); } // Can't make thumbnail yet...
4483
4484 if(TRACE_THUMBNAIL_ACTIVITY_ALL) { logger.log("_getThumbnails("+name+", "+create+", "+allowSyncFetch+") ... past arg checks"); }
4485
4486 // Get some of our important system state for later.
4487 final GenProps gp = getGenProps(-1L);
4488
4489 // Get a handle on the private in-memory thumbnail cache.
4490 // Read access to this can in practice be highly concurrent.
4491 final SoftReferenceMap<ExhibitFull, Object> _tIM = _thumbnailsInMemory;
4492 // final ConcurrentMap<Name.ExhibitFull, Object> _tIM = _thumbnailsInMemory;
4493
4494 // Check the memory cache to see if it is still valid
4495 // and see if we can find our thumbnails in there.
4496 // Do with a lock held on the table for atomicity.
4497 //
4498 // There are two sorts of things in this table:
4499 // * A mapping from an exhibit name to ExhibitThumbnails
4500 // * A Long with the time that we last tried to fetch the thumbnail
4501 // from the datasource (or generated it locally);
4502 // we should postpone trying again until reasonable time elapses
4503 // to avoid wasting bandwidth and/or CPU cycles.
4504 //
4505 // Don't attempt to fetch from upstream if:
4506 // * We were told not to fetch it.
4507 // * The caller said not to bother creating the thumbnail.
4508 // * It isn't long enough since we last tried to create/fetch it.
4509 // This last case is also used as a crude 'lock'
4510 // to try to avoid concurrent fetched/generation of the same thumbnail.
4511 boolean dontFetchFromSource = !allowSyncFetch || !create;
4512
4513 // OK, try from in-memory cache.
4514 final Object memCachedValue = _tIM.get(name);
4515
4516 if(memCachedValue instanceof Long)
4517 {
4518 // This was the time we last tried to do a fetch/generate.
4519 final long lastTime = ((Long) memCachedValue).longValue();
4520
4521 // If we have not yet waited long enough
4522 // before another fetch/generate attempt,
4523 // then veto any fetch attempt at least.
4524 if(lastTime +
4525 Math.min(1+(_instanceLifems()>>>1), // A new cache is desperate to populate itself...
4526 upstreamStratum.isUpstreamConserving() ? MAX_WAIT_BETWEEN_THUMBNAIL_REPEAT_FETCHES_MS : NORMAL_WAIT_BETWEEN_THUMBNAIL_REPEAT_FETCHES_MS) >
4527 System.currentTimeMillis())
4528 {
4529 dontFetchFromSource = true;
4530 if(TRACE_THUMBNAIL_ACTIVITY_ALL) { logger.log("_getThumbnails("+name+", "+create+", "+allowSyncFetch+") ... negatively cached since "+(new Date(lastTime))+": dontFetchFromSource = true"); }
4531 }
4532 else
4533 {
4534 // OK, passed the wait time, so purge this time value to save space
4535 // (though avoid races with new values being inserted).
4536 _tIM.remove(name);
4537 if(TRACE_THUMBNAIL_ACTIVITY_ALL) { logger.log("_getThumbnails("+name+", "+create+", "+allowSyncFetch+") ... negative cache expired"); }
4538 }
4539 }
4540
4541 // See if we have a cached copy of the thumbnail.
4542 else if(memCachedValue instanceof ExhibitThumbnails)
4543 {
4544 final ExhibitThumbnails tns = (ExhibitThumbnails) memCachedValue;
4545 if(tns != null) // Got it! Excellent!
4546 {
4547 // Note hit from in-memory thumbnail cache...
4548 StatsLogger.captureDataPoint(statsIDSCGEN, SCGNAME_CACHETNMEMHIT);
4549 if(TRACE_THUMBNAIL_ACTIVITY_ALL) { logger.log("_getThumbnails("+name+", "+create+", "+allowSyncFetch+") ... returning from in-memory cache: return " + tns); }
4550
4551 return(tns);
4552 }
4553 // else // Expunge expired reference to save some space (while avoiding races).
4554 // { _tIM.remove(name, memCachedValue); }
4555 }
4556
4557 else { assert(memCachedValue == null) : "incorrectly typed entry in tn cache: " + memCachedValue.getClass().getName(); }
4558
4559 // If we've got the thumbnails in the disc cache
4560 // then return the (disc) cached value
4561 // having saved it to the in-memory cache.
4562 // Fetching from disc cache grabs a read lock on the cache.
4563 try {
4564 final ExhibitThumbnails tns =
4565 metaData.exhibitGetThumbnails(rwl, cacheDir, name, logger);
4566 if(TRACE_THUMBNAIL_ACTIVITY_ALL) { logger.log("_getThumbnails("+name+", "+create+", "+allowSyncFetch+") ... on-disc metadata cache result: " + tns); }
4567
4568 if(tns != null)
4569 {
4570 // Unconditionally save the thumbnails to the in-memory cache.
4571 _tIM.put(name, tns);
4572 // _tIM.put(esa.getExhibitFullName(), TIMSoftReference.create(esa.getExhibitFullName(), tns, _tIM_deadRefQueue));
4573
4574 // Note hit from on-disc thumbnail cache...
4575 StatsLogger.captureDataPoint(statsIDSCGEN, SCGNAME_CACHETNHIT);
4576 if(TRACE_THUMBNAIL_ACTIVITY_ALL) { logger.log("_getThumbnails("+name+", "+create+", "+allowSyncFetch+") ... return " + tns); }
4577
4578 return(tns);
4579 }
4580 }
4581 catch(final IOException e) { } // Ignore.
4582
4583 // Note cache miss if we have gotten this far...
4584 StatsLogger.captureDataPoint(statsIDSCGEN, SCGNAME_CACHETNMISS);
4585
4586 final Long now = new Long(System.currentTimeMillis());
4587 // If we can get it from the underlying data source,
4588 // then return it having saved it to the disc and in-memory caches.
4589 // We *don't* hold any locks in the cache while fetching from upstream.
4590 if(!dontFetchFromSource)
4591 {
4592 if(TRACE_THUMBNAIL_ACTIVITY_ALL) { logger.log("_getThumbnails("+name+", "+create+", "+allowSyncFetch+") ... trying to fetch from upstream"); }
4593
4594 try {
4595 // Try to prevent any concurrent attempt to fetch this thumbnail from upstream...
4596 if((memCachedValue == null) && (!(_tIM.get(name) instanceof ExhibitThumbnails)))
4597 { _tIM.put(name, now); }
4598
4599 ExhibitThumbnails tns = null;
4600 if(ALLOW_TN_FETCH_FROM_PEERS)
4601 {
4602 // Try to fetch extant thumbnails from a peer
4603 // to minimise traffic to the master.
4604 // Since all mirrors/peers should cache all thumbnails once fetched
4605 // (unless very short of cache space indeed)
4606 // then any peer should have what we want
4607 // or should be getting it shortly anyway,
4608 // so this should not induce significant extra traffic overall.
4609 try
4610 {
4611 tns = _getThumbnailsFromPeer(name);
4612 if(TRACE_THUMBNAIL_ACTIVITY_ALL) { logger.log("_getThumbnails("+name+", "+create+", "+allowSyncFetch+") ... from peer: "+tns); }
4613 }
4614 catch(final IOException e) { if(TRACE_THUMBNAIL_ACTIVITY) { e.printStackTrace(); } } // Absorb P2P error; possibly log it.
4615 }
4616 // If the peer gave us nothing at all or an apparent permanent fail
4617 // then try the master for a definitive answer if possible.
4618 // This prevents spurious failures at peers from spreading laterally.
4619 if((null == tns) || ExhibitThumbnails.NO_THUMBNAILS.equals(tns))
4620 {
4621 // Try to get thumbnails from directly upstream
4622 // (since we could not get them from a peer)
4623 // ultimately from the master server.
4624 final long startFetchFromMaster = System.currentTimeMillis();
4625 tns = source.getThumbnails(name, create); // Fetch from upstream.
4626 final long endFetchFromMaster = System.currentTimeMillis();
4627 final long timeTaken = endFetchFromMaster - startFetchFromMaster;
4628 if(ALLOW_TN_FETCH_FROM_PEERS) // Only record successes here for simplicity...
4629 { _updatePeerStats(MASTER_FAKE_TAG, true, timeTaken); }
4630 if(TRACE_THUMBNAIL_ACTIVITY_ALL) { logger.log("_getThumbnails("+name+", "+create+", "+allowSyncFetch+") ... from upstream ("+timeTaken+"ms): "+tns); }
4631 }
4632 // If we retrieved some thumbnails then save them and return them...
4633 if(null != tns)
4634 {
4635 if(TRACE_THUMBNAIL_ACTIVITY && ExhibitThumbnails.NO_THUMBNAILS.equals(tns)) { System.err.println("WARNING: _getThumbnails("+name+", "+create+", "+allowSyncFetch+") ... cacheing upstream NO_THUMBNAILS"); }
4636 if(TRACE_THUMBNAIL_ACTIVITY_ALL) { logger.log("_getThumbnails("+name+", "+create+", "+allowSyncFetch+") ... cacheing upstream thumbnails in memory and metadata disc caches: " + tns); }
4637
4638 // Unconditionally save in the in-memory cache.
4639 _tIM.put(name, tns);
4640 // _tIM.put(esa.getExhibitFullName(), TIMSoftReference.create(esa.getExhibitFullName(), tns, _tIM_deadRefQueue));
4641
4642 // Now save in the disc cache...
4643 // Writing to disc cache grabs a write lock on the cache.
4644 boolean saved = false;
4645 try { saved = metaData.exhibitSaveThumbnails(rwl, gp, cacheDir, esa, tns, logger, statsIDSCGEN); }
4646 catch(final IOException e) { } // Ignore.
4647
4648 if(TRACE_THUMBNAIL_ACTIVITY_ALL && saved) { logger.log("_getThumbnails("+name+", "+create+", "+allowSyncFetch+") ... saved upstream thumbnails to metadata disc caches: return " + tns); }
4649
4650 // Return the thumbnails to the caller.
4651 return(tns);
4652 }
4653
4654 // Note failure to fetch by putting off any further tries
4655 // by noting at what time we experienced the failure,
4656 // unless one of the following mechanisms
4657 // manages to overwrite our Long with a real set of thumbnails.
4658 // This is not race-free.
4659 // FIXME: should use shorter timeout for temporary network/congestion issues
4660 // FIXME: verify that any race is harmless...
4661 if((memCachedValue == null) && (!(_tIM.get(name) instanceof ExhibitThumbnails)))
4662 {
4663 _tIM.put(name, now);
4664 if(TRACE_THUMBNAIL_ACTIVITY_ALL) { logger.log("_getThumbnails("+name+", "+create+", "+allowSyncFetch+") ... failed fetch so postponing next attempt"); }
4665 }
4666
4667 // // We make the update race-free in case a concurrent call succeeds.
4668 // // THUS ANY FOLLOWING METHODS MUST UPDATE THE MEMORY CACHE.
4669 // if(memCachedValue == null) { _tIM.putIfAbsent(esa.getExhibitFullName(), now); }
4670 // else { _tIM.replace(esa.getExhibitFullName(), memCachedValue, now); }
4671 }
4672 catch(final IOException e) { logger.equals("_getThumbnails("+name+", "+create+", "+allowSyncFetch+") remote fetch FAILED: " + e.getMessage()); } // Log tersely...
4673 }
4674 // If not allowed to fetch the thumbnail immediately,
4675 // then at least make space in the cache for it (and other missing ones)
4676 // for later (pre)caching if enough space is not currently available...
4677 else if(metaData.isReadWrite() && !metaData.canPrecacheThumbnails(cacheDir) &&
4678 (LocalProps.getWEBSVR_MAX_EX_CACHE_BYTES() > 0)) // We trust the cache-sizing properties?
4679 {
4680 try
4681 {
4682 if(TRACE_THUMBNAIL_ACTIVITY_ALL) { logger.log("_getThumbnails("+name+", "+create+", "+allowSyncFetch+") ... making space for thumbnail for next time"); }
4683
4684 // Make space for thumbnail(s) to be (pre)cached later.
4685 // Make space for several in one go here, for efficiency,
4686 // on the grounds that if we are missing one thumbnail
4687 // then we may well be missing more than one.
4688 metaData.purge(rwl,
4689 gp,
4690 cacheDir,
4691 // Very generous notion of elbow room needed...
4692 (ExhibitThumbnails.MAX_BYTES_EST +
4693 CachedFile.MAX_EMPTY_ENTRY_BYTES_ON_DISC +
4694 MAX_REMOTE_FETCH_TO_MAKE_THUMBNAIL +
4695 MAX_TRANSFER_CHUNK_SIZE) << 3,
4696 name,
4697 logger, statsIDSCGEN);
4698 }
4699 catch(final IOException e) { e.printStackTrace(); } // Absorb error but whinge...
4700 }
4701
4702
4703 // Not asked to create thumbnails here,
4704 // so return a null to indicate that
4705 // the requested thumbnails were not immediately to hand.
4706 if(!create)
4707 {
4708 if(TRACE_THUMBNAIL_ACTIVITY_ALL) { logger.log("_getThumbnails("+name+", "+create+", "+allowSyncFetch+") ... NOT trying to create missing thumbnails: return(null)"); }
4709
4710 return(null);
4711 }
4712
4713
4714 ExhibitThumbnails tns = null;
4715 try
4716 {
4717 // Don't try to make the thumbnail
4718 // if a synchronous fetch is not allowed by the caller
4719 // and the exhibit is not fully cached locally,
4720 // or there is too much exhibit data not yet in local cache
4721 // (because it might take an indefinitely long time
4722 // to load a big exhibit across the Net).
4723 final CachedFile exhibitMetadata = metaData.exhibitGetInfo(name);
4724 boolean getRawDataFromOwnCache = true;
4725 if(esa.length > (allowSyncFetch ? MAX_REMOTE_FETCH_TO_MAKE_THUMBNAIL : 0) +
4726 ((null == exhibitMetadata) ? 0 : exhibitMetadata.cachedLength))
4727 {
4728 if(TRACE_THUMBNAIL_ACTIVITY_ALL) { logger.log("_getThumbnails("+name+", "+create+", "+allowSyncFetch+") ... NOT enough data cached locally to create thumbnails (metadata="+exhibitMetadata+")"); }
4729
4730 // Usually only trust/treat our own cache as being 'local'
4731 // to help ensure a reasonably-bounded thumbnail build time for example.
4732 // However, if our upstream source is an ExhibitDataFileSource
4733 // and system power is not currently too tight
4734 // (eg so we can power up underlying bulk storage if necessary)
4735 // then we can consider going directly upstream for data.
4736 final boolean allowRawExhibitDataFromUpstream =
4737 upstreamSourceIsLocal() &&
4738 !GenUtils.mustConservePower();
4739
4740 // If we can't go directly upstream for the missing raw data
4741 // then we have to give up for now and return null.
4742 if(!allowRawExhibitDataFromUpstream) { return(null); }
4743
4744 // Otherwise we can fetch the data directly from upstream
4745 // to (try to) construct the thumbnails.
4746 getRawDataFromOwnCache = false;
4747 if(TRACE_THUMBNAIL_ACTIVITY_ALL) { logger.log("_getThumbnails("+name+", "+create+", "+allowSyncFetch+") ... allowing direct upstream access to get exhibit data to build thumbnail"); }
4748 }
4749
4750 // Block any other attempt at generation of this thumbnail concurrently.
4751 // FIXME: show any remaining race to be harmless...
4752 if((memCachedValue == null) && (!(_tIM.get(name) instanceof ExhibitThumbnails)))
4753 {
4754 if(TRACE_THUMBNAIL_ACTIVITY_ALL) { logger.log("_getThumbnails("+name+", "+create+", "+allowSyncFetch+") ... locking out concurrent creation of this thumbnail"); }
4755
4756 _tIM.put(name, now);
4757 }
4758
4759 // // This value will be overwritten if the thumbnail is generated successfully.
4760 // // We make the update race-free in case a concurrent call succeeds.
4761 // if(memCachedValue == null) { _tIM.putIfAbsent(esa.getExhibitFullName(), now); }
4762 // else { _tIM.replace(esa.getExhibitFullName(), memCachedValue, now); }
4763
4764
4765 // As selected above, fetch raw exhibit data from local cache or upstream.
4766 final AllExhibitProperties.ExhibitDataSource dataSourceTN = getRawDataFromOwnCache ? exhibitDataSource :
4767 (new AllExhibitProperties.ExhibitDataSource(){
4768 @Override public final void getRawFile(final ByteBuffer buf, final ExhibitFull exhibitName, final int position)
4769 throws IOException
4770 { source.getRawFile(buf, exhibitName, position, false); }
4771 /**Assume data is always available immediately from upstream in this case. */
4772 @Override public final boolean isExhibitFullyLoaded(final ExhibitStaticAttr esa)
4773 { return(true); }
4774 });
4775
4776 // Attempt to generate the thumbnails here, not under any visible/blocking lock.
4777 // Claim that we can take as much memory/time (etc) as we need
4778 // ***iff we don't appear to be memory-stressed at the moment to try to avoid OOMEs***
4779 // for at most one thumbnail build out of any concurrent builds;
4780 // others will be vetoed if not enough memory is available.
4781 // We expect a strictly limited number of these concurrently anyway,
4782 // and no user should be blocked waiting for this to finish.
4783 // Without this we may never easily find enough space to build thumbnails on small servers.
4784 // Note: an OOME here is not a disaster.
4785 final boolean isFirst = _gTfirstTNBuildLock.tryLock();
4786 try
4787 {
4788 tns = type.handler.makeThumbnails(
4789 esa,
4790 dataSourceTN,
4791 aep,
4792 isFirst && !MemoryTools.isMemoryStressed());
4793 }
4794 finally { if(isFirst) { _gTfirstTNBuildLock.unlock(); } }
4795
4796 if(TRACE_THUMBNAIL_ACTIVITY_ALL) { logger.log("_getThumbnails("+name+", "+create+", "+allowSyncFetch+") ... result of attempt to makeThumbnails(): " + tns); }
4797 }
4798 catch(final OutOfMemoryError e)
4799 {
4800 // We may not actually be able to catch OOME in practice, but if we can then
4801 // note that OOME is neither unexpected nor disasterous here.
4802 logger.log("_getThumbnails("+name+", "+create+", "+allowSyncFetch+") ... OutOfMemoryError; retry may be possible...");
4803 }
4804 catch(final Exception e)
4805 {
4806 // Absorb errors but whinge...
4807 logger.log("Exception while generating thumbnail during precache for "+esa+" "+e.getMessage());
4808 if(TRACE_THUMBNAIL_ACTIVITY) { logger.log("_getThumbnails("+name+", "+create+", "+allowSyncFetch+") ... makeThumbnails() FAILED: " + e); }
4809 if(TRACE_THUMBNAIL_ACTIVITY) { e.printStackTrace(); }
4810 }
4811
4812 // Having generated it, cache it if not null.
4813 // This will overwrite the timestamp temporarily put in place to prevent concurrent access.
4814 if(tns != null)
4815 {
4816 if(TRACE_THUMBNAIL_ACTIVITY && ExhibitThumbnails.NO_THUMBNAILS.equals(tns)) { System.err.println("WARNING: _getThumbnails("+name+", "+create+", "+allowSyncFetch+") ... cacheing generated NO_THUMBNAILS"); }
4817 if(TRACE_THUMBNAIL_ACTIVITY) { logger.log("_getThumbnails("+name+", "+create+", "+allowSyncFetch+") ... cacheing generated thumbnails in memory and metadata disc caches: " + tns); }
4818
4819 // Unconditionally save to the in-memory cache.
4820 _tIM.put(name, tns);
4821 // _tIM.put(esa.getExhibitFullName(), TIMSoftReference.create(esa.getExhibitFullName(), result, _tIM_deadRefQueue));
4822
4823 // Save to disc cache.
4824 boolean saved = false;
4825 try { saved = metaData.exhibitSaveThumbnails(rwl, gp, cacheDir, esa, tns, logger, statsIDSCGEN); }
4826 catch(final IOException e) { } // Ignore.
4827
4828 if(TRACE_THUMBNAIL_ACTIVITY_ALL && saved) { logger.log("_getThumbnails("+name+", "+create+", "+allowSyncFetch+") ... saved generated thumbnails to metadata disc caches: " + tns); }
4829
4830 // Note local creation of thumbnails...
4831 StatsLogger.captureDataPoint(statsIDSCGEN, SCGNAME_EXTHUCREATED);
4832 }
4833
4834 if(TRACE_THUMBNAIL_ACTIVITY_ALL) { logger.log("_getThumbnails("+name+", "+create+", "+allowSyncFetch+") ... return " + tns); }
4835
4836 return(tns);
4837 }
4838
/**Time of construction of this instance, as reported by System.currentTimeMillis() (ms). */
private final long consTime = System.currentTimeMillis();
4841
4842 /**How long this instance has been alive in milliseconds.
4843 * Will be non-negative when system clock is monotonic.
4844 */
4845 private long _instanceLifems() { return(System.currentTimeMillis() - consTime); }
4846
4847
4848 /**Returns true iff upstream is local disc so some operations should be cheap without cacheing.
4849 * Can avoid significant redundant effort.
4850 */
4851 private boolean upstreamSourceIsLocal()
4852 { return(source instanceof ExhibitDataFileSource); }
4853
/**A lock to allow only one of any concurrent thumbnail builds to assume unlimited resources; never null.
 * Other thumbnail builds may try with whatever memory (etc) remains,
 * but at most one gets special privileges.
 * <p>
 * The thumbnail-building code takes this with tryLock(),
 * so a build that does not get the lock is not blocked by it.
 */
private final ReentrantLock _gTfirstTNBuildLock = new ReentrantLock();
4859
/**Private lock for _getThumbnailsFromPeer() to prevent concurrent P2P thumbnail fetches; never null.
 * It is taken with tryLock(), so a caller that cannot get it immediately
 * gives up (returning null) rather than waiting.
 */
private final ReentrantLock _gTFP_lock = new ReentrantLock();
4862
4863 /**Attempt to fetch the specified thumbnails from any peer; may be null if currently unavailable.
4864 * This never attempts to force creation of a thumbnail remotely,
4865 * but rather tries to fetch an already-present value.
4866 * <p>
4867 * This updates the P2P stats as if an exhibit-data-block transfer,
4868 * which is reasonable since this only attempts a fetch of data,
4869 * never a create which may take signifiant remote CPU time.
4870 * Note that only an (IO)Exception (not having null returned) is 'failure'.
4871 * <p>
4872 * This may potentially 'loop' between peers consuming resources uselessly
4873 * unless some other mechanism is used to prevent such behaviour.
4874 * However, only one outgoing P2P thumbnail request is allowed at once here,
4875 * which should limit any such problem and resources consumed by it.
4876 *
4877 * @param name full exhibit name; never null
4878 */
4879 private ExhibitThumbnails _getThumbnailsFromPeer(final Name.ExhibitFull name)
4880 throws IOException
4881 {
4882 // No P2P thumbnail request concurrency allowed.
4883 if(!_gTFP_lock.tryLock()) { return(null); }
4884 try
4885 {
4886 // Choose the 'best' peer to try...
4887 final String peerToTry = _pickPeer();
4888 if(TRACE_P2P_ACTIVITY || TRACE_THUMBNAIL_ACTIVITY_ALL) { logger.log("_getThumbnailsFromPeer("+name+") selected peer: "+peerToTry); }
4889
4890 // If the master was selected rather than any peer
4891 // then give up and return null immediately.
4892 if(!MASTER_FAKE_TAG.equals(peerToTry))
4893 {
4894 if(TRACE_P2P_ACTIVITY || TRACE_THUMBNAIL_ACTIVITY_ALL) { logger.log("_getThumbnailsFromPeer("+name+") attempting fetch from: "+peerToTry); }
4895
4896 // Create temporary tunnel to the selected peer.
4897 final String url = "http://" +
4898 LoadBalancingUtils.makeMirrorNameFromTag(peerToTry) +
4899 CoreConsts.TUNNEL_URI;
4900 // Suppress logging from this short-lived tunnel...
4901 final ExhibitDataHTTPTunnelSource tempTunnel =
4902 new ExhibitDataHTTPTunnelSource(url, peerToTry, GenUtils.nullLogger);
4903 try
4904 {
4905 boolean successful = false;
4906 final long startFetch = System.currentTimeMillis();
4907 final ExhibitThumbnails tns;
4908 try {
4909 // Get extant thumbnails or abort with an IOException...
4910 tns = tempTunnel.getThumbnails(name, false); // No 'create' requested.
4911 successful = true; // Only an exception is 'failure' for these purposes.
4912 }
4913 finally
4914 {
4915 final long endFetch = System.currentTimeMillis();
4916 final long timeTaken = endFetch - startFetch;
4917 _updatePeerStats(peerToTry,
4918 successful,
4919 timeTaken);
4920 if(TRACE_P2P_ACTIVITY || TRACE_THUMBNAIL_ACTIVITY) { logger.log("_getThumbnailsFromPeer("+name+") fetch ("+timeTaken+"ms) from "+peerToTry+(successful?" successful":" FAILED")); }
4921 }
4922 return(tns);
4923 }
4924 finally // Ensure that the tunnel is cleared down.
4925 { tempTunnel.destroy(); }
4926 }
4927 }
4928 finally { _gTFP_lock.unlock(); }
4929
4930 return(null); // Not currently possible...
4931 }
4932
4933 /**Attempt to asynchronously fetch/create thumbnail that we have failed to return to the user.
4934 * We so this to attempt to fetch soon any thumbnail that was recently
4935 * requested but was not immediately available on the grounds that
4936 * it may be needed again soon.
4937 * <p>
4938 * This only uses a strictly limited number of threads,
4939 * but avoids a wait for general precacheing, which may never happen.
4940 *
4941 * @param exhibitName full exhibit name; non-null valid exhibit name
4942 */
4943 private void _asyncTNFetch(final Name.ExhibitFull exhibitName)
4944 {
4945 // Don't bother trying to fetch thumbnails in the background
4946 // for exhibits that cannot have them,
4947 // ie don't waste time queueing requests that we cannot ever satisfy.
4948 final ExhibitMIME.ExhibitTypeParameters type = (ExhibitMIME.getInputFileType(exhibitName));
4949 if((type == null) || (type.handler == null) ||
4950 !type.canPossiblyCreateThumbnailOfSameMIMEType())
4951 { return; }
4952
4953 if(TRACE_THUMBNAIL_ACTIVITY_ALL) { logger.log("_asyncTNFetch("+exhibitName+") queued request..."); }
4954
4955 // Put this in the queue of things to be done.
4956 discardableReadAheadTaskThreadPool.submit(new Runnable(){
4957 public final void run()
4958 {
4959 // Always allow a sync fetch from upstream.
4960 // Always allow local thumbnail creation attempt.
4961 _getThumbnails(exhibitName, true, true);
4962 }
4963 });
4964 }
4965
/**Manages our local cache of variables, etc; never null.
 * We set this up to be:
 * <ul>
 * <li><em>Not</em> write-through,
 *     ie we reduce "set" (write) traffic to the upstream source.
 * </ul>
 * The variable and event accessors of this class
 * (setVariable(), getVariable(), getEventValue(), syncVariables(), etc)
 * delegate to it.
 */
private final PipelineVarMgr varMgr;
4976
/**Variable flush/retrieval interval (ms); strictly positive.
 * Based on the allowed distribution latency as centrally defined:
 * this is the smaller of
 * 30001ms plus a random extra drawn from Rnd.fastRnd.nextInt(16384)
 * (presumably in the range [0,16383], as for java.util.Random; confirm against Rnd)
 * and one quarter of SystemVariables.MAX_VALUE_DISTRIBUTION_LATENCY_MS plus 1ms.
 * <p>
 * We randomise this a little to help avoid many slaves
 * bothering the master simultaneously.
 * <p>
 * Being a static final, the value is chosen once and is shared by all instances.
 */
private static final int VAR_CACHE_HOLD_TIME_MS = Math.min(30001 + Rnd.fastRnd.nextInt(16384),
                                                           (SystemVariables.MAX_VALUE_DISTRIBUTION_LATENCY_MS / 4) + 1);
4985
/**Last time we flushed/fetched variables, private to _handleSysVars().
 * We do another flush/fetch when we find this to be null (eg initially)
 * or more than VAR_CACHE_HOLD_TIME_MS in the past.
 * <p>
 * It is (re)set only at the very end of _handleSysVars(),
 * so a run that is abandoned with an exception does not update it.
 * <p>
 * This is volatile so that we do not need to hold a lock to access it.
 */
private transient volatile Long _handleSysVars_lastFetch;
4993
/**Earliest time (ms, as System.currentTimeMillis()) at which event histories should next be saved, private to _handleSysVars(), initially zero.
 * Despite what the name may suggest this is not the time of the last save:
 * after each save attempt _handleSysVars() sets it to a time in the future,
 * and (unless forced) does not try another save until the clock has passed it.
 * The initial value of zero means that the first opportunity to save is taken.
 * <p>
 * This is volatile so that we do not need to hold a lock to access it.
 */
private transient volatile long _handleSysVars_evSave;
4998
4999 /**Handle (update, sync, persist) system variables as required.
5000 * Recompute any local values generated by the cache itself,
5001 * flush any outbound values,
5002 * and retrieve any upstream values periodically.
5003 * <p>
5004 * We recompute local variables when we would be prepared to
5005 * flush/fetch system variables.
5006 * <p>
5007 * Timing is handled with volatile values,
5008 * so we do not need to take out any other locks while working.
5009 * <p>
5010 * We rely on the locking within varMgr to ensure consistency,
5011 * including during the save. Bad things may happen if trying to
5012 * remove a cache while we are trying to save event histories!
5013 * <p>
5014 * We also use this to recompute any vote/correlation factors
5015 * and update our notion of this instance's stratum.
5016 *
5017 * @param force if true, force an immediate complete save of state upstream,
5018 * and to disc (if upstream source not already local)
5019 */
5020 private void _handleSysVars(final boolean force)
5021 throws IOException
5022 {
5023 final long now = System.currentTimeMillis();
5024 final Long lastTime = _handleSysVars_lastFetch;
5025 // If still not time for another round of variable updates,
5026 // then return immediately.
5027 if(!force &&
5028 (lastTime != null) &&
5029 (lastTime.longValue() + VAR_CACHE_HOLD_TIME_MS > now))
5030 { return; }
5031
5032 // OK, recompute and set locally-generated variable values.
5033 // 100 - percentage-of-cache-space free over target size, to float precision...
5034 final Float percentUsed = new Float(100f - ((100.0f * metaData.computeFreeSpaceBelowHighWaterMark(cacheDir)) /
5035 metaData.computeTargetHighWaterMark()));
5036 final SimpleVariableValue pU = new SimpleVariableValue(
5037 SystemVariables.ExhibitDataSimpleCache_CACHE_AVAIL_SPACE_PERCENT_USED,
5038 percentUsed);
5039 varMgr.setVariable(pU);
5040 // Exhibit (partially or fully) cached count...
5041 varMgr.setVariable(new SimpleVariableValue(
5042 SystemVariables.ExhibitDataSimpleCache_CACHED_EXHIBIT_COUNT,
5043 new Integer(metaData.size())));
5044 // Exhibit fully-cached-exhibit percentage (do NOT force recomputation here).
5045 final Float percentFullyCached = new Float((100.0f * metaData.getFullyCachedCount(_AEP, false)) /
5046 Math.max(1, _AEP.aeid.length));
5047 final SimpleVariableValue pFC = new SimpleVariableValue(
5048 SystemVariables.ExhibitDataSimpleCache_EXHIBITS_FULLY_CACHED_PERCENT,
5049 percentFullyCached);
5050 varMgr.setVariable(pFC);
5051
5052 // Push locally-cached updates upstream,
5053 // and bring upstream values into our local cache.
5054 // Always request an incremental update.
5055 try { varMgr.syncVariables(false); }
5056 catch(final IOException e)
5057 {
5058 /* Ignore (though log) the error so that events can still be saved... */
5059 logger.log("ExhibitDataSimpleCache: failed to syncVariables() upstream");;
5060 }
5061
5062 // Save updated event histories from time-to-time
5063 // unless upstream source is local/fast.
5064 // Use the form which has bounded space requirements,
5065 // and persist to the cache area, if extant.
5066 // We can probably risk saving at most
5067 // a reasonable fraction of our temporal slackness...
5068 if(!upstreamSourceIsLocal() &&
5069 (cacheDir != null) &&
5070 (force || (_handleSysVars_evSave < now)))
5071 {
5072 final File evhd = new File(cacheDir, EVENT_HISTORY_DIR);
5073 if((evhd != null) &&
5074 (evhd.isDirectory()) &&
5075 (evhd.canWrite()))
5076 {
5077 if(ORG.hd.d.IsDebug.isDebug) { logger.log("[event history save start: "+ System.currentTimeMillis() +".]"); }
5078 // Usually save incrementally so as to be quick,
5079 // Only recording histories with updates since the last save
5080 // and don't leave a trail (eg use bounded disc space).
5081 varMgr.saveEventHistories(evhd, true, false, !force);
5082 if(ORG.hd.d.IsDebug.isDebug) { logger.log("[event history save end: "+ System.currentTimeMillis() +".]"); }
5083 }
5084 else
5085 {
5086 logger.log("[ExhibitDataSimpleCache: WARNING: could not save event history in: " + evhd);
5087 }
5088
5089 // Postpone the next event save.
5090 // Make the wait a small fraction of the default slackness time.
5091 // Postpone the next save significantly longer if this or upstream are (temporarily) conserving power.
5092 _handleSysVars_evSave = System.currentTimeMillis() +
5093 (CoreConsts.DEFAULT_TEMPORAL_SLACKNESS_S * 128) +
5094 ((upstreamStratum.isUpstreamConserving() || GenUtils.mustConservePower()) ?
5095 CoreConsts.ASYNC_MIN_POWER_SAVE_NON_CRITICAL_DATA_FLUSH_MS : (CoreConsts.ASYNC_MIN_POWER_SAVE_NON_CRITICAL_DATA_FLUSH_MS >>> 3)) +
5096 (Rnd.fastRnd.nextInt(1 + CoreConsts.DEFAULT_TEMPORAL_SLACKNESS_S/13) * 1007);
5097 }
5098
5099 // Having done the vars, etc, try to update the upstream stratum info,
5100 try {
5101 final Stratum stratum = source.getStratum();
5102 if(null != stratum) { upstreamStratum = stratum; } // Belt and braces, though null shouldn't happen!
5103 }
5104 catch(final IOException e)
5105 {
5106 // Note the failure to fetch a current value.
5107 upstreamStratum = Stratum.UNKNOWN;
5108 logger.log("ExhibitDataSimpleCache: failed to getStratum() upstream");;
5109 }
5110
5111 // Note successful finish.
5112 _handleSysVars_lastFetch = new Long(System.currentTimeMillis());
5113 }
5114
/**Set variable.
 * Set local cached value immediately;
 * store global values to periodically propagate upstream to master
 * but show last global values obtained from master on periodic poll.
 * <p>
 * This simply delegates to the variable manager (varMgr).
 *
 * @param newValue  the variable value to set
 *
 * @throws java.lang.IllegalArgumentException on attempt to:
 *     set variable with value of wrong type or incompatible definition,
 *     set non-existent or read-only variable (or these can be ignored)
 * @throws IOException  if thrown by the variable manager
 */
public void setVariable(final SimpleVariableValue newValue)
    throws IOException
{
    varMgr.setVariable(newValue);
}
5129
5130 /**Update number of variables at once for efficiency.
5131 * Is passed a Set of SimpleVariableValues and behaves as if it
5132 * operates on all of them by calling setVariable() for each item
5133 * in the Set.
5134 * <p>
5135 * This implementation "fails fast" on the first error.
5136 * <p>
5137 * This implementation never throws an IOException.
5138 *
5139 * @throws java.lang.IllegalArgumentException on attempt to:
5140 * set variable with value of wrong type or incompatible definition,
5141 * set non-existent or read-only variable (or these can be ignored)
5142 */
5143 public int setVariables(final SimpleVariableValue[] newValues)
5144 throws IOException
5145 {
5146 return(varMgr.setVariables(newValues));
5147 }
5148
5149 /**Get a single variable value; returns null if no such value or wrong type.
5150 * Always get from local cache.
5151 * <p>
5152 * This implementation never throws an IOException.
5153 */
5154 public SimpleVariableValue getVariable(final SimpleVariableDefinition var)
5155 {
5156 return(varMgr.getVariable(var));
5157 }
5158
5159 /**Get immutable Set of variable values altered on or after a given time, or all for -1.
5160 * Always get from local cache
5161 * (the variable cache being periodically updated from the master).
5162 * <p>
5163 * This may be slow if there are many live variables.
5164 * <p>
5165 * This implementation never throws an IOException.
5166 */
5167 public SimpleVariableValue[] getVariables(final long changedSince)
5168 {
5169 return(varMgr.getVariables(changedSince));
5170 }
5171
5172 /**Get the current partial, or previous full, event set at the specified interval; never returns null.
5173 * This is a simplified interface to return either the current event set
5174 * that is being collected, or the previous completed set.
5175 * <p>
5176 * The current set is the most timely, but may not contain enough data
5177 * to be meaningful if the new interval has just started.
5178 * <p>
5179 * The previous set is complete and thus most likely to have enough samples
5180 * to be useful, but is not completely current.
5181 * <p>
5182 * If the requested event set is not (immediately) available,
5183 * an empty synthetic one is created and returned.
5184 * Thus, with this interface, it is not possible to distinguish between
5185 * there being no events in the given interval or simply no data.
5186 * <p>
5187 * TODO: This attempts to limit the amount of time that may be spent blocking,
5188 * eg due to upstream I/O issues,
5189 * but its ability to do so may depend on availability of threads, etc.
5190 *
5191 * @param def event definition (must be for an event); never null
5192 * @param intervalSelector never null
5193 * @param current if true the current event set is returned,
5194 * else the previous complete set is returned
5195 *
5196 * @return requested event set; never null
5197 *
5198 * @throws IllegalArgumentException if the request arguments are invalid
5199 */
5200 public EventVariableValue getEventValue(final SimpleVariableDefinition def,
5201 final EventPeriod intervalSelector,
5202 final boolean current)
5203 {
5204 return(varMgr.getEventValue(def, intervalSelector, current));
5205 }
5206
5207 /**Get the specified event sets for the specified intervals; never null.
5208 * This allows retrieval of zero or more event sets for the specified
5209 * interval size.
5210 * <p>
5211 * Requests for more than SystemVariables.EVENT_SAMPLES_RETAINED in the
5212 * past (or for the future!) cannot be satisfied and data will not be
5213 * returned for them.
5214 * <p>
5215 * Usually not more than SystemVariables.EVENT_SAMPLES_RETAINED samples
5216 * will be returned in response to any one request as a safety measure.
5217 * <p>
5218 * (An implementation that is not an end-point may go upstream to fetch
5219 * missing values and cache them to satisfy future requests.)
5220 *
5221 * @param def event definition (must be for an event); never null
5222 * @param intervalSelector never null
5223 * @param intervalNumber a time (as from System.currentTimeMillis())
5224 * which identifies the first interval for which data is potentially
5225 * required; if too far in the past or future then possibly no data
5226 * will be available,
5227 * zero is used to select the "all" bucket
5228 * @param whichValues each true bit represents a slot for which data is
5229 * required, bit 0 indicating data from the slot within which
5230 * firstIntervalTime is located, bit 1 the previous slot, etc
5231 *
5232 * @return as many of the requested values as available,
5233 * at least long enough to return all the available values,
5234 * with [0] corresponding to bit 0 in the BitSet;
5235 * may contain nulls or be zero-length but is never null
5236 */
5237 public EventVariableValue[] getEventValues(final SimpleVariableDefinition def,
5238 final EventPeriod intervalSelector,
5239 final long intervalNumber,
5240 final BitSet whichValues)
5241 { return(varMgr.getEventValues(def, intervalSelector, intervalNumber, whichValues)); }
5242
/**Synchronise with upstream values.
 * Pushes updated values upstream to the source,
 * calls sync on the source with the same "force" argument,
 * and then retrieves changed values from upstream.
 * <p>
 * Holds no externally-visible locks,
 * but if called by multiple threads this will serialise the calls.
 * <p>
 * This simply delegates to the variable manager (varMgr).
 *
 * @param force  if true, this will force a full sync on the read side
 *     by using getVariables(-1) rather than attempting to choose a
 *     nearer timestamp for efficiency;
 *     the implementation is at liberty to use getVariables(-1)
 *     at any time whatever the argument value,
 *     and almost certainly should use it on the first call
 *
 * @throws java.io.IOException  if one is received from upstream
 */
public void syncVariables(final boolean force)
    throws IOException
{ varMgr.syncVariables(force); }
5263
5264
/**Flag to adjust the aggressiveness of the cache; its initial value comes from LocalProps.fastStartMode().
5266 * Aggressive cacheing may include read-ahead, and fetching exhibits
5267 * or at least some leading portion of them to keep the cache full
5268 * or at least primed with exhibits to improve the user experience.
5269 * <p>
5270 * This can be set false when the system is overloaded to eliminate most
5271 * effort not strictly necessary.
5272 * <p>
5273 * Volatile to eliminate the need for locking.
5274 */
5275 private volatile boolean _aggressive = LocalProps.fastStartMode();
5276
5277 /**Set the aggressiveness of the cache; by default not aggressive.
5278 */
5279 public void setAggressive(final boolean isAggressive)
5280 { _aggressive = isAggressive; }
5281
5282
5283 /**Get requested Properties selected by key and versionID.
5284 * Fetches a Properties set unconditionally (versionID == -1)
5285 * else if the versionID presented is not current.
5286 *
5287 * @param key selector (with possible embedded sub-key)
5288 * for desired properties set; never null
5289 * @param versionID if -1 then map is always returned if available,
5290 * else must be non-negative and null is returned if the versionID
5291 * presented matches that of the current version
5292 * (ie if the caller has presumably got the up-to-date version);
5293 * may be a timestamp or a hash or other value,
5294 * and by convention is zero only for an empty properties set
5295 *
5296 * @return null, or Properties map guaranteed to contain only
5297 * String keys and values
5298 */
5299 public java.util.Properties getProperties(final PropsKey key,
5300 final long versionID)
5301 throws IOException
5302 {
5303 return(source.getProperties(key, versionID)); // FIXME: needs cacheing.
5304 }
5305
5306
5307 /**Poll periodically (of the order of a second) to do cache maintenance.
5308 * We keep the poll up to date to keep the work out of a servlet
5309 * response; the data retrieval might easily take a long time...
5310 * <p>
5311 * This routine takes care of calling the upstream poll().
5312 * <p>
5313 * We have to be careful about not restricting servlet callers'
5314 * concurrency here... We try not to do fetches from the back-end,
5315 * which may be very slow, with the instance lock, which would shut
5316 * out all foreground users needlessly.
5317 * <p>
5318 * We ignore the caller's GenProps and fetch and cache our own...
5319 */
5320 public void poll(final GenProps _gp)
5321 throws IOException
5322 {
5323 // Last error encountered...
5324 IOException lastErr = null;
5325
5326 // Do any upstream work...
5327 try { source.poll(genProps); }
5328 catch(final IOException e) { lastErr = e; }
5329
5330 // Make sure that the sysprops cache is not stale.
5331 try { _getGenProps(); }
5332 catch(final IOException e) { lastErr = e; }
5333
5334 // Make sure that the generic-security-props cache is not stale.
5335 try { _getGenSecProps(); }
5336 catch(final IOException e) { lastErr = e; }
5337
5338 // Make sure that the immutable-attr cache is not stale.
5339 /* try */ { _getAllExhibitProperties(); }
5340 // catch(final IOException e) { lastErr = e; }
5341
5342 // Do any incremental precacheing.
5343 /* try */ { _doPreCache(genProps); }
5344 // catch(final IOException e) { lastErr = e; }
5345
5346 // Check our cache meta-data against the filesystem.
5347 /* try */ { _checkMetaData(); }
5348 // catch(final IOException e) { lastErr = e; }
5349
5350 // Save our cache meta-data if we need to.
5351 /* try */ { _cleanAndSaveMetaData(false); }
5352 // catch(final IOException e) { lastErr = e; }
5353
5354 // Handle system variables if necessary...
5355 try { _handleSysVars(false); }
5356 catch(final IOException e) { lastErr = e; }
5357
5358 // Incrementally purge the thumbnail image cache.
5359 _thumbnailsInMemory.compact();
5360
5361 // Update any vote/correlation values as required
5362 // (if not (temporarily) conserving power here or upstream).
5363 // Entirely ignore IOExceptions that are usually benign.
5364 if(!upstreamStratum.isUpstreamConserving() && !GenUtils.mustConservePower())
5365 {
5366 ThreadUtils.nonCPUThreadPoolDiscardable.submit(new Runnable(){
5367 public final void run()
5368 {
5369 try { _AEP.updateVoteCache(varMgr, false); }
5370 catch(final IOException e) { }
5371 }
5372 });
5373 }
5374
5375 // Log if the cache seems to be busy...
5376 final int ql = rwl.getQueueLength();
5377 if(ql > 0)
5378 { logger.log("[ExhibitDataSimpleCache: busy: threads queued for lock: "+ql+".]"); }
5379
5380 // Rethrow any subordinate error.
5381 // However, trim simple master-not-in-service errors.
5382 if(lastErr != null)
5383 {
5384 if(ORG.hd.d.IsDebug.isDebug) { lastErr.printStackTrace(); }
5385
5386 if(!(lastErr instanceof PGMasterNotInServiceException))
5387 { throw lastErr; }
5388 else
5389 { logger.log("PGMasterNotInServiceException: " + lastErr.getMessage()); }
5390 }
5391 }
5392
5393 /**Our record of the current GenProps; never null.
5394 * Maintained by poll() under the instance lock.
5395 * <p>
5396 * Is volatile so can be accessed without a lock.
5397 */
5398 private volatile GenProps genProps = new GenProps(); // Start with defaults.
5399
5400
5401
5402
5403 /**If true, we are in purge mode, zapping files before answering each query.
5404 * Set true when we hit or exceed the cache size limit,
5405 * set false when we drop below the cache low-water mark.
5406 * <p>
5407 * This must be accessed only under the instance lock,
5408 * and only by _getRawFileViaPersistentCache().
5409 */
5410 //private boolean discCachePurgeMode; // Initially false.
5411
5412 /**Object representing one (partially) cached file on disc.
5413 * This is immutable, serialisable, and totally ordered.
5414 * <p>
5415 * The natural sort order is LRU (Least-Recently Used)
5416 * first, with ties (which should be very rare) broken
5417 * by increasing cached length (ignoring thumbnails, etc)
5418 * (to represent the cost expended getting the data to cache),
5419 * and then exhibit name.
5420 * <p>
5421 * This carries the minimum that need be known about each
5422 * file on disc.
5423 * <p>
5424 * The item is immutable to avoid causing difficulties when
5425 * already inserted in a sorted collection.
5426 * <p>
5427 * We store the timestamp of the original source exhibit file;
5428 * if it doesn't match the current
5429 * exhibit timestamp then this cache entry and its cached data
5430 * has to be regarded as invalidated and discarded.
5431 */
5432 private static final class CachedFile implements Comparable<CachedFile>,
5433 Serializable, ObjectInputValidation,
5434 Cloneable
5435 {
5436 /**The official name of the exhibit; never null and always a valid exhibit name. */
5437 final Name.ExhibitFull name;
5438
5439 /**The master-exhibit timestamp; strictly positive. */
5440 final long timestamp;
5441
5442 /**The initial portion cached on disc (bytes); never negative. */
5443 final long cachedLength;
5444
5445 /**The time last written or read; non-negative.
5446 * May be zero if no exhibit data is cached.
5447 */
5448 public long getLastAccessed()
5449 {
5450 return(lastAccessed);
5451 }
5452
5453 /**The time last written or read; non-negative.
5454 * We will use setLastModified(System.currentTimeMillis())
5455 * to enforce our notion of `now', since the filesystem
5456 * may be mounted from a remote server with a different
5457 * clock to ours.
5458 * <p>
5459 * We only do the setLastModified() when writing to the
5460 * filesystem anyway, or in the special case of
5461 * reading the first byte of an exhibit. We do this to
5462 * save (synchronous) disc traffic updating timestamps
5463 * when in fact this should all be in the metadata
5464 * and is only needed if that is lost for some reason.
5465 * <p>
5466 * Logically final, but made mutable for touchedEntry()
5467 * to be able to overwrite in a cloned copy.
5468 * <p>
5469 * May be zero if only metadata but no exhibit data is present.
5470 */
5471 /* final */ long lastAccessed;
5472
5473 /**The size of the serialised thumbnail object, zero if none; never negative. */
5474 final int tnBytes;
5475
5476
/**Creates a raw entry given all the data about the entry.
 * The name must be a syntactically-correct exhibit name.
 * <p>
 * Should not be called directly from outside the class.
 * <p>
 * All fields are assigned and then the whole state is checked
 * with validateObject().
 *
 * @param _name  official exhibit name
 * @param _ts  master-exhibit timestamp
 * @param _len  length of the initial portion cached on disc (bytes)
 * @param _lastA  time last written or read
 * @param _tnBytes  size of the serialised thumbnail object, zero if none
 *
 * @throws IllegalArgumentException  if validateObject() rejects the state;
 *     the message is that of the InvalidObjectException,
 *     which is also attached as the cause
 */
private CachedFile(final Name.ExhibitFull _name,
                   final long _ts,
                   final long _len,
                   final long _lastA,
                   final int _tnBytes)
    throws IllegalArgumentException
{
    name = _name;
    timestamp = _ts;
    cachedLength = _len;
    lastAccessed = _lastA;
    tnBytes = _tnBytes;

    // Verify object state.
    try { validateObject(); }
    catch(final InvalidObjectException e)
    {
        // Keep the original exception as the cause so the stack trace is not lost.
        throw new IllegalArgumentException(e.getMessage(), e);
    }
}
5500
5501 /**Returns true if this exhibit's metadata is essentially equivalent to another one.
5502 * This insists that everything but the last-accessed timestamp is correct.
5503 * <p>
5504 * The result is false if the argument is null.
5505 */
5506 boolean isEquivalent(final CachedFile other)
5507 {
5508 if(other == null) { return(false); }
5509 return(name.equals(other.name) &&
5510 (timestamp == other.timestamp) &&
5511 (cachedLength == other.cachedLength) &&
5512 (tnBytes == other.tnBytes));
5513 }
5514
5515 /**Do we have thumbnails?
5516 * True if the thumbnails file is non-zero length.
5517 * Does not prove that we can load and/or use the thumbnail
5518 * file even if this returns true.
5519 */
5520 boolean hasThumbnails()
5521 { return(tnBytes != 0); }
5522
5523 /**Returns thumbnails; never null.
5524 * In case of difficulty, eg if thumbnails do not exist
5525 * or cannot be deserialised, throws IOException.
5526 * <p>
5527 * We're fairly dumb about this,
5528 * assuming that all aurgument have been validated.
5529 */
5530 final ExhibitThumbnails getThumbnails(final File cacheDir)
5531 throws IOException
5532 {
5533 final File dataFile = new File(new File(cacheDir, CACHE_EXDATA_DIR), name.toString());
5534 final File dataFileDir = new File(dataFile.getParent());
5535 final String fileComponent = dataFile.getName();
5536 final File tnFile = new File(dataFileDir,
5537 CACHE_EXAUX_PREFIX + CACHE_EXAUX_TN_KW + '.' + fileComponent);
5538 return((ExhibitThumbnails) FileTools.deserialiseFromFile(tnFile, false));
5539 }
5540
5541 /**Return data chunk from cached file in the result argument.
5542 * This file must live in the usual place, in CACHE_EXDATA_DIR,
5543 * and the name must be a syntactically-correct exhibit name.
5544 * <p>
5545 * We touch the file (set its stamp to `now') to show that it
5546 * has been used iff we retrieve the first byte otherwise
5547 * we rely on the in-memory metadata to hold the last access time;
5548 * this is a belt-and-braces measure in case we have to reconstruct
5549 * the metadata from the disc cache for some reason.
5550 * <p>
5551 * This will throw an IOException or IllegalArgumentException
5552 * if it cannot find the file or there is some other problem.
5553 * <p>
5554 * We're fairly dumb about this, assuming that everything has
5555 * been checked elsewhere.
5556 * <p>
5557 * If the read length is zero the buffer may be null.
5558 *
5559 * @param quick be as fast as possible, eg don't update timestamps
5560 */
5561 CachedFile getCachedData(final File cacheDir,
5562 final int start,
5563 final ByteBuffer buf,
5564 final boolean quick)
5565 throws IOException
5566 {
5567 final File dataFile = new File(new File(cacheDir, CACHE_EXDATA_DIR), name.toString());
5568
5569 RandomAccessFile raf = null;
5570 try
5571 {
5572 raf = new RandomAccessFile(dataFile, "r");
5573 raf.getChannel().read(buf, start);
5574 if(quick) { return(this); }
5575 final long now = System.currentTimeMillis();
5576 if(start == 0) { dataFile.setLastModified(now); } // Enforce our clock's `now'.
5577 return(touchedEntry(now));
5578 }
5579 finally
5580 {
5581 if(raf != null) { raf.close(); }
5582 }
5583 }
5584
5585
5586 /**Recovers the actual on-disc cached exhibit prefix length, or 0 if not present.
5587 * Can be used to double-check the on-disc cached data length before
5588 * adjusting it, but may be slow since it requires a real disc access.
5589 * Use sparingly.
5590 * <p>
5591 * All arguments must be validated and safe before calling this routine;
5592 * for speed it does no further validation.
5593 *
5594 * @param cacheDir non-null directory containing cache
5595 * @param exhibitName name of exhibit; syntactically valid, non-null
5596 *
5597 * @return length cached on disc; 0 if no data cached for this exhibit
5598 */
5599 static long getCachedDataLength(final File cacheDir,
5600 final Name.ExhibitFull exhibitName)
5601 {
5602 final File dataFile = new File(new File(cacheDir, CACHE_EXDATA_DIR), exhibitName.toString());
5603 return(dataFile.length());
5604 }
5605
5606 /**Fixes-up in-memory cached meta-data for exhibit with that from disc.
5607 * Reconstructs a set of details for the exhibit from disc,
5608 * makes sure that the record of the cached length and timestamps is OK,
5609 * and returns a new record if not (else the original is returned).
5610 * <p>
5611 * In particular this returns a new item if:
5612 * <ul>
5613 * <li>this record of the cached prefix length is wrong, or
5614 * <li>the timestamp in this record is older than that recovered, or
5615 * <li>the record of cached thumbnail data is wrong.
5616 * </ul>
5617 * <p>
5618 * This instance is not altered, nor is the disc-based data.
5619 * <p>
5620 * TODO: Possibly discard all cached data for an exhibit with a changed timestamp.
5621 *
5622 * @param rec the meta-data as reconstructed from disc; never null
5623 */
5624 CachedFile fixup(final CachedFile rec,
5625 final SimpleLoggerIF logger)
5626 {
5627 if(!name.equals(rec.name))
5628 { throw new IllegalArgumentException(); }
5629
5630 // If something is wrong with this meta-data
5631 // then we must do a fixup.
5632 final boolean lengthWrong = (rec.cachedLength != cachedLength);
5633 if(lengthWrong ||
5634 (rec.lastAccessed > lastAccessed) || /* old timestamp. */
5635 (rec.tnBytes != tnBytes))
5636 {
5637 // Warn if the problem was cached-data-length,
5638 // since it might indicate significant lost data.
5639 if(lengthWrong) { logger.log("[ExhibitDataSimpleCache: WARNING: doing fix-up because in-memory ("+cachedLength+") and on-disc ("+rec.cachedLength+") cached length meta-data differ for "+name+".]"); }
5640
5641 // Return fixed-up value...
5642 return(new CachedFile(name, timestamp,
5643 rec.cachedLength,
5644 Math.max(rec.lastAccessed, lastAccessed),
5645 rec.tnBytes));
5646 }
5647
5648 // This instance does not need fixing up, so return as-is.
5649 return(this);
5650 }
5651
5652 /**Recovers an approximate entry for an extant cached file; never null.
5653 * This file must live in the usual place, in CACHE_EXDATA_DIR,
5654 * and the name must be a syntactically-correct exhibit name.
5655 * <p>
5656 * This will throw an IOException or IllegalArgumentException
5657 * if it cannot find the file or other data that it needs.
5658 * <p>
5659 * This does not adjust the in-memory records,
5660 * nor alter anything on disc.
5661 *
5662 * @return synthesised cache record; never null
5663 */
5664 static CachedFile recoverExtantCachedFileDetails(final File cacheDir,
5665 final Name.ExhibitFull exhibitName)
5666 throws IOException
5667 {
5668 final File dataFile = new File(new File(cacheDir, CACHE_EXDATA_DIR), exhibitName.toString());
5669 final File dataFileDir = new File(dataFile.getParent());
5670 final String fileComponent = dataFile.getName();
5671 final File stampFile = new File(dataFileDir,
5672 CACHE_EXAUX_PREFIX + CACHE_EXAUX_TIMESTAMP_KW + '.' + fileComponent);
5673 final File tnFile = new File(dataFileDir,
5674 CACHE_EXAUX_PREFIX + CACHE_EXAUX_TN_KW + '.' + fileComponent);
5675
5676 return(new CachedFile(exhibitName,
5677 // Read the contents of the timestamp file
5678 // as a decimal signed long.
5679 // (Note that this file need not be read again since it
5680 // is assumed not to change and so the in-memory value
5681 // can be used instead.)
5682 // We take a missing timestamp file to indicate
5683 // potential data corruption.
5684 Long.parseLong(FileTools.readTextFile(stampFile).trim(), 10),
5685
5686 dataFile.length(), // Must be present, so must be >= 0.
5687
5688 // We take the newest of the last-modified times of
5689 // the exhibit data and the thumbnails (if present)
5690 // as our best approximation to the last-accessed time.
5691 // (Since java.io.File does not have a lastAccessed() member
5692 // or equivalent.)
5693 Math.max(dataFile.lastModified(),
5694 tnFile.lastModified()),
5695
5696 // A thumbnail file may be present,
5697 // in which case we need its size,
5698 // else we record 0.
5699 Math.max(0, (int) tnFile.length()) // Thumbnail file, if extant.
5700 ));
5701 }
5702
/**If true, touchedEntry() uses clone() rather than a constructor. */
5704 private static final boolean _TE_USE_CLONE = true;
5705
5706
5707 /**Make touched cache file entry.
5708 * This returns a new object identical to the old (this)
5709 * one except that the last-accessed stamp is set to the current time of day.
5710 * <p>
5711 * This does not alter anything on disc.
5712 */
5713 CachedFile touchedEntry()
5714 { return(touchedEntry(System.currentTimeMillis())); }
5715
5716 /**Make touched cache file entry.
5717 * This returns a new object identical to the old (this)
5718 * one except that the last-accessed stamp is set to the
5719 * time of day passed in (usually the current time of day).
5720 * <p>
5721 * This does not alter anything on disc.
5722 * <p>
5723 * This could construct a new object,
5724 * but to save time and avoid the heavy lifting in the constructor,
5725 * this uses clone().
5726 *
5727 * @param newLastAccessedTimestamp time to make touched version of descriptor with,
5728 * strictly positive
5729 */
5730 CachedFile touchedEntry(final long newLastAccessedTimestamp)
5731 {
5732 assert(newLastAccessedTimestamp > 0);
5733
5734 if(!_TE_USE_CLONE)
5735 {
5736 // Validate arguments via the constructor.
5737 return(new CachedFile(name,
5738 timestamp, cachedLength,
5739 newLastAccessedTimestamp,
5740 tnBytes));
5741 }
5742
5743 // To try to keep this frequent operation as quick as possible,
5744 // knowing that we are overriding just one value,
5745 // we use clone() rather than constructing a new object.
5746 // This avoids redundantly revalidating all the fields.
5747 else
5748 {
5749 assert(newLastAccessedTimestamp > 0);
5750
5751 final CachedFile result = cloneMe();
5752 result.lastAccessed = newLastAccessedTimestamp;
5753 return(result);
5754 }
5755 }
5756
5757 /**Clone the object; does not throw CloneNotSupportedException. */
5758 private CachedFile cloneMe()
5759 {
5760 try { return((CachedFile) super.clone()); }
5761 catch(final CloneNotSupportedException e) { throw new Error(); } // Cannot happen.
5762 }
5763
5764 /**Make a new file on disc and return its CachedFile.
5765 * This is passed the case base dir, the name of the exhibit
5766 * to have a cache entry created, and the timestamp of the
5767 * master exhibit itself.
5768 * <p>
5769 * No zero-length cache entry is created.
5770 * <p>
5771 * No thumbnail is created; any extant one is expunged.
5772 * <p>
5773 * This does not adjust the in-memory records.
5774 */
5775 static CachedFile makeNewDiscCacheFile(final File cacheDir,
5776 final Name.ExhibitFull name,
5777 final long exhibitStamp)
5778 throws IOException
5779 {
5780 final File dataFile = new File(new File(cacheDir, CACHE_EXDATA_DIR), name.toString());
5781 final File dataFileDir = new File(dataFile.getParent());
5782 final String fileComponent = dataFile.getName();
5783 final File stampFile = new File(dataFileDir,
5784 CACHE_EXAUX_PREFIX + CACHE_EXAUX_TIMESTAMP_KW + '.' + fileComponent);
5785 final File tnFile = new File(dataFileDir,
5786 CACHE_EXAUX_PREFIX + CACHE_EXAUX_TN_KW + '.' + fileComponent);
5787
5788 // Expunge any extant files.
5789 dataFile.delete();
5790 tnFile.delete();
5791 stampFile.delete();
5792
5793 // Make sure that any parent directories exist...
5794 dataFileDir.mkdirs();
5795
5796 // Attempt to write the (signed decimal) timestamp file.
5797 PrintWriter pw = null;
5798 try {
5799 pw = new PrintWriter(new FileWriter(stampFile));
5800 pw.println(exhibitStamp);
5801 }
5802 finally
5803 {
5804 if(pw != null) { pw.close(); }
5805 }
5806
5807 final long now = System.currentTimeMillis();
5808
5809 // // Write a zero-length data file, and set its timestamp
5810 // // to our notion of `now' rather than the fileserver's.
5811 // // Ignore any error report for now...
5812 // dataFile.createNewFile();
5813 // dataFile.setLastModified(now);
5814
5815 return(new CachedFile(name,
5816 exhibitStamp,
5817 0, // No data cached...
5818 now,
5819 0)); // No thumbnail.
5820 }
5821
5822
5823 /**Extend given cache file on disc and return new CachedFile.
5824 * This is passed the cache base dir, the name of the exhibit
5825 * to have a cache entry created, the start point of the new data
5826 * and the data itself, which must be more than zero length.
5827 * <p>
5828 * Note that though the new data should normally be exactly at the end
5829 * of the existing data,
5830 * it is not an error, though probably inefficient,
5831 * to start writing before the end of the existing data,
5832 * since that indicates wasted effort re-cacheing data we already have.
5833 * <p>
5834 * It <em>is</em> an error to start writing at a point beyond the end
5835 * of the existing cached data since we don't support "sparse" data
5836 * and on some operating systems (eg UNIX) the gaps would be
5837 * filled with zeros which we probably don't want.
5838 * <p>
5839 * If there is an overlap with the existing data,
5840 * the old (overlapped) data is overwritten silently with the new.
5841 * This may allow us to update parts in-situ to fix errors,
5842 * and to silently handle partly-overlapping concurrent updates.
5843 * <p>
5844 * The cache file is extended with (the non-overlapping part of)
5845 * the given data.
5846 * <p>
5847 * This does not adjust the in-memory records.
5848 * <p>
5849 * This must only be called when other cache write activity
5850 * (such as removing entries) is locked out
5851 * to avoid possible file corruption.
5852 */
5853 CachedFile extendCacheFile(final File cacheDir,
5854 final long dataStart,
5855 final ByteBuffer data)
5856 throws IOException
5857 {
5858 final File dataFile = new File(new File(cacheDir, CACHE_EXDATA_DIR), name.toString());
5859
5860 if(dataStart < 0)
5861 { throw new IllegalArgumentException("dataStart must be non-negative"); }
5862 if(dataStart > cachedLength)
5863 { throw new IllegalArgumentException("dataStart must be no later than existing cachedLength"); }
5864 if(data == null)
5865 { throw new IllegalArgumentException("data buffer must not be null"); }
5866 final int len = data.remaining();
5867 if(len < 1)
5868 { throw new IllegalArgumentException("data length must be greater than zero"); }
5869
5870 // Double-check that data on disc is as long as we think that it is.
5871 final long actualLenBefore = dataFile.length();
5872 if(dataStart > actualLenBefore)
5873 { throw new IOException("cannot leave gaps in cached data; actual cached data is shorter than expected"); }
5874
5875 RandomAccessFile raf = null;
5876 try
5877 {
5878 raf = new RandomAccessFile(dataFile, "rw");
5879 // raf.seek(dataStart);
5880 // raf.write(data);
5881 raf.getChannel().write(data, dataStart);
5882 if(data.remaining() != 0) { throw new IOException("did not write all requested data"); }
5883 final long now = System.currentTimeMillis();
5884 dataFile.setLastModified(now); // Enforce our clock's `now'.
5885 return(new CachedFile(name, timestamp, Math.max(actualLenBefore, dataStart + len), now, tnBytes));
5886 }
5887 finally
5888 {
5889 if(raf != null) { raf.close(); }
5890 }
5891 }
5892
5893 /**Remove any thumbnail file and return a new in-memory cache entry.
5894 * This just zaps any thumbnail file and sets the thumbnail length to zero.
5895 * <p>
5896 * This does not mark the entry as updated on disc nor in the new
5897 * record.
5898 * <p>
5899 * This does not adjust the in-memory records itself.
5900 */
5901 CachedFile zapThumbnails(final File cacheDir)
5902 //throws IOException
5903 {
5904 final File dataFile = new File(new File(cacheDir, CACHE_EXDATA_DIR), name.toString());
5905 final File dataFileDir = new File(dataFile.getParent());
5906 final String fileComponent = dataFile.getName();
5907 final File tnFile = new File(dataFileDir,
5908 CACHE_EXAUX_PREFIX + CACHE_EXAUX_TN_KW + '.' + fileComponent);
5909 tnFile.delete();
5910 return(new CachedFile(name, timestamp, cachedLength, lastAccessed, 0));
5911 }
5912
5913 /**Remove any exhibit data and return a new in-memory cache entry.
5914 * This just zaps any exhibit data file
5915 * and records the cached length as zero in the returned value.
5916 * <p>
5917 * This attempts to force a deletion even if the file is not obviously present,
5918 * just to make best efforts to purge it,
5919 * and we whinge (on System.err) if we cannot make it go away.
5920 * <p>
5921 * This does not mark the entry as updated on disc nor in the new record.
5922 * <p>
5923 * This does not adjust the in-memory records itself.
5924 */
5925 CachedFile zapData(final File cacheDir)
5926 //throws IOException
5927 {
5928 final File dataFile = new File(new File(cacheDir, CACHE_EXDATA_DIR), name.toString());
5929 final boolean dfExists = dataFile.exists();
5930 final boolean dfDeleted = dataFile.delete();
5931 if(dfExists && !dfDeleted && dataFile.exists()) { System.err.println("ERROR: ExhibitDataSimpleCache: unable to zapData() for: "+name); }
5932 return(new CachedFile(name, timestamp, 0, lastAccessed, tnBytes));
5933 }
5934
5935 /**Add a thumbnail file and return a new in-memory cache entry.
5936 * This is not allowed if we already think we have a
5937 * thumbnail or if there is a thumbnail file already on disc.
5938 * <p>
5939 * The thumbnail argument must be non-null.
5940 * <p>
5941 * This does not mark the entry as updated on disc or in the new
5942 * record.
5943 * <p>
5944 * This does not adjust the in-memory records itself.
5945 */
5946 CachedFile saveThumbnails(final File cacheDir,
5947 final ExhibitThumbnails tns)
5948 throws IOException
5949 {
5950 if(tns == null)
5951 { throw new IllegalArgumentException(); }
5952
5953 final File dataFile = new File(new File(cacheDir, CACHE_EXDATA_DIR), name.toString());
5954 final File dataFileDir = new File(dataFile.getParent());
5955 final String fileComponent = dataFile.getName();
5956 final File tnFile = new File(dataFileDir,
5957 CACHE_EXAUX_PREFIX + CACHE_EXAUX_TN_KW + '.' + fileComponent);
5958
5959 if(hasThumbnails() || tnFile.exists())
5960 { throw new IOException("thumbnails already exist"); }
5961
5962 FileTools.serialiseToFile(tns, tnFile, false, true);
5963 final long now = System.currentTimeMillis();
5964 dataFile.setLastModified(now); // Enforce our clock's `now'.
5965 final int tnBytesNew = (int) tnFile.length();
5966 if(tnBytesNew <= 0)
5967 {
5968 tnFile.delete();
5969 throw new IOException("error saving thumbnail");
5970 }
5971 return(new CachedFile(name, timestamp, cachedLength, now, tnBytesNew));
5972 }
5973
5974
5975
5976
5977 /**Equality relies on comparison, ie on last-access time, cached length and name.
5978 */
5979 @Override
5980 public final boolean equals(final Object o)
5981 {
5982 if(!(o instanceof CachedFile)) { return(false); }
5983 return(compareTo((CachedFile) o) == 0);
5984 }
5985
5986 /**The hash depends on the timestamp.
5987 * Two objects can only be equal if their timestamps and cached lengths are.
5988 */
5989 @Override
5990 public final int hashCode()
5991 {
5992 return(((int) lastAccessed) ^ ((int) cachedLength));
5993 }
5994
5995 /**Total ordering: oldest first, then smallest first, then by name.
5996 */
5997 public final int compareTo(final CachedFile other)
5998 {
5999 // Oldest first, primarily.
6000 if(lastAccessed < other.lastAccessed) { return(-1); }
6001 if(lastAccessed > other.lastAccessed) { return( 1); }
6002
6003 // Then smallest first.
6004 if(cachedLength < other.cachedLength) { return(-1); }
6005 if(cachedLength > other.cachedLength) { return( 1); }
6006
6007 // Then sort by name to break ties, ie provide a total ordering.
6008 return(TextUtils.compare(name, other.name));
6009 }
6010
6011 /**Rough estimate of maximum space required for new empty cache entry.
6012 * This assumes a maximum-length exhibit name and no data nor thumbnail
6013 * for the exhibit.
6014 * <p>
6015 * This should approximately match what calcDiscSpace() should
6016 * produce for such an exhibit, but the values may not be
6017 * exactly the same.
6018 */
6019 static final int MAX_EMPTY_ENTRY_BYTES_ON_DISC = (int) (
6020 // Basic file name representation...
6021 FileTools.roundUpToFSBlockSize(ExhibitName.MAX_NAME_LENGTH) +
6022 // Extra directory space for timestamp file...
6023 FileTools.roundUpToFSBlockSize(ExhibitName.MAX_NAME_LENGTH-2+32));
6024
6025 /**Calculate the disc space taken up by this exhibit cache entry all told.
6026 * This guesses a little for overheads such as the directory entry,
6027 * rounding for allocation space on disc, etc,
6028 * and aims to be slightly conservative.
6029 * <p>
6030 * This assumes a roughly UFS-like (UNIX File System) pattern of disc usage.
6031 */
6032 final long calcDiscSpace()
6033 {
6034 // Estimated storage consumed as a result of
6035 // length of final portion of each aux file name in directory entry
6036 // plus some overhead for prefixes,
6037 // the directory entry metadata, etc.
6038 final int dirEntry = 32 + name.getShortName().length();
6039
6040 // Estimate of space taken by exhibit,
6041 // including full cost of its name from top of tree
6042 // assuming that there is some sharing of per-path-component directory overhead.
6043 final long exhibitDataAndDirSpace =
6044 FileTools.roundUpToFSBlockSize(cachedLength) + // For exhibit data.
6045 name.length() + // Storage for path to exhibit.
6046 dirEntry + // For exhibit-file directory entry.
6047 // Other amortized per-exhibit overheads such as inodes, etc.
6048 1024 + FileTools.FS_EST_BLOCK_SIZE_BYTES;
6049
6050 // Extra storage cost of (small) timestamp file.
6051 final long stampFileSpace =
6052 FileTools.FS_EST_BLOCK_SIZE_BYTES + // For `timestamp' file block.
6053 dirEntry; // For timestamp-file directory entry.
6054
6055 // Extra storage cost of thumbnails file,
6056 // iff the thumbnails exist.
6057 final long tnFileSpace = (tnBytes == 0) ? 0 :
6058 (FileTools.roundUpToFSBlockSize(tnBytes) + // For `thumbnails' file block.
6059 dirEntry); // For thumbnail-file directory entry.
6060
6061 return(exhibitDataAndDirSpace + stampFileSpace + tnFileSpace);
6062 }
6063
6064 /**Used to zap my disc files, including all auxiliary files for this exhibit.
6065 * Quietly ignores any errors.
6066 * <p>
6067 * This does not adjust the in-memory records.
6068 */
6069 final void zapMe(final File cacheDir)
6070 {
6071 assert(cacheDir != null);
6072 final File dataFile = new File(new File(cacheDir, CACHE_EXDATA_DIR), name.toString());
6073 final File dataFileDir = new File(dataFile.getParent());
6074 final String fileComponent = dataFile.getName();
6075 final File stampFile = new File(dataFileDir,
6076 CACHE_EXAUX_PREFIX + CACHE_EXAUX_TIMESTAMP_KW + '.' + fileComponent);
6077 final File tnFile = new File(dataFileDir,
6078 CACHE_EXAUX_PREFIX + CACHE_EXAUX_TN_KW + '.' + fileComponent);
6079 dataFile.delete();
6080 stampFile.delete();
6081 tnFile.delete();
6082 }
6083
6084 /**Generate human-readable summary of state. */
6085 @Override public String toString()
6086 {
6087 return("CachedFile [cachedLength=" + cachedLength
6088 + ", lastAccessed=" + (new Date(lastAccessed))
6089 + ", name=" + name
6090 + ", timestamp=" + (new Date(timestamp))
6091 + ", tnBytes=" + tnBytes + "]");
6092 }
6093
6094 /**My serialisation version number. */
6095 private static final long serialVersionUID = -5246653973669373340L;
6096
6097 /**Deserialise. */
6098 private void readObject(final ObjectInputStream in)
6099 throws IOException, ClassNotFoundException
6100 {
6101 in.defaultReadObject();
6102 validateObject(); // Validate state immediately.
6103 }
6104
6105 /**Validate fields/state.
6106 * Called in the constructor and possibly after de-serialising.
6107 * <p>
6108 * Barf if something bad is found.
6109 * (Maybe allow some extra info in debug version.)
6110 */
6111 public void validateObject()
6112 throws InvalidObjectException
6113 {
6114 // Check that all components are sane and safe.
6115 if(!ExhibitName.validNameSyntax(name))
6116 { throw new InvalidObjectException("bad object: invalid name: " + name); }
6117
6118 if(timestamp <= 0) // Could maybe have tighter constraint...
6119 { throw new InvalidObjectException("bad object: invalid timestamp"); }
6120
6121 if(cachedLength < 0)
6122 { throw new InvalidObjectException("bad object: invalid cached length"); }
6123
6124 if(lastAccessed < 0) // Could maybe have tighter constraint...
6125 { throw new InvalidObjectException("bad object: bad access time: " + lastAccessed); }
6126
6127 if(tnBytes < 0)
6128 { throw new InvalidObjectException("bad object: invalid thumbnail size"); }
6129 }
6130 }
6131
6132
/**Fraction of max cache size that is the low-water mark.
 * In the range ]0.0f, 1.0f[ excluding both end points.
 * <p>
 * We will only do precacheing when the cache size is below
 * the low-water mark.
 * <p>
 * Don't get this too close to 1 to avoid churning the cache
 * when loading large single blocks of data
 * or upon other minor disturbances.
 */
private static final float LOW_WATER_FRACTION = 0.95f;
6144
/**Routine to do incremental pre-cacheing.
 * Exits as soon as it detects other threads queueing for the cache lock.
 * <p>
 * The calling thread only performs a series of cheap veto checks;
 * the actual work is submitted as a task to
 * ThreadUtils.nonCPUThreadPoolDiscardable and runs there.
 * <p>
 * This relies on the following items being fetched and entirely
 * maintained by other activity/methods, probably under poll():
 * <ul>
 * <li>GenProps
 * <li>AllExhibitProperties
 * </ul>
 * <p>
 * This will only do any data prefetching while the cache is
 * below the low-water mark, and will limit the amount
 * of client and master resource used.
 * <p>
 * We won't actually start any precacheing until
 * we see some evidence of user/upstream activity that
 * might eventually benefit from it.
 * <p>
 * This will also only precache while the "aggressive" flag is set,
 * and this should be set true only when the system is not busy.
 * <p>
 * This may also involve precomputation/preloading of optional data,
 * though should not be relied on in lieu of other methods to keep this fresh,
 * so we may have work to do even if there is no space for prefetching.
 * <p>
 * This also incrementally checks the cache for consistency with the
 * current exhibit properties, ie timestamp, size, hashes.
 * <p>
 * When running as a cloud instance, with bandwidth and CPU metered/charged,
 * we may resist precaching exhibit data and all but the most popular thumbnails.
 * <p>
 * This does not hold a cache lock for its duration,
 * but does hold a private lock to protect its internal state
 * because it must not be multi-threaded;
 * any attempt to run this in a second thread is quietly vetoed.
 *
 * @param gp  general properties, read for the web-server bandwidth limiter
 *     and passed through to the per-exhibit helpers
 */
private void _doPreCache(final GenProps gp)
{
    // If not in an aggressive mode
    // then return immediately.
    if(!_aggressive) { return; }

    // If no evidence of continuing downstream use of this cache instance
    // then reduce the polling frequency/effort
    // (by skipping this call at random about half of the time).
    if(!_userRequestedDataFromCache && Rnd.fastRnd.nextBoolean()) { return; }

    // If it is too soon to do any more precacheing
    // then return immediately.
    if(System.currentTimeMillis() < _noMorePrecacheUntil) { return; }

    // If the cache is too full to allow any precacheing
    // then return immediately.
    if(!metaData.canPrecache(cacheDir)) { return; }

    // If the cache is currently read- or write- locked
    // or if there is anyone waiting to acquire a cache lock,
    // then quit immediately so as not to cause extra contention and so as
    // to keep precacheing as unintrusive as possible.
    if(rwl.isWriteLocked() || (rwl.getReadLockCount() > 0) || rwl.hasQueuedThreads()) { return; }

    // If the system is (temporarily) conserving power
    // then return immediately.
    if(GenUtils.mustConservePower()) { return; }

    // If we're already running a precache thread
    // then don't try to start another one.
    // (Only a cheap early-out: the task re-checks with tryLock() below.)
    if(_preCacheLock.isLocked()) { return; }

    // Spin off an I/O-bound thread for the next bit of precacheing.
    // Discardable to avoid blocking poll() for long periods.
    // This is executed immediately or not at all if the pool is full.
    ThreadUtils.nonCPUThreadPoolDiscardable.submit(new Runnable(){ public final void run() {
        // Don't attempt to precache if already in progress...
        if(!_preCacheLock.tryLock()) { return; }
        try
        {
            final long startTime = System.currentTimeMillis();

            // If it is too soon to do any more precacheing
            // then return immediately.
            // (This and the next early return happen before the inner try,
            // so they do NOT pass through the back-off computation
            // in the inner finally block.)
            if(startTime < _noMorePrecacheUntil) { return; }

            // If we have been asked to consume <= 2% resources
            // then just don't even try precaching.
            final int websvr_bw_limiter =
                gp.getWEBSVR_BW_LIMITER() * LocalProps.getServerSlowdownFactor();
            if(websvr_bw_limiter >= 50) { return; }

            // OK, do some precache work...
            // Take a single snapshot of the exhibit properties for this run.
            final AllExhibitProperties aep = _AEP;
            final AllExhibitImmutableData aeid = aep.aeid;

            // Force expiry of the precache iterator
            // if we can tell that the exhibit set has changed
            // since the iterator was created.
            final Long exhibitHash = new Long(aep.longHash);

            try
            {
                if((_precacheExhibitHash != null) && !exhibitHash.equals(_precacheExhibitHash))
                { _precacheIterator = null; }

                // If the precache iterator has `expired'
                // then reset it.
                if((_precacheIterator == null) ||
                   !_precacheIterator.hasNext())
                {
                    // If the hash is (non-null and) equal to the exhibit hash,
                    // it means that there is no further precaching work to do
                    // for now.
                    // (The hash is cleared to null elsewhere whenever work is
                    // found to be outstanding, eg in _updateOneExhibit() when
                    // a thumbnail could not be obtained.)
                    if(exhibitHash.equals(_precacheExhibitHash))
                    {
                        // Postpone precacheing until the exhibit set changes
                        // OR data is requested from the cache by a visitor.
                        _userRequestedDataFromCache = false;
                        _precacheExhibitHash = null;
                        logger.log("[ExhibitDataSimpleCache: stopped precaching until more user activity seen.]");
                        return;
                    }

                    final ScorerCacheIF scorers = null; // FIXME: should get access to RealThing(TM).

                    // We (re)set it to the full set of
                    // currently valid (full) exhibit names,
                    // since those are the only things that we should be
                    // aggressively precaching (there may be defunct
                    // items in the metadata that have been deleted
                    // from the exhibit set, for example).
                    //
                    // Sort the list by recent download popularity,
                    // then sort the list "best"-first,
                    // at least to a quick approximation,
                    // since we usually will not have the
                    // fully-computed "goodness" data available
                    // when an exhibit set changes for example.
                    //
                    // We give priority to exhibits with
                    // no metadata or thumbnails cached yet.
                    //
                    // For speed we'll accept stale/approximate data,
                    // as this only alters the order of processing.
                    final EventVariableValue downloads = varMgr.getEventValue(SystemVariables.ACCESSPATTERN_COMPLETED_DOWNLOAD, EventPeriod.VLONG, false);
                    final ArrayList<Name.ExhibitFull> al = new ArrayList<Name.ExhibitFull>(aeid.getAllExhibitNamesSorted());
                    // NOTE(review): the comparator reads live cache metadata and
                    // may force EPCM values to be brought up to date mid-sort,
                    // so its answers might not be stable for the duration of the
                    // sort -- confirm that the sort tolerates this.
                    Collections.sort(al, (new Comparator<Name.ExhibitFull>(){
                        /**Sort "best"-first, being prepared to accept stale data if need be.
                         * Keys, in order: no cache metadata first, then missing-but-makeable
                         * thumbnail first, then more downloads first, then higher goodness first.
                         */
                        public final int compare(final Name.ExhibitFull s1, final Name.ExhibitFull s2)
                        {
                            // First give priority to exhibits with
                            // no metadata or thumbnails cached yet.
                            final CachedFile cf1 = metaData.exhibitGetInfo(s1);
                            final boolean hasMetaData1 = (cf1 != null);
                            final CachedFile cf2 = metaData.exhibitGetInfo(s2);
                            final boolean hasMetaData2 = (cf2 != null);
                            // Item without metadata should come first.
                            if(hasMetaData1 != hasMetaData2)
                            { return(hasMetaData1 ? +1 : -1); }

                            // An item "needs" a thumbnail if it has a cache entry
                            // without thumbnails and its type may be able to provide one.
                            final ExhibitMIME.ExhibitTypeParameters exhibitType1 = ExhibitMIME.getExhibitType(ExhibitName.getExtensionComponent(s1).toString());
                            final boolean needsTN1 = (cf1 != null) && !cf1.hasThumbnails() &&
                                (exhibitType1 != null) && exhibitType1.canPossiblyCreateThumbnailOfSameMIMEType();
                            final ExhibitMIME.ExhibitTypeParameters exhibitType2 = ExhibitMIME.getExhibitType(ExhibitName.getExtensionComponent(s2).toString());
                            final boolean needsTN2 = (cf2 != null) && !cf2.hasThumbnails() &&
                                (exhibitType2 != null) && exhibitType2.canPossiblyCreateThumbnailOfSameMIMEType();
                            // Item with missing thumbnail should come first.
                            if(needsTN1 != needsTN2)
                            { return(needsTN2 ? +1 : -1); }

                            // Now sort with more-downloads (more-popular) item first.
                            final int dl1 = downloads.getCount(s1);
                            final int dl2 = downloads.getCount(s2);
                            if(dl1 > dl2) { return(-1); /* Correct order. */ }
                            if(dl1 < dl2) { return(+1); /* Wrong order. */ }

                            // If we've not yet taken much time on this sort/round
                            // (and we're not conserving CPU temporarily or permanently)
                            // then we can potentially force items' EPCM values up-to-date as we go.
                            final boolean saveCPU = GenUtils.mustConserveCPU() ||
                                ((System.currentTimeMillis() - startTime) > MAX_dPC_SPIN_TIME_MS/2);

                            final ScorerCacheIF scorers = null; // FIXME: should get access to RealThing(TM).

                            // Then sort by (integer) goodness value.
                            // NOTE(review): the EPCM result is dereferenced here without
                            // a null check, whereas the bestFiltered filter below does
                            // test for null -- confirm it cannot be null on this path.
                            final int g1 = aep.getExhibitPropsComputableMutable(s1, saveCPU, gp, ExhibitDataSimpleCache.this, scorers).getGoodness();
                            final int g2 = aep.getExhibitPropsComputableMutable(s2, saveCPU, gp, ExhibitDataSimpleCache.this, scorers).getGoodness();
                            if(g1 > g2) { return(-1); /* Correct order. */ }
                            if(g1 < g2) { return(+1); /* Wrong order. */ }
                            return(0); // Identical goodness.
                        }
                    }));

                    // Compute the "best" exhibits for enhanced precaching:
                    // at most the first tenth of the sorted list,
                    // capped at MAX_BEST_EX_PRECACHED entries.
                    final int lastIndex = al.size() / 10;
                    // Filter to a list of "good", non-stale-EPCM items.
                    final List<Name.ExhibitFull> bestFiltered = new ArrayList<Name.ExhibitFull>(al.subList(0, Math.min(lastIndex, MAX_BEST_EX_PRECACHED)));
                    // Since items to be trimmed will more often be at the end
                    // work backwards through the list to minimise copying.
                    for(int i = bestFiltered.size(); --i >= 0; )
                    {
                        final Name.ExhibitFull ex = bestFiltered.get(i);
                        final ExhibitPropsComputableMutable epcm =
                            aep.getExhibitPropsComputableMutable(ex, true, gp, ExhibitDataSimpleCache.this, scorers);
                        // Remove trivially-stale or bad entry.  (Continue to trust a stale entry however...)
                        if((epcm == null) || epcm.isTriviallyStale() || (epcm.getGoodness() <= 0))
                        { bestFiltered.remove(i); }
                    }
                    if(IsDebug.isDebug) { logger.log("[ExhibitDataSimpleCache: bestFiltered.size()="+bestFiltered.size()+".]"); }

                    // Update the best-exhibit list atomically...
                    synchronized(_bestExhibits)
                    {
                        _bestExhibits.clear();
                        _bestExhibits.addAll(bestFiltered);
                    }

                    // Reinsert the "bestFiltered" entries at the start
                    // so that they get two bites of the cherry each pass.
                    al.addAll(0, bestFiltered);

                    // Help avoid unused memory being held for a long time.
                    bestFiltered.clear();
                    al.trimToSize();

                    // Create/save the iterator.
                    _precacheIterator = al.iterator();

                    // Note the hash of the current exhibit set.
                    _precacheExhibitHash = exhibitHash;

                    // Note restart of precaching scan of all exhibits.
                    StatsLogger.captureDataPoint(statsIDSCGEN, SCGNAME_PRECACHERESTART);
                    logger.log("[ExhibitDataSimpleCache: started precaching round for "+aeid.length+" exhibits.]");
                }

                // Do the precacheing work.
                // Stop when we have used up all the pending names to examine,
                // or we've used a just-GUI-significant amount of time,
                // or we see other users queueing to get into the cache...
                do
                {
                    // If cache is too full to allow any precacheing
                    // then return immediately.
                    if(!metaData.canPrecache(cacheDir)) { return; }

                    // If there is anyone waiting to acquire a cache lock,
                    // then quit immediately so as not to cause extra contention
                    // and so as to keep precacheing as unintrusive as possible.
                    if(rwl.hasQueuedThreads()) { return; }

                    // Stop if we have run out of exhibits to precache...
                    if(!_precacheIterator.hasNext())
                    {
                        // Force recompute of 'fully-loaded' measure for this new AEP.
                        metaData.getFullyCachedCount(aep, true);

                        // The hash is still set only if nothing cleared it
                        // during this round, ie no outstanding work was flagged.
                        if(_precacheExhibitHash != null)
                        {
                            logger.log("[ExhibitDataSimpleCache: finished precaching round with no work to do.]");
                            return;
                        }

                        logger.log("[ExhibitDataSimpleCache: finished precaching round with work still pending.]");

                        // Force the meta-data out to disc
                        // since we did do some work this time around.
                        metaData.setNeedsSave();
                        return;
                    }

                    // Note precache examination of next exhibit.
                    StatsLogger.captureDataPoint(statsIDSCGEN, SCGNAME_PRECACHEEXAMINED);

                    // Get the next item to examine and make sure that
                    // it is still a valid exhibit.
                    final Name.ExhibitFull nextToExamine = _precacheIterator.next();
                    final ExhibitStaticAttr esa = aeid.getStaticAttr(nextToExamine);
                    if(esa == null)
                    {
                        // If we find that an exhibit name from the iterator
                        // is no longer valid
                        // then force creation of a new iterator from
                        // the new exhibit set.
                        // This is to stop us wasting time on deleted exhibits.
                        // This should restart this precacheing round
                        // rather than terminating it.
                        final Set<Name.ExhibitFull> empty = Collections.emptySet();
                        _precacheIterator = empty.iterator();
                        _precacheExhibitHash = null;
                        break;
                    }

                    // Update one exhibit.
                    // Keep trying to bring it up-to-date if:
                    //   * This exhibit is not completely cached.
                    //   * We have not run out of time.
                    //   * We draw a Gaussian random number smaller than
                    //     (1.5 times) the exhibit's goodness.
                    // The aim is that better exhibits should have relatively
                    // more time spent on bringing them up to date.
                    //
                    // An IOException terminates this burst of precaching.
                    //
                    // Note that we are prepared to try to force
                    // the EPCM to be (re)computed if necessary
                    // as a service to the rest of the system
                    // if we are prepared to 'precache' these values anyway.
                    final ScorerCacheIF scorers = null; // FIXME: should ideally get access to RealThing(TM).
                    // NOTE(review): as in the comparator, the EPCM result is
                    // dereferenced below without a null check -- confirm it is safe.
                    while(_updateOneExhibit(esa, gp, aep, false) &&
                          (Rnd.fastRnd.nextGaussian() <
                              1.5*aep.getExhibitPropsComputableMutable(esa.getExhibitFullName(),
                                      (!CALC_MISSING_EPCM_DURING_PRECACHE) || Rnd.fastRnd.nextBoolean(),
                                      gp,
                                      ExhibitDataSimpleCache.this,
                                      scorers).getGoodnessAsFloat()))
                    {
                        // If there is anyone waiting to acquire a lock,
                        // then quit immediately to let them in
                        // to keep precacheing as unintrusive as possible.
                        if(rwl.hasQueuedThreads()) { return; }

                        // If we run out of time,
                        // then quit immediately.
                        if((System.currentTimeMillis() - startTime) > MAX_dPC_SPIN_TIME_MS) { return; }
                    }

                // If we still haven't taken too much time
                // then examine another exhibit.
                } while((System.currentTimeMillis() - startTime) < MAX_dPC_SPIN_TIME_MS);
            }
            catch(final IOException e)
            {
                // Absorb any IOException quietly.
                // (Deliberate: it simply ends this burst of precaching;
                // the back-off below still applies.)
            }
            finally
            {
                // See how long this round took,
                // and put off the next bit of precacheing by a multiple.
                //
                // If this round took much longer than intended
                // or upstream is conserving power
                // then postpone the next attempt much further than usual
                // as this may be invoking especially expensive operations elsewhere,
                // eg filesystem mounts at the server or connection dial-ups, etc.
                //
                // We cap our pause time in case of transient delays.
                //
                // At the end of an entire precaching round
                // put off more precache work for an additional significant time.
                final long now = System.currentTimeMillis();
                final long timeTaken = now - startTime;
                final boolean muchSlowerThanExpected = (timeTaken > Math.max(1024, 3*MAX_dPC_SPIN_TIME_MS));
                final boolean goSlow = muchSlowerThanExpected || upstreamStratum.isUpstreamConserving();

                // Next permitted start time is the sum of:
                //   * a base delay: 11ms if the iterator still has items,
                //     else 2*MAX_dPC_BACKOFF_TIME (end of a whole round);
                //   * the time taken (multiplied by the bandwidth limiter when that is > 1),
                //     shifted left by 3 bits (x8) if going slow else by 1 bit (x2),
                //     capped at MAX_dPC_BACKOFF_TIME.
                _noMorePrecacheUntil = now +
                    (((_precacheIterator != null) && _precacheIterator.hasNext()) ? 11 : 2*MAX_dPC_BACKOFF_TIME) +
                    Math.min(MAX_dPC_BACKOFF_TIME, (((websvr_bw_limiter>1) ? (timeTaken*websvr_bw_limiter) : timeTaken) << (goSlow ? 3 : 1)));
            }
        }
        // Always release the private single-runner lock taken by tryLock() above.
        finally { _preCacheLock.unlock(); } } });
}
6504
6505 /**Partly check the cache data (including metadata, tns, etc) for validity.
6506 * This picks one or more aspects (at random) of the currently cached data
6507 * for the specified exhibit for validity.
6508 * <p>
6509 * Checks include length, timestamp, and hashes
6510 * dependent on the data available.
6511 * <p>
6512 * This is designed to complete reasonably quickly in most cases,
6513 * to perform an incremental check,
6514 * removing or in some cases repairing damaged/corrupt/invalid data.
6515 * <p>
6516 * If this finds the cache entry to be broken somehow
6517 * then this routine may delete the cache entry entirely,
6518 * or repair the data, or prune back to some valid prefix of the data held,
6519 * or just remove the corrupt underlying data to leave the metadata
6520 * to be fixed at a later date.
6521 * <p>
6522 * This will grab locks only as it needs them
6523 * in order to be as unintrusive as possible.
6524 * <p>
6525 * This is not expected to be needed very often,
6526 * but is mainly designed to avoid silent disc corruption,
6527 * and to provide for automatic repair.
6528 *
6529 * @param aep the current exhibit properties; never null
6530 * @param esa the exhibit whose cache data is to be verified; never null
6531 */
6532 private final void _doCacheDataValidityTest(final AllExhibitProperties aep,
6533 final ExhibitStaticAttr esa)
6534 throws IOException
6535 {
6536 assert((aep != null) && (esa != null));
6537
6538 // Get a snapshot of the cache data, if any, for this exhibit.
6539 CachedFile cf = metaData.exhibitGetInfo(esa.getExhibitFullName());
6540
6541 // If nothing in the cache for this exhibit then return immediately
6542 // since there is nothing to check or repair!
6543 if(cf == null)
6544 { return; }
6545
6546 //if(IsDebug.isDebug) { System.out.println("[ExhibitDataSimpleCache._doCacheDataValidityTest(): validating exhibit "+esa+"...]"); }
6547
6548 // If there is a gross change,
6549 // ie in length or timestamp,
6550 // then remove the current cache entry entirely and return.
6551 if((cf.timestamp != esa.timestamp) ||
6552 (cf.cachedLength > esa.length))
6553 {
6554 logger.log("ERROR: cached data timestamp/length wrong for exhibit: " + esa);
6555 _removeCorruptData(cf);
6556 return;
6557 }
6558
6559 // If thumbnail data exists but is corrupt/unusable then remove it.
6560 if(cf.hasThumbnails())
6561 {
6562 ExhibitThumbnails tns = null;
6563
6564 // Grab cache read lock just long enough to read the thumbnails.
6565 // This is assumed to force a validation check during deserialisation.
6566 _getReadLock(rwl, "ExhibitDataSimpleCache._doCacheDataValidityTest() reading thumbnails", logger);
6567 try { tns = cf.getThumbnails(cacheDir); }
6568 catch(final Exception e) { e.printStackTrace(); }
6569 finally { rwl.readLock().unlock(); }
6570
6571 // If we couldn't load/validate the thumbnails
6572 // or they are of the less less-safe form
6573 // without built-in timestamp/checksums
6574 // then get a write lock and zap them
6575 // and use the updated metadata subsequently.
6576 // Iff this cache entry is removed entirely while we are working
6577 // then we will return.
6578 if((tns == null) || // Couldn't load thumbnails at all.
6579 ((tns.created <= 0) && !ExhibitThumbnails.NO_THUMBNAILS.equals(tns)) || // Dubious timestamp for non-empty thumbnails.
6580 ((tns.getSmall() != null) && !tns.getSmall().hasMD5Hash()) || // Missing checksum.
6581 ((tns.getStandard() != null) && !tns.getStandard().hasMD5Hash())) // Missing checksum.
6582 {
6583 logger.log("WARNING: purging damaged or old-style thumbnail for "+esa+": "+tns);
6584 _getWriteLock(rwl, "ExhibitDataSimpleCache._doCacheDataValidityTest() purging damaged/old thumbnail file", logger);
6585 try
6586 {
6587 final CachedFile reReadInfo = metaData.exhibitGetInfo(esa.getExhibitFullName());
6588 if(reReadInfo == null) { return; /* Entry now gone. */ }
6589 metaData._update(rwl, cf = reReadInfo.zapThumbnails(cacheDir).touchedEntry(), logger);
6590 }
6591 catch(final Exception e) { e.printStackTrace(); }
6592 finally { rwl.writeLock().unlock(); }
6593 }
6594 }
6595
6596 // If we don't have the accession data then we have no hashes to check.
6597 final ExhibitPropsLoadable exhibitPropsLoadable = aep.getExhibitPropsLoadable(esa.getExhibitFullName());
6598 final AccessionData ad = exhibitPropsLoadable.getAccessionMetadata();
6599 if(ad == null) { return; }
6600
6601 // OK, note when we are about to start the validation.
6602 final long startTime = System.currentTimeMillis();
6603
6604 // We can check the whole-exhibit hashes
6605 // if we have the whole exhibit in cache
6606 // AND we have the hashes available.
6607 final boolean wholeExhibitInCache = (cf.cachedLength == esa.length);
6608 final boolean canCheckWholeExhibit = wholeExhibitInCache &&
6609 ((ad.hashCRC32 != null) || (ad.hashMD5 != null));
6610
6611 if(canCheckWholeExhibit)
6612 {
6613 // Set up our hashes...
6614 final java.util.zip.Checksum hCRC32 = new java.util.zip.CRC32();
6615 final MessageDigest hMD5;
6616 try { hMD5 = MessageDigest.getInstance(CoreConsts.HASH_MD5); }
6617 catch(final NoSuchAlgorithmException e) // Should never happen...
6618 { throw new Error("could not find "+CoreConsts.HASH_MD5+" digester!"); }
6619
6620 // Create our buffer...
6621 // We can economise on memory for very short exhibits...
6622 final byte buf[] = new byte[Math.min(AccessionData.HASH_BLOCK_SIZE_BYTES, (int) cf.cachedLength)];
6623
6624 // To examine the whole file without locking out cache activity
6625 // we'll read the exhibit file a block at a time,
6626 // reacquiring the cache read lock each time.
6627 // We ignore any data added to the cached file while we work.
6628 // If the file gets *shorter* while we're working
6629 // (this *might* be legit if the exhibit was tossed out of cache)
6630 // or we encounter any other unexpected event
6631 // (eg an IOException caused by an unreadable disc sector),
6632 // then we just trash the whole cached entry.
6633 final File dataFile = new File(new File(cacheDir, CACHE_EXDATA_DIR), esa.getCharSequence().toString());
6634 final DataInputStream dis = new DataInputStream(new FileInputStream(dataFile));
6635 try
6636 {
6637 for(int block = 0; ; ++block)
6638 {
6639 // Compute start offset of this block.
6640 final int start = block * AccessionData.HASH_BLOCK_SIZE_BYTES;
6641
6642 // Stop when we run out of file!
6643 if(start >= cf.cachedLength) { break; /* Finished. */ }
6644
6645 // Compute size of block to read.
6646 final int len = Math.min(AccessionData.HASH_BLOCK_SIZE_BYTES,
6647 (int) (cf.cachedLength - start));
6648
6649 // Grab cache read lock just long enough to read one block.
6650 _getReadLock(rwl, "ExhibitDataSimpleCache._doCacheDataValidityTest() reading one block of full file", logger);
6651 try { dis.readFully(buf, 0, len); }
6652 finally { rwl.readLock().unlock(); }
6653
6654 // Update the hashes.
6655 hCRC32.update(buf, 0, len);
6656 hMD5.update(buf, 0, len);
6657 }
6658
6659 // Now extract the final hashes and compare them
6660 // against the accession data...
6661 final Integer hashCRC32 = new Integer((int) hCRC32.getValue());
6662 final ROByteArray hashMD5 = new ROByteArray(hMD5.digest());
6663
6664 // Check the CRC32 hash if we have it in the accession data.
6665 if((ad.hashCRC32 != null) && !hashCRC32.equals(ad.hashCRC32))
6666 {
6667 logger.log("ERROR: cached data failed CRC32 check ("+hashCRC32+"/"+ad.hashCRC32+") for exhibit: " + esa);
6668 _removeCorruptData(cf);
6669 return;
6670 }
6671
6672 // Check the MD5 hash if we have it in the accession data.
6673 if((ad.hashMD5 != null) && !hashMD5.equals(ad.hashMD5))
6674 {
6675 logger.log("ERROR: cached data failed MD5 check ("+hashMD5+"/"+ad.hashMD5+") for exhibit: " + esa);
6676 _removeCorruptData(cf);
6677 return;
6678 }
6679
6680 // OK, exhibit was fully validated!
6681 StatsLogger.captureDataPoint(statsIDSCGEN, SCGNAME_CACHE_VALIDATION);
6682 //if(IsDebug.isDebug) { System.out.println("[ExhibitDataSimpleCache._doCacheDataValidityTest(): whole-file hashes verified for exhibit: "+esa+".]"); }
6683
6684 final long endTime = System.currentTimeMillis();
6685 final long dur = endTime - startTime;
6686 if(dur > 60000) // Warn about relatively slow validations...
6687 { logger.log("[ExhibitDataSimpleCache: validation took "+dur+"ms for "+esa+".]"); }
6688 return;
6689 }
6690 catch(final IOException e)
6691 {
6692 _removeCorruptData(cf); // Zap dodgy data ASAP.
6693 logger.log("ERROR: unexpected IOException when checking cached data (so removed ALL data) for exhibit: " + esa + ": " + e.getMessage());
6694 e.printStackTrace();
6695 return;
6696 }
6697 finally
6698 { dis.close(); }
6699 }
6700
6701
6702 // TODO: write the per-block hash checks...
6703
6704 }
6705
6706 /**Remove cached exhibit data identified as corrupt.
6707 * If possible then this clears up the metadata and raw data, etc,
6708 * but if not then this just removes the suspect raw data, thumbnails, etc,
6709 * from the disc cache.
6710 * <p>
6711 * The metadata will have to get fixed later.
6712 * <p>
6713 * Since this will have to grab a cache write lock
6714 * and then the metadata lock if it is to remove the corrupt data,
6715 * the caller must not have a read lock in place for example.
6716 *
6717 * @param cf exhibit cache metadata; never null
6718 */
6719 private void _removeCorruptData(final CachedFile cf)
6720 throws IOException
6721 {
6722 // Note removal of corrupt exhibit from cache...
6723 StatsLogger.captureDataPoint(statsIDSCGEN, SCGNAME_CACHEREM_CORRUPT);
6724
6725 logger.log("WARNING: ExhibitDataSimpleCache: cached exhibit corrupt so being completely purged from cache: " + cf.name);
6726 if(!metaData.exhibitRemoveCacheEntry(rwl, cacheDir, cf.name, false, logger, statsIDSCGEN))
6727 {
6728 // If we could not remove the entry neatly,
6729 // eg because the metadata was read-only,
6730 // then just remove the corrupt underlying data brutally!
6731 logger.log("WARNING: ExhibitDataSimpleCache: cannot update metadata; just removing raw data: " + cf.name);
6732 // Grab both locks in correct order...
6733 _getWriteLock(rwl, "ExhibitDataSimpleCache._removeCorruptData() raw data removal", logger);
6734 try
6735 {
6736 synchronized(this)
6737 { cf.zapMe(cacheDir); /* Zap raw cache data. */ }
6738 }
6739 finally
6740 { rwl.writeLock().unlock(); }
6741 }
6742 return;
6743 }
6744
/**If true then try to at least partially compute EPCM while precacheing.
 * To do this fully we would need access to Scorers, etc,
 * which may simply not be available in this cacheing layer,
 * so at most we can do a fast approximation for EPCM values completely absent.
 * <p>
 * In _doPreCache() this flag is negated and OR-ed with a random boolean
 * to form the second argument of getExhibitPropsComputableMutable(),
 * so while this is false that argument is always true.
 * (Presumably that argument is a "save CPU" flag, since the sort comparator
 * there passes a local named saveCPU in the same position -- confirm.)
 */
private final static boolean CALC_MISSING_EPCM_DURING_PRECACHE = false;
6751
6752 /**Update one exhibit incrementally during precaching.
6753 * Must not be called within a lock; will grab any locks it needs.
6754 *
6755 * @param forceDataFetch if true, strongly encourage data fetch,
6756 * ie extension of exhibit data if at all possible
6757 *
6758 * @return true iff this exhibit is not completely cached,
6759 * ie may benefit from another call to this routine immediately
6760 */
6761 private boolean _updateOneExhibit(final ExhibitStaticAttr esa,
6762 final GenProps gp,
6763 final AllExhibitProperties aep,
6764 final boolean forceDataFetch)
6765 throws IOException
6766 {
6767 boolean notCompletelyCached = false;
6768
6769 CachedFile icf = metaData.exhibitGetInfo(esa.getExhibitFullName());
6770
6771 // Always try to get thumbnails first for any exhibit.
6772 // Much more useful to show in catalogue pages,
6773 // takes less space and puts less strain on the master
6774 // (which which usually have them cached locally).
6775 //
6776 // We'll create a cache entry for this exhibit first if need be.
6777 // This allows us to pre-cache just metadata
6778 // when the server is in a "slow" (less aggressive) mode.
6779 if(metaData.canPrecacheThumbnails(cacheDir))
6780 {
6781 // CachedFile icf = metaData.exhibitGetInfo(esa.filePath);
6782 if(icf == null)
6783 {
6784 // Try to create new entry...
6785 // Ignore the return code...
6786 metaData.exhibitCreateNewCacheEntry(rwl,
6787 cacheDir,
6788 esa,
6789 logger,
6790 statsIDSCGEN);
6791 // ...but try again to get the cache entry.
6792 icf = metaData.exhibitGetInfo(esa.getExhibitFullName());
6793 }
6794
6795 // Is it worth precaching a thumbnail?
6796 // Only do it if:
6797 // 0) There is a cache entry for this exhibit.
6798 // 1) There is not yet a thumbnail.
6799 // AND
6800 // 2) A thumbnail can potentially be made.
6801 // THEN
6802 // 3) We try to fetch the thumbnail directly from the
6803 // back end without forcing it to be created...
6804 // AND IF THAT DOESN'T GET US THE THUMBNAIL, THEN
6805 // 4) If we have the full exhibit data immediately available
6806 // (note that this might not be in local cache,
6807 // eg on the master this might be coming from the
6808 // filesystem still), and
6809 // if it looks worth doing, then we call the normal
6810 // front-end getThumbnails() routine to MAKE the
6811 // thumbnail from the exhibit data.
6812 if((icf != null) &&
6813 !icf.hasThumbnails() &&
6814 (((ExhibitMIME.getInputFileType(esa.getExhibitFullName())) != null) &&
6815 (ExhibitMIME.getInputFileType(esa.getExhibitFullName())).canPossiblyCreateThumbnailOfSameMIMEType()))
6816 {
6817 // Try to force thumbnail to be fetched/made.
6818 if(_getThumbnails(esa.getExhibitFullName(), true, true) == null)
6819 {
6820 // If we can't get the thumbnail made this time
6821 // then clear the _precacheExhibitHash
6822 // to indicate there is work still to be done,
6823 // eg on another precache pass if need be.
6824 _precacheExhibitHash = null; // Work to be done...
6825
6826 // Most of the time, give up quickly for this exhibit
6827 // if we couldn't get thumbnails in place.
6828 //
6829 // We generally want to try very hard
6830 // to get thumbnails in place first.
6831 if(0 != Rnd.fastRnd.nextInt(7))
6832 { return(false); }
6833 }
6834 }
6835 }
6836
6837
6838 // If this is a cloud for which extra bandwidth may be expensive
6839 // then do not pre-cache any exhibit data; be content with thumbnails.
6840 if(LocalProps.isCloudMirrorInstance())
6841 { return(false); } // Treat exhibit as fully precached for this instance type.
6842
6843 // If no evidence of continuing downstream use of this cache instance
6844 // then return immediately; be content with thumbnails.
6845 if(!_userRequestedDataFromCache)
6846 { return(false); }
6847
6848 // If possible, attempt to fetch first/next chunk of an exhibit.
6849 // We are very keen to do this for "popular"/tiny/"best" exhibits,
6850 // even when the cache is quite full and we otherwise might not.
6851 // We also like to bring in the tail of very-nearly-completely-cached
6852 // exhibits so that we can promptly check hashes, etc.
6853 final long cachedLen = (icf == null) ? 0 : icf.cachedLength;
6854 final boolean dataNotFullyCached = cachedLen < esa.length;
6855 final boolean canPrecacheAtAll = metaData.canPrecache(cacheDir);
6856 if(metaData.canPrecacheExhibitData(cacheDir) ||
6857 (canPrecacheAtAll && forceDataFetch /* Asked to extend if at all possible. */ ) ||
6858 (canPrecacheAtAll && metaData.someFree(cacheDir) &&
6859 ((esa.length <= MAX_TRANSFER_CHUNK_SIZE) /* Small exhibit. */ ||
6860 ImageUtils.canBeOwnThumbnail(esa) /* Tiny exhibit. */ ||
6861 _bestExhibits.contains(esa.getExhibitFullName()) /* Good exhibit. */ ||
6862 ((icf != null) && dataNotFullyCached &&
6863 ((esa.length - cachedLen) <= MAX_TRANSFER_CHUNK_SIZE)) /* Very-nearly-completely cached exhibit. */ ||
6864 isPopularDownload(esa) /* Popular exhibit. */ )))
6865 {
6866 // Attempt to extend the on-disc copy of the exhibit
6867 // by one (possibly maximal) chunk, if not yet fully cached.
6868 if(dataNotFullyCached)
6869 {
6870 // Exhibit data is not fully cached locally...
6871 // Read one whole chunk more for efficiency,
6872 // up to the lower of the file end or cache limit.
6873 // For now, we don't bother to align this read.
6874
6875 // Find out what initial portion we are prepared to cache.
6876 final int MAX_CACHEABLE_EX_BYTES = _getMaximumCacheableBytesForOneExhibit(gp);
6877
6878 // Max position to aspire to read up to...
6879 final long readLimit =
6880 Math.min(esa.length, MAX_CACHEABLE_EX_BYTES);
6881 // Maximum number of bytes that we will actually read...
6882 // We read the biggest normally-allowed chunk for efficiency.
6883 final int toRead = Math.min(MAX_TRANSFER_CHUNK_SIZE,
6884 (int) (readLimit - cachedLen));
6885 if(toRead > 0)
6886 {
6887 try
6888 {
6889 // Extend our cache if possible.
6890 _getExhibitDataFromUpstreamToPrecache(esa, aep.aeid, gp, cachedLen, toRead, false);
6891
6892 // Note successful fetch/cache of data block.
6893 StatsLogger.captureDataPoint(statsIDSCGEN, SCGNAME_PRECACHEEXDATABLOCK);
6894
6895 // Read data successfully,
6896 // but would it be useful to read some more immediately?
6897 final CachedFile icf2 = metaData.exhibitGetInfo(esa.getExhibitFullName());
6898 if((icf2 == null) || (icf2.cachedLength < readLimit))
6899 {
6900 notCompletelyCached = true; // This exhibit could do with more precaching work.
6901 }
6902 else
6903 {
6904 logger.log("[ExhibitDataSimpleCache: completely precached data for "+esa+"; will now test data integrity...]");
6905
6906 // Now test the exhibit's hash(es), etc,
6907 // to ensure that everything arrived intact...
6908 _doCacheDataValidityTest(aep, esa);
6909 }
6910 }
6911 catch(final IOException e)
6912 {
6913 // Note error during precaching...
6914 StatsLogger.captureDataPoint(statsIDSCGEN, SCGNAME_PRECACHEERROR);
6915 throw e;
6916 }
6917 finally
6918 {
6919 // If file still not read as far as possible/allowed,
6920 // then note that there is still work to do...
6921 final CachedFile icf2 = metaData.exhibitGetInfo(esa.getExhibitFullName());
6922 if((icf2 == null) || (icf2.cachedLength < readLimit))
6923 {
6924 _precacheExhibitHash = null; // Work to be done...
6925 }
6926 }
6927 }
6928 }
6929 }
6930
6931
6932 if(CALC_MISSING_EPCM_DURING_PRECACHE)
6933 {
6934 // Try to get mutable properties at least partially computed if necessary.
6935 final ExhibitPropsComputableMutable epcmS =
6936 aep.getExhibitPropsComputableMutable(esa.getExhibitFullName());
6937 if(epcmS == null)
6938 {
6939 // If we find that the current properties are not computed at all
6940 // then we take that as an indication
6941 // that others (already passed over) may need (re)computing too.
6942 _precacheExhibitHash = null; // Quite aggressive...
6943
6944 // Now attempt to force partial computation for this value...
6945 // (As there is no access to Scorers this can only be an approximation.)
6946 aep.getExhibitPropsComputableMutable(esa.getExhibitFullName(), true, gp, this, null);
6947 }
6948 }
6949
6950 // DO OTHER PRE-CACHE OPERATIONS FOR THIS EXHIBIT...
6951
6952 return(notCompletelyCached);
6953 }
6954
6955
/**If true, allow us to try fetching exhibit data from peers rather than only from the master.
 * Even when this is false, _getExhibitDataFromUpstreamToPrecache() will still
 * attempt a peer fetch if its caller passes forceIt == true.
 */
private static final boolean ALLOW_DATA_FETCH_FROM_PEERS = true;

/**If true, allow us to try fetching thumbnails from peers rather than only the master.
 * Since all mirrors/peers should generally cache all thumbnails indefinitely
 * this should not incur any significant extra traffic even in poor circumstances.
 * <p>
 * This may allow lateral spread of a thumbnail once any mirror has managed to create one
 * even when the master is unable (eg due to resource restrictions) to make one.
 */
private static final boolean ALLOW_TN_FETCH_FROM_PEERS = true;

/**Thread-safe Map from mirror ID to strictly-positive rating, with "" (MASTER_FAKE_TAG) for the master; never null.
 * The rating is a synthetic (milliseconds) time to fetch a big data block (or thumbnails).
 * Lower values represent peers that have the data available quickly
 * more of the time.
 * <p>
 * All values are strictly positive.
 * <p>
 * Compound read-modify-write updates (in _updatePeerStats())
 * and purges (in _pickPeer(Set)) are done while synchronized on this map.
 * <p>
 * This is periodically purged of stale data (ie inactive peers)
 * to keep it from growing without bound;
 * the entry for the master is never purged.
 */
private final Map<String,Long> altDataSourceRating = new Hashtable<String, Long>();

/**If true then use a cautious strategy to select a peer to talk to.
 * When true: untested peers are not tried preferentially,
 * a new peer's rating starts at PEER_STATS_UNKNOWN_MS rather than at its first measured time,
 * and the rating filter time-constant and random-pick interval are larger.
 */
private static final boolean PEER_SELECTION_CAUTIOUS = false;

/**Default rating/time (ms) for "unknown" data source/mirror/peer; strictly positive.
 * The value chosen corresponds to 10000ms RTT and 1kBps throughput
 * (ie one millisecond per byte of a maximal transfer chunk),
 * which is much slower than most reasonable peers.
 */
private static final int PEER_STATS_UNKNOWN_MS = 10000 + MAX_TRANSFER_CHUNK_SIZE;

/**Time-constant for updating peer fetch time value; strictly positive.
 * Higher values mean a lower-pass filter,
 * and more robustness in the face of temporary glitches.
 * <p>
 * _updatePeerStats() uses half this value (but at least 1) after a failed fetch
 * so that ratings back off faster on failure.
 * <p>
 * A value that is a power of two may result in more efficient code.
 * <p>
 * A value from 8 to 256 is probably reasonable.
 */
private static final int PEER_STATS_TC = PEER_SELECTION_CAUTIOUS ? 64 : 32;

/**Fake tag we use to indicate a fetch from the master/upstream via the pipeline. */
private static final String MASTER_FAKE_TAG = "";
7001
7002 /**Try to extend cached data for the specified exhibit.
7003 * By default we try to get this from the master,
7004 * but we may try to fetch it from a peer/mirror instead (P2P)
7005 * to reduce the load on the master.
7006 * <p>
7007 * Also, this may be used to attempt data/error recovery
7008 * if data cannot be fetched from the master for some reason.
7009 *
7010 * @param start byte offset in exhibit to start reading/fetching for; non-negative
7011 * @param len number of bytes to read; strictly positive.
7012 * @param forceIt if true then we try very hard to get the data from a peer,
7013 * for example to help with master/server error recovery
7014 */
7015 private void _getExhibitDataFromUpstreamToPrecache(final ExhibitStaticAttr esa,
7016 final AllExhibitImmutableData aeid,
7017 final GenProps gp,
7018 final long start,
7019 final int len,
7020 final boolean forceIt)
7021 throws IOException
7022 {
7023 assert(esa != null);
7024 assert(aeid != null);
7025 assert(gp != null);
7026 assert(start <= Integer.MAX_VALUE);
7027 assert(len > 0);
7028
7029 // Dummy buffer to read the data into.
7030 // The data is discarded; we only want the side-effect of it being cached.
7031 final ByteBuffer buf = ByteBuffer.allocate(len);
7032
7033 // Attempt to adaptively fetch data from peers if allowed.
7034 // This may significantly reduce load on the master for exhibit data.
7035 if(ALLOW_DATA_FETCH_FROM_PEERS || forceIt)
7036 {
7037 // Choose which peer to contact...
7038 final String peerToTry = _pickPeer();
7039
7040 // If we selected the master rather than a peer
7041 // then just fall through to the default handler.
7042 if(!MASTER_FAKE_TAG.equals(peerToTry))
7043 {
7044 final long startFetch = System.currentTimeMillis();
7045 boolean successful = false;
7046 try
7047 {
7048 metaData.exhibitRead(rwl,
7049 peerToTry,
7050 cacheDir, esa.getExhibitFullName(), source,
7051 aeid,
7052 gp,
7053 (int) start,
7054 buf,
7055 null, // Cache locally but not upstream...
7056 logger,
7057 statsIDSCGEN);
7058
7059 // Got the data OK!
7060 successful = true;
7061
7062 // Don't need to go to the master!
7063 return;
7064 }
7065 catch(final IOException e)
7066 {
7067 logger.log("INFO: data block fetch from peer for exhibit "+esa+" failed with IOException: " + peerToTry + ": " + e.getMessage());
7068 /* Fall through to get data from master. */
7069 }
7070 finally
7071 {
7072 final long endFetch = System.currentTimeMillis();
7073 _updatePeerStats(peerToTry,
7074 successful,
7075 (endFetch - startFetch));
7076
7077 // Remove some stats logging from the timing...
7078 if(successful)
7079 {
7080 // Note successful fetch/cache of data block.
7081 StatsLogger.captureDataPoint(statsIDSCGEN, SCGPREF_PRECACHEEXDATABLOCKSRC + peerToTry);
7082 logger.log("INFO: successful data block fetch from peer: "+peerToTry+"; stats: " + (new ArrayList<Map.Entry<String,Long>>(altDataSourceRating.entrySet())));
7083 }
7084 else
7085 {
7086 // Note unsuccessful fetch/cache of data block.
7087 final String ev = SCGPREF_PRECACHEEXDATABLOCKSRCERR + peerToTry;
7088 StatsLogger.captureDataPoint(statsIDSCGEN, ev);
7089 logger.log("INFO: FAILED data block fetch from peer "+peerToTry+"; stats: " + (new ArrayList<Map.Entry<String,Long>>(altDataSourceRating.entrySet())));
7090 }
7091 }
7092 }
7093 }
7094
7095 // Fall through to use the master/upstream by default.
7096
7097 // Get the data from the master...
7098 // Note stats...
7099 final long startFetchFromMaster = System.currentTimeMillis();
7100 boolean fetchFromMasterSuccessful = false;
7101 try
7102 {
7103 metaData.exhibitRead(rwl,
7104 null, // Fetch from upstream/master.
7105 cacheDir, esa.getExhibitFullName(), source,
7106 aeid,
7107 gp,
7108 (int) start,
7109 buf,
7110 null, // Cache locally but not upstream...
7111 logger,
7112 statsIDSCGEN);
7113 fetchFromMasterSuccessful = true;
7114 }
7115 finally
7116 {
7117 // // Possibly note/update stats for large fetches only.
7118 // final boolean largeFetch = (len >= CoreConsts.BULK_DATA_TRANSFER_SIZE);
7119 // if(largeFetch)
7120 {
7121 final long endFetchFromMaster = System.currentTimeMillis();
7122 _updatePeerStats(MASTER_FAKE_TAG,
7123 fetchFromMasterSuccessful,
7124 (endFetchFromMaster - startFetchFromMaster));
7125
7126 // Remove some stats logging from the timing...
7127 if(fetchFromMasterSuccessful)
7128 {
7129 // Note successful fetch/cache of data block.
7130 StatsLogger.captureDataPoint(statsIDSCGEN, SCGPREF_PRECACHEEXDATABLOCKSRC + "master");
7131 }
7132 else
7133 {
7134 // Note unsuccessful fetch/cache of data block.
7135 final String ev = SCGPREF_PRECACHEEXDATABLOCKSRCERR + "master";
7136 StatsLogger.captureDataPoint(statsIDSCGEN, ev);
7137
7138 logger.log("INFO: data block fetch from peer stats: " + (new ArrayList<Map.Entry<String,Long>>(altDataSourceRating.entrySet())));
7139 }
7140 }
7141 }
7142 }
7143
/**Reciprocal of the fraction of the time to pick a peer completely at random; strictly positive.
 * _pickPeer(Set) tries a completely random peer when
 * Rnd.fastRnd.nextInt(P2P_RND_FRAC) == 0, so larger values make random picks rarer.
 * <p>
 * Computed once per instance as a base (61 if PEER_SELECTION_CAUTIOUS, else 37)
 * plus a random offset from Rnd.fastRnd.nextInt(11).
 * NOTE(review): presumably the offset is there so that instances do not all behave identically; confirm.
 */
private final int P2P_RND_FRAC = (PEER_SELECTION_CAUTIOUS ? 61 : 37) +
    Rnd.fastRnd.nextInt(11);

/**Reciprocal of the fraction of the time to choose a 2nd-tier peer rather than the best peer; strictly positive.
 * _pickPeer(Set) considers the "nearly as good" peers when
 * Rnd.fastRnd.nextInt(P2P_NEXT_BEST_FRAC) == 0 (and the best score is good enough).
 * <p>
 * Computed once per instance as 3 plus a random offset from Rnd.fastRnd.nextInt(3).
 */
private final int P2P_NEXT_BEST_FRAC = 3 + Rnd.fastRnd.nextInt(3);

/**Factor/multiplier of peers worse than top that will be considered for fetches routinely; strictly positive.
 * A peer counts as "nearly top" if its (adjusted) rating is below this factor times the best rating.
 * Twice this factor is also used as the penalty multiplier applied to the master's rating
 * when PREFER_PEERS_TO_MASTER_WHERE_POSSIBLE is true.
 */
private static final int NEARLY_TOP_FACTOR = 2;

/**If true, avoid use of master where peers are available.
 * When true, _pickPeer() does not add the master as a candidate alongside available peers,
 * and _pickPeer(Set) multiplies any known rating for the master by 2*NEARLY_TOP_FACTOR.
 */
private static final boolean PREFER_PEERS_TO_MASTER_WHERE_POSSIBLE = true;
7156
7157 /**Pick one of the supplied peers to attempt to fetch exhibit data from; never null.
7158 * Usually this returns the "best" (fastest) peer,
7159 * but sometimes this will return an apparently sub-optimal peer so as to:
7160 * <ul>
7161 * <li>to test the waters (ie to keep sampling all peers occasionally), and
7162 * <li>to spread load around a little, and
7163 * <li>to rehabilitate once-poor peers (eg bad peers can become good again).
7164 * </ul>
7165 * In particular we are allowing for peer/server load
7166 * and inter-peer network conditions to change continually.
7167 * <p>
7168 * Occasionally this will purge the cached peer stats of anything
7169 * not in the argument set, with constant amortised cost per call.
7170 *
7171 * @param activeMirrors set of mirror tags
7172 * (and possible MASTER_FAKE_TAG ("") for the master);
7173 * never null, never empty
7174 * @return selected peer, MASTER_FAKE_TAG for master; never null
7175 */
7176 private String _pickPeer(final Set<String> activeMirrors)
7177 {
7178 assert(activeMirrors != null);
7179 assert(!activeMirrors.isEmpty());
7180
7181 // A small fraction of the time pick a peer completely at random
7182 // (providing it is the master or it seems at least to be "up").
7183 // This allows us to try new peers and those currently out of favour,
7184 // and spreads traffic load a little.
7185 // This "hot potato" selection method is also fairly quick.
7186 if(Rnd.fastRnd.nextInt(P2P_RND_FRAC) == 0)
7187 {
7188 final String putativePeer = (new ArrayList<String>(activeMirrors)).get(
7189 Rnd.goodRnd.nextInt(activeMirrors.size()));
7190 if((MASTER_FAKE_TAG.equals(putativePeer) ||
7191 (Boolean.TRUE == LoadBalancingUtils.testIfHTTPServerIsUp(HostUtils.makeMirrorNameGeneric(putativePeer), true))))
7192 { return(putativePeer); }
7193 }
7194
7195 // Unless being cautious in peer use/selection,
7196 // if there are one or more untested peers
7197 // then shuffle them and pick the first one that is up (or is the master).
7198 if(!PEER_SELECTION_CAUTIOUS)
7199 {
7200 final List<String> untested = new ArrayList<String>(activeMirrors);
7201 untested.removeAll(altDataSourceRating.keySet());
7202 if(!untested.isEmpty())
7203 {
7204 Collections.shuffle(untested, Rnd.fastRnd);
7205 for(final String peer : untested)
7206 {
7207 if(MASTER_FAKE_TAG.equals(peer) ||
7208 (Boolean.TRUE == LoadBalancingUtils.testIfHTTPServerIsUp(HostUtils.makeMirrorNameGeneric(peer), true)))
7209 { return(peer); }
7210 }
7211 }
7212 }
7213
7214 // Usually pick a peer in relation to its weighting,
7215 // ie pick "better" peers (with lower service times) more often.
7216
7217 // Select the peer with the lowest score here.
7218 long bestScore = Long.MAX_VALUE;
7219 String selectedPeer = MASTER_FAKE_TAG; // Default to the master.
7220 for(final String peer : activeMirrors)
7221 {
7222 final Long score = altDataSourceRating.get(peer);
7223
7224 // Treat unknown peers as having a high "unknown" fallback time, a la XNTPD.
7225 // Possibly mark the master so as to prefer peers and reduce strain on the master.
7226 final long sl = (score == null) ? PEER_STATS_UNKNOWN_MS :
7227 ((PREFER_PEERS_TO_MASTER_WHERE_POSSIBLE && MASTER_FAKE_TAG.equals(peer)) ?
7228 (score.longValue() * 2*NEARLY_TOP_FACTOR) : score.longValue());
7229 if(sl < bestScore)
7230 {
7231 bestScore = sl;
7232 selectedPeer = peer;
7233 }
7234 }
7235
7236 // Some of the time, when scores are sufficiently good,
7237 // select randomly any peer with a score close enough to the best.
7238 // This helps spread the load amongst, and keep an eye on,
7239 // the top few candidates.
7240 //
7241 // We deliberately exclude any peer whose status is unknown (or worse).
7242 if((bestScore < PEER_STATS_UNKNOWN_MS / (2*NEARLY_TOP_FACTOR)) &&
7243 (Rnd.fastRnd.nextInt(P2P_NEXT_BEST_FRAC) == 0))
7244 {
7245 final List<String> candidatePeers = new ArrayList<String>(1 + activeMirrors.size()/2);
7246 for(final String peer : activeMirrors)
7247 {
7248 final Long score = altDataSourceRating.get(peer);
7249
7250 // Skip unknown peers altogether...
7251 if(score == null) { continue; }
7252
7253 // Possibly mark the master down to prefer peers.
7254 final long sl = ((PREFER_PEERS_TO_MASTER_WHERE_POSSIBLE && MASTER_FAKE_TAG.equals(peer)) ? (score.longValue() * 2*NEARLY_TOP_FACTOR) :
7255 score.longValue());
7256 if(sl < NEARLY_TOP_FACTOR*bestScore) // Consider peer if relatively good...
7257 { candidatePeers.add(peer); }
7258 }
7259 // Override best peer if we have a real choice...
7260 // Use goodRnd to make as independent a selection as possible.
7261 final int nCands = candidatePeers.size();
7262 if(nCands > 1)
7263 { selectedPeer = candidatePeers.get(Rnd.goodRnd.nextInt(nCands)); }
7264 }
7265
7266 // Possibly take this opportunity to clear out stale data,
7267 // amortising the mean cost-per-call to be O(1) ie independent of peer-set size.
7268 if(0 == Rnd.fastRnd.nextInt(altDataSourceRating.size() + 2))
7269 {
7270 // Atomically remove any entries for peers not in the argument set.
7271 // The master tag is never removed however.
7272 synchronized(altDataSourceRating)
7273 {
7274 for(final Iterator<String> it = altDataSourceRating.keySet().iterator(); it.hasNext(); )
7275 {
7276 final String peer = it.next();
7277 if(MASTER_FAKE_TAG.equals(peer))
7278 { continue; }
7279 if(!activeMirrors.contains(peer))
7280 { it.remove(); }
7281 }
7282 }
7283 }
7284
7285 // Return the selected peer...
7286 return(selectedPeer);
7287 }
7288
7289 /**Pick a peer to attempt to fetch exhibit data from; never null.
7290 * Usually this returns the "best" peer;
7291 * sometimes this will return an apparently sub-optimal peer so as to:
7292 * <ul>
7293 * <li>to test the waters (ie keep sampling all peers occasionally),
7294 * <li>spread load around a little, and
7295 * <li>rehabilitate once-poor peers (eg bad guys may come good).
7296 * </ul>
7297 * In particular we are allowing for peer/server load
7298 * and inter-peer network conditions to change continually.
7299 * <p>
7300 * Occasionally this will purge the cached peer stats of anything
7301 * not in the argument set.
7302 * <p>
7303 * The master should by preference avoid fetching data from a peer
7304 * to avoid contaminating the master copy with bad data from any peer.
7305 * However, if it has a mirror tag, then it may fetch data P2P.
7306 *
7307 * @return selected peer, MASTER_FAKE_TAG for master (ie no peer); never null
7308 */
7309 private String _pickPeer()
7310 {
7311 final String mirrorTag = LocalProps.getMirrorTag();
7312 // This host has to be a mirror itself to be allowed to use P2P...
7313 if(mirrorTag != null)
7314 {
7315 // Get candidate mirrors...
7316 // We use whatever (even stale) entries that we can find
7317 // in order to allow P2P to work longer after
7318 // losing contact with the master
7319 // (when it may prove to be especially useful).
7320 final Set<String> activeMirrors = new HashSet<String>(
7321 LoadBalancingUtils.getActiveMirrors(varMgr, false).keySet());
7322 // Remove our/this instance/mirror as a candidate...
7323 activeMirrors.remove(mirrorTag);
7324 if(TRACE_P2P_ACTIVITY) { logger.log("[ExhibitDataSimpleCache._pickPeer(): potential peer count for P2P: "+activeMirrors.size()+".]"); }
7325
7326 // Fall back to using the master as usual if no other candidates...
7327 if(activeMirrors.size() > 0)
7328 {
7329 if(TRACE_P2P_ACTIVITY) { logger.log("[ExhibitDataSimpleCache._pickPeer(): potential peers for P2P: "+(new ArrayList<String>(activeMirrors))+".]"); }
7330
7331 // Add the master as a synthetic candidiate unless we always try to favour peers.
7332 if(!PREFER_PEERS_TO_MASTER_WHERE_POSSIBLE)
7333 { activeMirrors.add(MASTER_FAKE_TAG); }
7334
7335 // Choose which peer to contact...
7336 final String peerToTry = _pickPeer(activeMirrors);
7337 if(TRACE_P2P_ACTIVITY) { logger.log("[ExhibitDataSimpleCache._pickPeer(): peer to try for P2P: "+peerToTry+".]"); }
7338
7339 // Return the selected mirror (which may be MASTER_FAKE_TAG).
7340 return(peerToTry);
7341 }
7342 }
7343
7344 return(MASTER_FAKE_TAG); // Fake tag for the master as there were no peers available...
7345 }
7346
7347 /**Update data-transfer stats for the given peer.
7348 * A failed fetch is treated as a very slow access,
7349 * so that a failing peer compares numerically unfavourably with reliable peers.
7350 */
7351 private void _updatePeerStats(final String peer,
7352 final boolean fetchSuccessful,
7353 final long timeTaken)
7354 {
7355 assert(peer != null);
7356
7357 // Treat a fail as worse numerically than "unknown"
7358 // and as worse than the actual time taken
7359 // so that consistent failures are liked less than unknown/slow peers.
7360 final long l = fetchSuccessful ? timeTaken :
7361 (1 + (timeTaken + PEER_STATS_UNKNOWN_MS));
7362
7363 // Atomic wrt the cache...
7364 synchronized(altDataSourceRating)
7365 {
7366 // If there is no entry for this peer then create one now.
7367 Long t = altDataSourceRating.get(peer);
7368 if(t == null)
7369 {
7370 if(PEER_SELECTION_CAUTIOUS)
7371 {
7372 // Start with a (very) cautious estimate of performance...
7373 t = new Long(PEER_STATS_UNKNOWN_MS);
7374 }
7375 else
7376 {
7377 // Start with the value/time that we see for this new peer.
7378 t = new Long(l);
7379 }
7380 altDataSourceRating.put(peer, t);
7381 }
7382
7383 // Compute new value for the cache.
7384 // Back off fast in case of failure.
7385 final int timeConst = fetchSuccessful ? PEER_STATS_TC :
7386 (Math.max(1, PEER_STATS_TC/2));
7387 final Long v = new Long(Math.max(1, ((t.longValue() * (timeConst-1)) + l + (timeConst/2)) / timeConst));
7388 altDataSourceRating.put(peer, v);
7389
7390 // Record some stats...
7391 if(TRACE_P2P_ACTIVITY) { logger.log("[ExhibitDataSimpleCache._updatePeerStats(\""+peer+"\", "+fetchSuccessful+", "+timeTaken+") new value = "+v+"...]"); }
7392 }
7393
7394 if(fetchSuccessful)
7395 {
7396 StatsLogger.captureDataPoint(statsIDSCGEN, SCGPREF_PRECACHEEXDATABLOCKFETCHTIME + GenUtils.log2Approx(Math.max(0, timeTaken)));
7397 }
7398
7399 // Should we be recording performance/usage stats centrally?
7400 final boolean recordStats = "true".equals(genProps.getGen().get(KEY_debugFlag_P2P_BLOCKXFER));
7401 // If so, do it now.
7402 if(recordStats)
7403 {
7404 final StringBuilder sb = new StringBuilder(32);
7405 sb.append("P2P:bx:");
7406 sb.append(LocalProps.getMirrorTag());
7407 sb.append(':');
7408 sb.append("".equals(peer) ? "M" : peer);
7409 sb.append('=');
7410 if(fetchSuccessful)
7411 {
7412 // Note time taken for successful transfer.
7413 sb.append(GenUtils.log2Approx(timeTaken));
7414 }
7415 else
7416 {
7417 // Note failure.
7418 sb.append("FAIL");
7419 }
7420
7421 try
7422 {
7423 varMgr.setVariable(new SimpleVariableValue(
7424 SystemVariables.PERFMON_STRING_GLOBAL_EVENT,
7425 sb.toString()));
7426 }
7427 catch(final IOException e)
7428 {
7429 e.printStackTrace(); // Whinge but absorb error...
7430 }
7431 }
7432 }
7433
7434 /**In the top-N (global) downloads recently, with at least 2 downloads.
7435 * The "more than one download" filter is to trim off a noisy "long tail"
7436 * (eg on a quiet day).
7437 * <p>
7438 * This uses a combination of hits from the previous and current days.
7439 *
7440 * @param esa the exhibit details; never null
7441 * @return true if this exhibit is a "popular" download.
7442 */
7443 private boolean isPopularDownload(final ExhibitStaticAttr esa)
7444 throws IOException
7445 {
7446 final EventVariableValue downloadsYesterday = varMgr.getEventValue(SystemVariables.ACCESSPATTERN_COMPLETED_DOWNLOAD, EventPeriod.VLONG, false);
7447 final EventVariableValue downloadsToday;
7448 final String shortName = esa.getExhibitFullName().getShortName().toString();
7449 return(((downloadsYesterday.getRank(shortName) < 100) &&
7450 (downloadsYesterday.getCount(shortName) > 1)) ||
7451 // Note that fetching current period possibly incurs more cost.
7452 (((downloadsToday = varMgr.getEventValue(SystemVariables.ACCESSPATTERN_COMPLETED_DOWNLOAD, EventPeriod.VLONG, true)).getRank(shortName) < Integer.MAX_VALUE) &&
7453 (downloadsToday.getCount(shortName) > 1)));
7454 }
7455
/**Precache lock to prevent multi-threaded precache runs. */
private final ReentrantLock _preCacheLock = new ReentrantLock();

/**Time before which we will not do more precacheing.
 * Private to _doPreCache().
 * <p>
 * Initial value of 0 allows precaching to start immediately.
 * <p>
 * NOTE(review): presumably a System.currentTimeMillis()-style timestamp in ms; confirm against _doPreCache().
 */
private volatile transient long _noMorePrecacheUntil;

/**Set true when a user requests data from the cache.
 * This is as a result of user activity, and without this
 * being true we still won't indulge in any precacheing
 * of exhibit data (though thumbnail precaching may still have happened).
 * This means that we can safely have more than one context
 * set up (for example), in a servlet runner as long as
 * only one is ever actually used.
 * <p>
 * Initialised from LocalProps.fastStartMode().
 * <p>
 * Accessed without locking (it is volatile); read by _doPreCache()
 * and may be set by a routine that is sure that it has received
 * a user request to fetch exhibit/thumbnail data.
 * <p>
 * We can reset this if we believe we have finished precaching
 * (or at least a reasonable chunk of precacheing work)
 * for the current exhibit set;
 * it is also cleared by destroy() to discourage further precaching.
 */
private transient volatile boolean _userRequestedDataFromCache = LocalProps.fastStartMode();

/**An iterator over a snapshot of all exhibit names.
 * This is initially null, and when null or when exhausted
 * it is reset to be a new snapshot of the exhibit names.
 * This avoids starvation of some exhibits.
 * <p>
 * We access this only from _doPreCache() which is single-threaded,
 * so this need not be thread safe.
 * <p>
 * We may order the iteration in some way as to try to
 * precache as efficiently as possible, eg smallest or
 * `best' first, or we might store the exhibits in, for
 * example, a shuffled order.
 * <p>
 * When we get a name from this iterator we must make sure
 * that it still represents a valid exhibit, since the exhibit
 * might have been deleted, for example.
 * <p>
 * Accessed only by _doPreCache().
 */
private transient Iterator<Name.ExhibitFull> _precacheIterator;

/**Indicator for which image set we are working on.
 * When we start a new round of precaching we set this
 * to the hash of the current exhibit set.
 * <p>
 * If we come across an exhibit that we do some precaching work on
 * (or that still has work outstanding, eg a thumbnail that could not be made,
 * data not yet read as far as allowed, or uncomputed properties),
 * we set this to null.
 * <p>
 * When we are about to start a new round of precaching
 * and discover this is set to the hash of the current exhibit set,
 * we assume that there was no work to be done and we skip precaching.
 * When the exhibit set changes we will then resume.
 * <p>
 * Accessed under the rwl by _doPreCache().
 */
private transient Long _precacheExhibitHash;

/**Maximum time that _doPreCache() can spend in one go (ms).
 * Designed to be short enough to avoid causing massively irritating
 * interruptions to user interactivity if we lock other activity
 * out for this long,
 * though long enough to be relatively efficient if possible.
 * <p>
 * Precacheing will not generally interfere with interactive operations
 * so we try to make this time large enough
 * to allow the fetch of a block or three of exhibit data over a slow Net link,
 * allowing for RTT and connection setup and bandwidth, etc.
 * <p>
 * Something of the order of a few seconds may be good.
 * <p>
 * We radically reduce this for CPU-sensitive (eg cloud) environments:
 * 113ms if LocalProps.isCloudMirrorInstance(), else 3001ms.
 */
private final int MAX_dPC_SPIN_TIME_MS = LocalProps.isCloudMirrorInstance() ? 113 : 3001;

/**Max time _doPreCache() has to sleep for (ms).
 * This is basically if some freak event happens beyond
 * _doPreCache()'s reasonable control.
 * <p>
 * A few minutes is probably reasonable.
 * <p>
 * Computed once per instance as four minutes plus a random extra
 * from Rnd.fastRnd.nextInt() bounded by one minute.
 */
private final int MAX_dPC_BACKOFF_TIME = 4 * 60 * 1000 +
    Rnd.fastRnd.nextInt(1 * 60 * 1000);

/**Our stratum cached; never null though may be UNKNOWN.
 * We may examine the low-power flag to decide to reduce upstream access.
 * <p>
 * Is marked volatile for thread-safe lock-free access.
 * <p>
 * Updates piggybacked on variable set/fetch work.
 */
private volatile Stratum upstreamStratum = Stratum.UNKNOWN;
7554
7555 /**Return cached stratum; never null.
7556 * Never throws an exception.
7557 */
7558 public Stratum getStratum()
7559 { return(upstreamStratum); }
7560
7561
7562
/**Set true once destroy() is called; never set false again.
 * Note that destroy() sets this only after it has attempted to save state
 * and to destroy the upstream source, not on entry.
 * <p>
 * Volatile so that other threads see the change without locking.
 */
private volatile boolean destroyed;
7565
/**Shut down the data pipeline.
 * Flush state, variables and logs upstream and to disc as appropriate,
 * and then make sure that upstream of us is destroyed.
 * <p>
 * The sequence is:
 * <ol>
 * <li>Discourage precaching and drop in-memory thumbnails.
 * <li>Stop the read-ahead thread pool.
 * <li>Save metadata and then system variables
 *     (each attempted even if the previous step threw);
 *     an IOException from either is reported but absorbed.
 * <li>destroy() the upstream source (always attempted).
 * <li>Mark this instance as destroyed.
 * <li>Take a read lock on the cache and set the metadata read-only.
 * </ol>
 * An unchecked exception escaping from the save/upstream-destroy steps
 * will propagate and the remaining steps will not be run.
 */
public void destroy()
{
    // Quickly discourage precaching and free some memory...
    _aggressive = false;
    _userRequestedDataFromCache = false;
    _thumbnailsInMemory.clear();

    // Stop the thread pool.
    try { discardableReadAheadTaskThreadPool.shutdownNow(); }
    finally
    {
        // Try to save all useful persistable state that we hold.
        // The nesting ensures that each later step is attempted
        // even if an earlier one throws.
        try
        {
            try { _cleanAndSaveMetaData(true); }
            finally { _handleSysVars(true); }
        }
        catch(final IOException e) { e.printStackTrace(); } // Report but absorb: shutdown must continue.
        // And ensure that the upstream source is destroy()ed too.
        finally { source.destroy(); }
    }

    // Mark this instance as doomed once we've completed any saves.
    destroyed = true;

    // Check for background threads...
    // (This only warns; it does not wait for or stop the holder of the lock.)
    if(_gAEP_lock.isLocked())
        { System.err.println("WARNING: AEP fetch thread still running..."); }
    // Grab read lock on cache to prevent further changes.
    // The lock is deliberately not released within this method.
    try { _getReadLock(rwl, "shutting down cache in destroy()", logger); }
    catch(final InterruptedIOException e) { e.printStackTrace(); } // Carry on to make metadata read-only regardless.

    // Prevent further updates to metadata
    // and thus most cache updates.
    metaData.setReadWrite(false);
}
7606 }