001 /*
002 Copyright (c) 1996-2011, Damon Hart-Davis
003 All rights reserved.
004
005 Redistribution and use in source and binary forms, with or without
006 modification, are permitted provided that the following conditions are
007 met:
008
009 * Redistributions of source code must retain the above copyright
010 notice, this list of conditions and the following disclaimer.
011
012 * Redistributions in binary form must reproduce the above copyright
013 notice, this list of conditions and the following disclaimer in the
014 documentation and/or other materials provided with the
015 distribution.
016
017 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
018 IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
019 TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
020 PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
021 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
022 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
023 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
024 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
025 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
026 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
027 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
028 */
029 package org.hd.d.pg2k.svrCore;
030
031 import java.io.DataOutputStream;
032 import java.io.IOException;
033 import java.io.InputStream;
034 import java.io.InvalidObjectException;
035 import java.io.ObjectInputValidation;
036 import java.io.OutputStream;
037 import java.io.Serializable;
038 import java.lang.ref.SoftReference;
039 import java.nio.ByteBuffer;
040 import java.security.DigestOutputStream;
041 import java.security.MessageDigest;
042 import java.util.ArrayList;
043 import java.util.Arrays;
044 import java.util.Collections;
045 import java.util.Enumeration;
046 import java.util.HashMap;
047 import java.util.HashSet;
048 import java.util.Hashtable;
049 import java.util.Iterator;
050 import java.util.List;
051 import java.util.Map;
052 import java.util.Set;
053 import java.util.SortedSet;
054 import java.util.concurrent.ConcurrentHashMap;
055 import java.util.concurrent.ConcurrentMap;
056 import java.util.concurrent.Future;
057 import java.util.concurrent.TimeUnit;
058 import java.util.concurrent.TimeoutException;
059
060 import org.hd.d.pg2k.ai.scorer.ScorerCacheIF;
061 import org.hd.d.pg2k.svrCore.Name.ExhibitFull;
062 import org.hd.d.pg2k.svrCore.MIME.ExhibitMIME;
063 import org.hd.d.pg2k.svrCore.datasource.SimpleExhibitPipelineIF;
064 import org.hd.d.pg2k.svrCore.location.Location;
065 import org.hd.d.pg2k.svrCore.props.GenProps;
066 import org.hd.d.pg2k.svrCore.vars.BasicVarMgrInterface;
067 import org.hd.d.pg2k.svrCore.vars.SimpleVariablePipelineIF;
068
069 /**Set of all exhibit names and properties.
070 * This includes AllExhibitImmutableData and all other loadable and
071 * calculatable exhibit properties, including emergent statistical
072 * properties computed across the entire exhibit set.
073 * <p>
074 * This maintains a hash over all the loaded/static properties and the
075 * exhibit set so that changes should be easily caught.
076 * Computed state is excluded since it can be computed on demand,
077 * adds no extra information and/or includes contingent information and noise
078 * such as hints from system settings.
079 * <p>
080 * Designed to be efficient `on-the-wire' and in memory.
081 * <p>
082 * This object is logically immutable except that:
083 * <ul>
084 * <li>Some information is computed on demand and may be discarded if
085 * out-of-date or in low-memory conditions or for serialisation.
086 * <li>The not-yet-computed state of some expensive-to-compute items
087 * is made visible for more efficient use elsewhere in the system
088 * and this state may change according to the previous point.
089 * <li>Some expensive computable information may be retained across
090 * serialisation having been computed in a different environment
091 * to the current one.
092 * </ul>
093 * <p>
094 * In particular, EPCM values will be (re)computed dynamically as needed.
095 * <p>
096 * This holds per-exhibit information such as timestamp, x/y dimensions, etc,
097 * and whole-collection data such as hot/cold lists, location maps, etc.
098 * <p>
099 * The hashCode() and equals() methods are implemented,
100 * based on the underlying data.
101 */
102 public final class AllExhibitProperties implements Serializable, ObjectInputValidation,
103 MemoryTools.Compactable
104 {
/**Create the default (empty) instance: no exhibits and no properties.
 * Delegates to the full constructor with no old instance, freshly-created empty
 * global/immutable data, two empty property maps, and no explicit unchanged-since time.
 * <p>
 * This operation is fast and produces a small object,
 * with no data but entirely consistent.
 * <p>
 * The hash of this object is 0, as is the timestamp
 * (the full constructor forces both to zero when the exhibit set is empty).
 */
public AllExhibitProperties()
{
    this(
        null, // No previous instance to carry state over from.
        new ExhibitPropsGlobalImmutable(),
        new AllExhibitImmutableData(),
        new HashMap<Name.ExhibitFull,ExhibitPropsLoadable>(),
        new HashMap<Name.ExhibitFull,ExhibitPropsComputable>(),
        0 // Zero means "no explicit unchanged-since timestamp supplied".
        );
}
120
/**Construct a new AllExhibitProperties object from scratch, checking for consistency and validity.
 * Equivalent to calling the six-argument constructor with a null old instance
 * (so no computed state is inherited) and a zero unchanged-since value
 * (so the hash-not-changed-since time is derived rather than supplied).
 *
 * @param _epgi  immutable global exhibit properties
 * @param _aeid  immutable core exhibit data
 * @param _loadedProps  map from exhibit name to loaded properties
 * @param _computedProps  map from exhibit name to computed properties
 */
public AllExhibitProperties(final ExhibitPropsGlobalImmutable _epgi,
                            final AllExhibitImmutableData _aeid,
                            final Map<? extends CharSequence, ExhibitPropsLoadable> _loadedProps,
                            final Map<? extends CharSequence, ExhibitPropsComputable> _computedProps)
{
    this(
        null, // Build from scratch: nothing to inherit.
        _epgi,
        _aeid,
        _loadedProps,
        _computedProps,
        0 // No explicit unchanged-since timestamp.
        );
}
130
131 /**Canonicalise AEP, ie reconstruct new instance from old data; null only if input is.
132 * Can be useful for recomputing hashes, etc,
133 * from AEP instances created and serialised before an minor implementation change.
134 */
135 public static AllExhibitProperties canonicalise(final AllExhibitProperties in)
136 {
137 if(in == null) { return(null); }
138 final AllExhibitProperties result = new AllExhibitProperties(in, in.epgi, in.aeid, in.getExhibitPropsLoadableMap(), in.getExhibitPropsComputableMap(), in.hashNotChangedSince);
139 assert(in.equals(result)); // This operation should preserve equality!
140 return(result);
141 }
142
/**Build a new AllExhibitProperties object, preserving any computed state possible from the old one.
 * Because computing some information is very expensive,
 * this can save significant time.
 * <p>
 * If old is null, the object is built from scratch.
 * <p>
 * Keys of both property maps are canonicalised against _aeid
 * (the stored key is the exhibit's own full-name instance),
 * and null/EMPTY values are not stored.
 * <p>
 * If there are no exhibits then the construction timestamp and the long hash are zero
 * and the computed-properties input is ignored.
 *
 * @param old  previous instance from which mutable computed (EPCM) entries
 *     and the hash-not-changed-since time may be carried forward; may be null
 * @param _epgi  immutable global exhibit properties
 * @param _aeid  immutable core exhibit data
 * @param _loadedProps  map from exhibit name
 *     (as a CharSequence, eg a String, but preferably a Name.ExhibitFull)
 *     to loaded properties (ExhibitPropsLoadable);
 *     not all exhibits need have a mapping
 *     (in which case they are assumed to have no properties)
 * @param _computedProps  map from exhibit name to computed properties (ExhibitPropsComputable);
 *     not all exhibits need have a mapping
 * @param unchangedSince  if non-zero, the hashNotChangedSince value is set from this
 * @throws IllegalArgumentException  if either property map names an exhibit that is not in _aeid,
 *     or if the finished object fails validateObject()
 */
public AllExhibitProperties(final AllExhibitProperties old,
                            final ExhibitPropsGlobalImmutable _epgi,
                            final AllExhibitImmutableData _aeid,
                            final Map<? extends CharSequence, ExhibitPropsLoadable> _loadedProps,
                            final Map<? extends CharSequence, ExhibitPropsComputable> _computedProps,
                            final long unchangedSince)
{
    // Capture ExhibitPropsGlobalImmutable.
    epgi = _epgi;

    // Capture AllExhibitImmutableData.
    aeid = _aeid;

    final boolean isEmpty = (aeid.size() == 0); // No exhibits.

    // Set our construction timestamp; zero if no exhibits.
    constructedAt = isEmpty ? 0L : System.currentTimeMillis();

    // Make our loadedProps value.
    // Is read-only so does not need to be synchronized.
    final Map<Name.ExhibitFull,ExhibitPropsLoadable> lP = new HashMap<Name.ExhibitFull,ExhibitPropsLoadable>(_loadedProps.size() * 2 + 1);
    // Work through our input data, filtering and converting as necessary...
    // We walk the entry set so that each value is paired with the key it was actually stored under,
    // whatever CharSequence type the caller used for its keys.
    for(final Map.Entry<? extends CharSequence, ExhibitPropsLoadable> entry : _loadedProps.entrySet())
    {
        final CharSequence name = entry.getKey();
        // Ensure that we store the (pooled/canonical) type-safe name.
        final ExhibitStaticAttr esa = _aeid.getStaticAttr(name);
        // If named exhibit does not exist, abort.
        if(null == esa) { throw new IllegalArgumentException("EPL value specified for non-existent exhibit"); }
        final Name.ExhibitFull key = esa.getExhibitFullName();
        final ExhibitPropsLoadable epl = entry.getValue();
        // Don't store null/EMPTY instances.
        if((epl != null) && !epl.equals(ExhibitPropsLoadable.EMPTY))
        { lP.put(key, epl); }
    }
    loadedProps = lP; // Assign atomically.

    // Copy in (valid) computed properties.
    // Our new map will be logically immutable
    // so an ordinary HashMap is fine and allows read concurrency.
    computedProps = new HashMap<Name.ExhibitFull, ExhibitPropsComputable>(2*_computedProps.size());
    if(!isEmpty)
    {
        // Work through our input data, filtering and converting as necessary...
        for(final Map.Entry<? extends CharSequence, ExhibitPropsComputable> entry : _computedProps.entrySet())
        {
            final CharSequence key = entry.getKey();
            final ExhibitStaticAttr newesa = aeid.getStaticAttr(key);
            // If named exhibit does not exist, abort.
            if(newesa == null) { throw new IllegalArgumentException("EPC value specified for non-existent exhibit"); }

            final ExhibitPropsComputable epc = entry.getValue();
            // Don't store null/EMPTY instances.
            if((null == epc) || epc.equals(ExhibitPropsComputable.EMPTY)) { continue; }
            computedProps.put(newesa.getExhibitFullName(), epc);
        }

        // If we have mutable properties,
        // then copy across the entries corresponding to exhibits in this instance.
        // We remove entries corresponding to non-existent exhibits.
        // Even stale entries are copied since they may still be pretty accurate
        // and expensive to recompute, though trivially-stale values are discarded.
        // Note that it is safe to share entries with an old instance
        // since they are immutable.
        // We drop entries where the exhibit itself appears to have changed,
        // but we don't test things such as description text and other aux data.
        if((old != null) && (old.epcmMap != null))
        {
            // Take a private snapshot of the old map before touching our own.
            final Map<Name.ExhibitFull, ExhibitPropsComputableMutable> tmpMap =
                new HashMap<Name.ExhibitFull, ExhibitPropsComputableMutable>(old.epcmMap);
            epcmMap.clear(); // Ensure that we have no deserialised weird state!
            for(final Name.ExhibitFull key : tmpMap.keySet())
            {
                final ExhibitStaticAttr newesa = aeid.getStaticAttr(key);
                // If exhibit no longer exists or has changed, discard this, don't keep it.
                if((newesa == null) || !newesa.isIdentical(old.aeid.getStaticAttr(key)))
                {
                    continue;
                }

                // Discard null entries and trivially-stale values
                // that we can quickly recompute if required.
                final ExhibitPropsComputableMutable epcm = tmpMap.get(key);
                if((epcm == null) || epcm.isTriviallyStale())
                {
                    continue;
                }

                // Seems OK; use the ESA full exhibit name as the key
                // for memory efficiency (ie as the canonical key).
                epcmMap.put(newesa.getExhibitFullName(), epcm);
            }
        }
    }

    // Compute our long hash.
    // This should cover all the elements that equals() does.
    if(isEmpty) // Guaranteed zero (and fast to compute!) if no exhibits.
    { longHash = 0; }
    else
    {
        try
        {
            // Start a message digest.
            final MessageDigest md = GenUtils.getStandardDigest();
            // Everything written to dos is fed to the digest and then thrown away.
            final DataOutputStream dos =
                new DataOutputStream(
                    new DigestOutputStream(
                        (new OutputStream(){ // Null output stream...
                            @Override
                            public final void write(final int b) { }
                            @Override
                            public final void write(final byte[] b, final int off, final int len) { }
                            }),
                        md));

            // Get the full, sorted list of aeid keys.
            final List<Name.ExhibitFull> allKeys = aeid.getAllExhibitNamesSorted();

            // Put in the names and the loadable/computable property hashes,
            // in (reverse) sorted key order.
            for(int i = allKeys.size(); --i >= 0; )
            {
                final Name.ExhibitFull name = allKeys.get(i);
                dos.writeInt(name.length());
                dos.write(name.toByteArray()); // Capture the full name.
                final ExhibitPropsLoadable epl = getExhibitPropsLoadable(name);
                dos.writeInt(epl.hashCode());
                // We include the EPC hash explicitly,
                // even though EPC values are nominally derived values,
                // because their representation (eg format) may change/improve.
                final ExhibitPropsComputable epc = getExhibitPropsComputable(name);
                if(null != epc) { dos.writeInt(epc.hashCode()); }
                else { dos.writeByte(0); } // Single zero byte marks "no EPC".
            }

            // Contribute hashes from embedded global properties.
            dos.writeLong(epgi.longHash);

            // Fold in the aeid contribution,
            // trusting the timestamp as representative.
            dos.writeLong(aeid.timestamp);

            // Create the basic hash from the first 8 digest bytes.
            dos.flush();
            final byte[] digest = md.digest();
            long lHash = 0xDeadBeefDeadBeefL;
            for(int i = 8; --i >= 0; )
            { lHash ^= (((long)(digest[i] & 0xff)) << (8*i)); }

            // We guarantee that the hash is strictly positive
            // if there are exhibits
            // (and that the hash is zero if there are no exhibits).
            // The unsigned shift clears the sign bit; the old low bit is folded back in.
            lHash = (lHash >>> 1) ^ (lHash & 1);
            longHash = (lHash == 0) ? 1 : lHash;
        }
        // Should never happen (the sink discards everything), but keep the cause if it does.
        catch(final IOException e) { throw new Error("internal error", e); }
    }

    // See if the hash has changed and set a timestamp if so.
    // If an explicit not-changed-since timestamp has been supplied
    // then use the supplied explicit timestamp.
    // (This is to support explicit reconstruction of AEP state, eg from a diff.)
    // Else if there is no old object or the old object's hash is different,
    // then use the constructedAt time.
    // Else carry forward the old hashNotChangedSince time,
    // but force it to be no older than the aeid.timestamp
    // as a belt-and-braces measure in case the hash misses something.
    if(unchangedSince != 0)
    { hashNotChangedSince = unchangedSince; }
    else if((old == null) || (old.longHash != longHash))
    { hashNotChangedSince = constructedAt; }
    else
    { hashNotChangedSince = Math.max(old.hashNotChangedSince, aeid.timestamp); }

    // Verify object state, reporting failure as a bad argument but retaining the cause.
    try { validateObject(); }
    catch(final InvalidObjectException e)
    { throw new IllegalArgumentException(e.getMessage(), e); }
}
332
/**The immutable global exhibit properties; never null.
 * This is made available as a public field (rather than through an accessor)
 * for maximum performance.
 * Set once by the constructor from its _epgi argument.
 */
public final ExhibitPropsGlobalImmutable epgi;

/**The immutable exhibit data; never null.
 * This is made available as a public field (rather than through an accessor)
 * for maximum performance.
 * Set once by the constructor from its _aeid argument.
 */
public final AllExhibitImmutableData aeid;
342
/**Logically immutable map from exhibit name to loadable props (ExhibitPropsLoadable); never null.
 * If there is no entry for an exhibit then getExhibitPropsLoadable()
 * returns ExhibitPropsLoadable.EMPTY in its place.
 * <p>
 * The keys here are canonicalised against aeid so that we don't
 * have duplicate String values knocking about.
 * <p>
 * Note that keys may actually be String during deserialisation of older formats.
 */
private final Map<Name.ExhibitFull, ExhibitPropsLoadable> loadedProps;
352
353 /**Get loadable props for exhibit; never null.
354 * This is completely thread-safe as loadedProps is read-only/immutable
355 * once construction/deserialisation is complete.
356 *
357 * @return non-null instance; ExhibitPropsLoadable.EMPTY if none
358 */
359 public ExhibitPropsLoadable getExhibitPropsLoadable(final Name.ExhibitFull exhibitName)
360 {
361 final ExhibitPropsLoadable result = loadedProps.get(exhibitName);
362 if(result == null) { return(ExhibitPropsLoadable.EMPTY); } // Never return null.
363 return(result);
364 }
365
366 /**Get immutable map from full exhibit name to ExhibitPropsLoadable; never null.
367 * May omit entries that map to ExhibitPropsLoadable.EMPTY.
368 */
369 public Map<Name.ExhibitFull, ExhibitPropsLoadable> getExhibitPropsLoadableMap()
370 { return(Collections.unmodifiableMap(loadedProps)); }
371
372 /**Class that provides data source to get at exhibit data semi-random access or as a sequential stream.
373 * An implementor must override getRawFile() and may also override
374 * getInputStream() with a markable stream.
375 * <p>
376 * This does not hold a lock while calling back to the underlying data source
377 * in order to help avoid deadlocks.
378 * <p>
379 * The state of an InputStream returned by this is undefined after
380 * an IOException from the stream.
381 */
382 public static abstract class ExhibitDataSource
383 {
384 /**Check if the given exhibit is fully loaded in local/fast store; true if so.
385 * This means that the whole exhibit is quickly available,
386 * eg in a local file system, for reading.
387 * <p>
388 * This is only a hint; an immediately following call
389 * to read the exhibit might find the exhibit no longer local.
390 * <p>
391 * The result is undefined for a non-existent exhibit.
392 * <p>
393 * This may always return false if not sure or not known.
394 *
395 * @return true if exhibit is known to be fully/quickly available from local storage,
396 * false otherwise
397 */
398 public boolean isExhibitFullyLoaded(final ExhibitStaticAttr esa)
399 throws IOException
400 { return(false); /* Return default 'not known' answer. */ }
401
402 /**Read a chunk of the raw exhibit binary into the given buffer.
403 * As for SimpleExhibitPipelineIF.getRawFile().
404 */
405 public abstract void getRawFile(ByteBuffer buf, ExhibitFull exhibitName, int position)
406 throws IOException;
407
408 /**Get a the raw exhibit binary as a (markable) stream to read sequentially.
409 * The default implementation uses getRawFile()
410 * and does no buffering.
411 * <p>
412 * The returned stream is thread-safe, but may not behave as expected
413 * is used by more than one thread at once,
414 * ie concurrent reads are not supported.
415 */
416 public InputStream getInputStream(final ExhibitStaticAttr esa)
417 {
418 final InputStream result = new InputStream(){
419 /**Current file position (0 at start, esa.length at EOF); non-negative. */
420 private int pos;
421
422 /**Return exact number of bytes remaining, ie available. */
423 @Override
424 public final synchronized int available()
425 { return((int) Math.min(Integer.MAX_VALUE, esa.length - pos)); }
426
427 /**Read a single byte; highly inefficient but functionality must be supplied.
428 * This <strong>does not</strong> hold a lock during the call to getRawFile().
429 */
430 @Override
431 public final int read()
432 throws IOException
433 {
434 final byte data[] = new byte[1];
435 final int n = read(data, 0, 1);
436 if(n == -1) { return(-1); } // EOF.
437 assert(n == 1); // We block to read at least one byte...
438 return(data[0] & 0xff);
439 }
440
441 /**Read a block of bytes directly from the raw data source.
442 * Always does a full read unless at EOF when remaining
443 * bytes will be returned.
444 * <p>
445 * This is thread-safe, but may not behave as expected
446 * is used by more than one thread at once,
447 * ie concurrent reads are not supported.
448 * <p>
449 * This <strong>does not</strong> hold a lock during the call to getRawFile().
450 * <p>
451 * After an IOException the stream is left positioned at EOF.
452 */
453 @Override
454 public final int read(final byte buf[], final int off, int len)
455 throws IOException
456 {
457 final int avail = available();
458 if(avail <= 0) { return(-1); } // EOF.
459 if(len > avail) { len = avail; } // Limit to remaining data.
460 if(len < 1) { return(0); }
461
462 try
463 {
464 final ByteBuffer byteBuffer = ByteBuffer.wrap(buf, off, len);
465 assert(byteBuffer.position() == off);
466 assert(byteBuffer.remaining() == len);
467 getRawFile(byteBuffer, esa.getExhibitFullName(), pos);
468 final int bytesRead = len - byteBuffer.remaining();
469 assert(bytesRead >= 0);
470
471 // Update file position (thread-) safely.
472 if(bytesRead > 0)
473 {
474 synchronized(this)
475 {
476 if((pos += bytesRead) > esa.length)
477 { throw new IOException("concurrent reads took position beyond EOF"); }
478 }
479 }
480 }
481 catch(final IOException e)
482 {
483 // In case of error move position to EOF and rethrow.
484 pos = (int) esa.length;
485 throw e;
486 }
487
488 return(len);
489 }
490
491 /**Marked position (ie store for pos when mark() is called); [0, esa.length]. */
492 private int markedPos;
493
494 /**Marks the current position in this input stream.
495 * A subsequent call to
496 * the <code>reset</code> method repositions this stream at the last marked
497 * position so that subsequent reads re-read the same bytes.
498 * <p/>
499 * <p> The <code>readlimit</code> argument tells this input stream to
500 * allow that many bytes to be read before the mark position gets
501 * invalidated.
502 * <p/>
503 * <p>This implementation records the current stream position.</p>
504 *
505 * @param readlimit the maximum limit of bytes that can be read before
506 * the mark position becomes invalid.
507 * @see java.io.InputStream#reset()
508 */
509 @Override
510 public synchronized void mark(final int readlimit)
511 {
512 // Notes the current position.
513 markedPos = pos;
514 }
515
516 /**Tests if this input stream supports the <code>mark</code> and <code>reset</code> methods.
517 * Always true for this implementation.
518 * @return <code>true</code>
519 * @see java.io.InputStream#mark(int)
520 * @see java.io.InputStream#reset()
521 */
522 @Override
523 public boolean markSupported()
524 { return(true); }
525
526 /**Repositions this stream to the position at the time the <code>mark</code> method was last called on this input stream.
527 * <p>This implementation always restores the stream to the marked position
528 * and never needs to throw an IOException.</p>
529 *
530 * @see java.io.InputStream#mark(int)
531 * @see java.io.IOException
532 */
533 @Override
534 public synchronized void reset()
535 // throws IOException
536 {
537 pos = markedPos;
538 }
539
540 /**Skips over and discards <code>n</code> bytes of data from this input stream.
541 * The <code>skip</code> method may, for a variety of reasons, end
542 * up skipping over some smaller number of bytes, possibly <code>0</code>.
543 * This may result from any of a number of conditions; reaching end of file
544 * before <code>n</code> bytes have been skipped is only one possibility.
545 * The actual number of bytes skipped is returned. If <code>n</code> is
546 * negative, no bytes are skipped.
547 * <p/>
548 * <p>This implementation never needs to throw an IOException.</p>
549 *
550 * @param n the number of bytes to be skipped
551 * @return the actual number of bytes skipped
552 */
553 @Override
554 public synchronized long skip(final long n)
555 // throws IOException
556 {
557 if((n < 0) || (n >= Integer.MAX_VALUE))
558 { throw new IllegalArgumentException(); }
559
560 // Compute number of bytes before EOF.
561 final int av = available();
562
563 // If the skip request does not take us beyond EOF
564 // then do it as requested.
565 if(n <= av)
566 {
567 pos += n;
568 return(n);
569 }
570
571 // Else skip to EOF and return the number of bytes skipped.
572 pos += av;
573 return(av);
574 }
575 };
576 return(result);
577 }
578 }
579
/**Map from full exhibit name to ExhibitPropsComputable properties; never null.
 * NOTE(review): this comment used to describe a Hashtable whose entries were
 * created on first demand and posted to the map without a lock held.
 * The constructor in this class instead creates an ordinary HashMap,
 * fills it once from its _computedProps argument
 * and then treats it as logically immutable so that reads need no locking;
 * confirm that no other code path still relies on the older description.
 * <p>
 * null is never posted as a value
 * (the constructor skips null and EMPTY instances).
 * <p>
 * Defensively copied during deserialisation.
 * <p>
 * Note that keys may actually be String during deserialisation of older formats.
 */
private final Map<Name.ExhibitFull,ExhibitPropsComputable> computedProps;
597
598 /**Get computable (immutable) props for exhibit; if none available then returns null.
599 */
600 public ExhibitPropsComputable getExhibitPropsComputable(final Name.ExhibitFull exhibitName)
601 { return(computedProps.get(exhibitName)); }
602
603 /**Get immutable map from full exhibit name to ExhibitPropsComputable; never null.
604 */
605 public Map<Name.ExhibitFull,ExhibitPropsComputable> getExhibitPropsComputableMap()
606 { return(Collections.unmodifiableMap(computedProps)); }
607
/**The hash of all the data held; guaranteed non-negative.
 * Depends on the exhibit names, the timestamp of the exhibit data (aeid),
 * the global properties hash and the per-exhibit loadable property hashes.
 * <p>
 * Is guaranteed to be zero if the exhibit set is empty,
 * and strictly positive otherwise.
 * <p>
 * hashNotChangedSince is excluded from the hash.
 * NOTE(review): this comment previously said that computable data is excluded too,
 * but the constructor does fold in each exhibit's ExhibitPropsComputable hashCode()
 * where one is present; only the mutable (EPCM) state is left out.
 */
public final long longHash;

/**The time that this object was constructed.
 * Strictly positive unless there are no exhibits,
 * in which case this is zero.
 * <p>
 * We keep this private.
 */
private final long constructedAt;

/**Time since which we claim that the exhibit set/data has not changed; guaranteed non-negative.
 * If a new AllExhibitProperties object is constructed
 * with the aid of an old one, and the longHash has not changed
 * between the two, then this is copied from the old
 * object (except that as a belt and braces measure it is
 * ensured to be no older than the aeid.timestamp),
 * else it is the same as the constructedAt time.
 * An explicit non-zero value passed to the constructor overrides both cases.
 * <p>
 * If a new AllExhibitProperties object is constructed from
 * scratch each time then this will be the time that the
 * object was constructed, and so we can regard this as
 * an `exhibits have not changed since' time.
 * <p>
 * Strictly positive unless there are no exhibits,
 * in which case this is zero (to prompt early recomputation!).
 */
public final long hashNotChangedSince;
645
646
/**Frequently-used value instance Integer(1).
 * Used as the initial count when tallying exhibits per author, category and extension.
 */
private static final Integer ONE = Integer.valueOf(1);
649
/**Map from author initials (String) to exhibit count for that author (Integer).
 * Only authors that have one or more exhibits are stored in the map.
 * <p>
 * This map is immutable, and is computed on first use
 * (it is null until getAuthorExhibitCounts() has been called).
 * <p>
 * This map is transient (ie not stored in the persistent state),
 * and volatile for thread-safe lockless access.
 */
private transient volatile Map<String,Integer> authorExhibitCounts;
659
660 /**Get a map from author initials (String) to exhibit count for that author (Integer); never null.
661 * Only authors that have have one or more exhibits are stored in the map.
662 */
663 public final Map<String,Integer> getAuthorExhibitCounts()
664 {
665 Map<String,Integer> result = authorExhibitCounts;
666 if(result == null)
667 {
668 synchronized(this) // Ensure that we only spend CPU time once, in return for possibly reduced concurrency.
669 {
670 if(null == (result = authorExhibitCounts))
671 {
672 // Still really needs (re)computing (no other thread raced there first).
673 final HashMap<String,Integer> rawResult = new HashMap<String,Integer>(); // For speed.
674 for(final Name.ExhibitFull fullName : aeid.getAllExhibitNamesSorted())
675 {
676 final String author = ExhibitName.getAuthorComponent(fullName).toString();
677 final Integer oldCount = rawResult.get(author);
678 if(oldCount == null) { rawResult.put(author, ONE); }
679 else { rawResult.put(author, Integer.valueOf(oldCount.intValue() + 1)); }
680 }
681 authorExhibitCounts = result = Collections.unmodifiableMap(rawResult);
682 }
683 }
684 }
685 assert(null != result);
686 return(result);
687 }
688
/**Map from category [top-level directory] (String) to exhibit count for that category (Integer).
 * Only categories that have one or more exhibits are stored in the map.
 * <p>
 * This map is immutable, and is computed on first use
 * (it is null until getCategoryExhibitCounts() has been called).
 * <p>
 * This map is transient (ie not stored in the persistent state),
 * and volatile for thread-safe lockless access.
 */
private transient volatile Map<String,Integer> categoryExhibitCounts;
698
699 /**Get a map from category [top-level directory] (String) to exhibit count for that category (Integer); never null.
700 * Only categories that have have one or more exhibits are stored in the map.
701 * <p>
702 * This map is immutable, and is computed on first use.
703 */
704 public final Map<String,Integer> getCategoryExhibitCounts()
705 {
706 Map<String,Integer> result = categoryExhibitCounts;
707 if(result == null)
708 {
709 synchronized(this) // Ensure that we only spend CPU time once, in return for possibly reduced concurrency.
710 {
711 if(null == (result = categoryExhibitCounts))
712 {
713 // Still really needs (re)computing (no other thread raced there first).
714 final HashMap<String,Integer> rawResult = new HashMap<String,Integer>(64); // For speed.
715 for(final Name.ExhibitFull fullName : aeid.getAllExhibitNamesSorted())
716 {
717 final String category = ExhibitName.getCategoryComponent(fullName).toString();
718 final Integer oldCount = rawResult.get(category);
719 if(oldCount == null) { rawResult.put(category, ONE); }
720 else { rawResult.put(category, Integer.valueOf(oldCount.intValue() + 1)); }
721 }
722 categoryExhibitCounts = result = Collections.unmodifiableMap(rawResult);
723 }
724 }
725 }
726 assert(null != result);
727 return(result);
728 }
729
/**Limit of goodness*correlation to consider something (eg category) significantly good or bad.
 * NOTE(review): not referenced in the visible part of this file;
 * presumably consumed by the category good/bad logic -- confirm against its users.
 */
public static final float GOODBAD_LIMIT = 0.4f;
732
733
734 /**Find out if a category is rated "good"/popular or not.
735 * Returns TRUE if rated good, FALSE if bad,
736 * null if not significantly either or if not known.
737 *
738 * @param categoryDir the initial directory component of an extant exhibit
739 * @param force if true may force (expensive) computation to give
740 * a more accurate answer,
741 * else may return a more approximate or stale answer,
742 * or none at all (null)
743 */
744 public Boolean isCategoryGood(final String categoryDir,
745 final BasicVarMgrInterface vars,
746 final boolean force)
747 throws IOException
748 { return(_vcCache.isCategoryGood(categoryDir, this, vars, force)); }
749
/**Map from dotted-file-extension (String) to exhibit count for that extension (Integer).
 * Only extensions that have one or more exhibits are stored in the map.
 * <p>
 * This map is immutable, and is computed on first use
 * (it is null until getDottedExtensionExhibitCounts() has been called).
 * <p>
 * This map is transient (ie not stored in the persistent state),
 * and volatile for thread-safe lockless access.
 */
private transient volatile Map<String,Integer> dottedExtensionExhibitCounts;
759
760 /**Get a map from dotted-file-extension (String) to exhibit count for that author (Integer); never null.
761 * Only extensions that have have one or more exhibits are stored in the map.
762 * <p>
763 * This map is immutable, and is computed on first use.
764 */
765 public final Map<String,Integer> getDottedExtensionExhibitCounts()
766 {
767 Map<String,Integer> result = dottedExtensionExhibitCounts;
768 if(result == null)
769 {
770 // Needs (re)computing.
771 final HashMap<String,Integer> rawResult = new HashMap<String,Integer>(); // For speed.
772 for(final Name.ExhibitFull fullName : aeid.getAllExhibitNamesSorted())
773 {
774 final String dExt = "." + ExhibitName.getExtensionComponent(fullName);
775 final Integer oldCount = rawResult.get(dExt);
776 if(oldCount == null) { rawResult.put(dExt, ONE); }
777 else { rawResult.put(dExt, Integer.valueOf(oldCount.intValue() + 1)); }
778 }
779 dottedExtensionExhibitCounts = result = Collections.unmodifiableMap(rawResult);
780 }
781 return(result);
782 }
783
784
785
786
787 /**Returns set of available extensions, sorted case insensitively.
788 * This may be more than actually appear in the data set
789 * (and this result does not depend on the data set, in fact).
790 * This result contains no duplicates.
791 * <p>
792 * These are suffixes starting with `.' eg ``.jpg''.
793 * <p>
794 * Never returns null.
795 */
796 public static String[] getLegalSuffixes()
797 {
798 final ExhibitMIME.ExhibitTypeParameters types[] = ExhibitMIME.getAllValidExhibitTypes();
799 final Set<String> suffixes = new HashSet<String>(types.length * 2 + 1);
800 for(int i = types.length; --i >= 0; )
801 { suffixes.add(types[i].dotSuffixForInputFile); }
802 final String result[] = new String[suffixes.size()];
803 suffixes.toArray(result);
804 Arrays.sort(result, String.CASE_INSENSITIVE_ORDER);
805 return(result);
806 }
807
808
809
810
811 /**Get total size in bytes of all exhibits; non-negative.
812 * Computed on first use.
813 */
814 public long getTotalExhibitBytes()
815 {
816 long totalBytes = exhibitBytes; // One read access in normal case.
817 if(totalBytes == 0) // Recompute.
818 {
819 for(final ExhibitStaticAttr esa : aeid.getAllStaticAttrs())
820 { totalBytes += esa.length; }
821 exhibitBytes = totalBytes; // Cache result.
822 }
823 return(totalBytes);
824 }
825
826 /**Total size in bytes of all exhibits.
827 * A zero value is taken to mean "not computed", though in the case where
828 * there are no exhibits the resulting redundant recomputations will be cheap.
829 * <p>
830 * This is transient since it can be recalculated on demand.
831 * <p>
832 * This is volatile to allow safe lock-free access.
833 */
834 private transient volatile long exhibitBytes;
835
836
837 /**Returns a Map from attribute (String word) to a Set of full names of all exhibits with each attribute; never null.
838 * An exhibit can have a particular attribute either
839 * by having the attribute in the name (in the usual place at the end),
840 * or in some circumstances by the exhibit type or content being examined.
841 * <p>
842 * The key set is from ExhibitAttrUtils.getAttrWords().getAttrWordsSortedSet().
843 * <p>
844 * The returned value is immutable (and the keys and values are immutable)
845 * and is never null.
846 * <p>
847 * Attributes which no exhibits have are present in the Map
848 * with an empty Set as the mapped-to value, not null.
849 */
850 public Map<String,Set<Name.ExhibitFull>> getExhibitsByAttribute()
851 {
852 Map<String,Set<Name.ExhibitFull>> result; // Immutable non-null result.
853
854 synchronized(_exhibitsByAttribute_lock)
855 {
856 if(null == (result = exhibitsByAttribute.get()))
857 { // Needs recomputing.
858 final SortedSet<String> attrWords = ExhibitAttrUtils.getAttrWords().getAttrWordsSortedSet();
859
860 // We use a HashMap for lookup speed.
861 final Map<String,Set<Name.ExhibitFull>> work = new HashMap<String,Set<Name.ExhibitFull>>(attrWords.size() * 2 + 1);
862 // Make empty Set entries for each attribute word.
863 for(final String attrWord : attrWords)
864 {
865 // We use HashSet for eventual lookup speed.
866 work.put(attrWord, new HashSet<Name.ExhibitFull>());
867 }
868
869 // Now go through all the exhibits,
870 // adding them all by their explicit attribute words.
871 for(final Name.ExhibitFull fullName : aeid.getAllExhibitNamesSorted())
872 {
873 // Get the attribute words for this to be filed under.
874 final Enumeration<?> en = ExhibitName.getAttributeWordsComponentEnumeration(
875 fullName, attrWords);
876 if(en != null)
877 {
878 while(en.hasMoreElements())
879 {
880 final String attrWord = (String) en.nextElement();
881 work.get(attrWord).add(fullName);
882 }
883 }
884 }
885
886 // Now replace all the Sets in the Map with unmodifiable ones,
887 // In particular replacing empty sets with the static singleton.
888 final Set<Name.ExhibitFull> noWords = Collections.emptySet();
889 for(final Iterator<String> it = work.keySet().iterator(); it.hasNext(); )
890 {
891 final String attrWord = it.next();
892 final Set<Name.ExhibitFull> s = work.get(attrWord);
893 if(s.size() == 0)
894 { work.put(attrWord, noWords); }
895 else
896 { work.put(attrWord, Collections.unmodifiableSet(s)); }
897 }
898
899 // Now make the whole map unmodifiable.
900 result = Collections.unmodifiableMap(work);
901 exhibitsByAttribute = new SoftReference<Map<String,Set<ExhibitFull>>>(result);
902 }
903
904 return(result);
905 }
906 }
907
908 /**Returns an immutable Map from attribute (String word) to a Set of full names of all exhibits with each attribute; never null.
909 * An exhibit can have a particular attribute either
910 * by having the attribute in the name (in the usual place at the end),
911 * or in some circumstances by the exhibit type or content being examined.
912 * <p>
913 * The key set is from ExhibitAttrUtils.getAttrWords().getAttrWordsSortedSet().
914 * <p>
915 * The returned value is immutable (and the keys and values are immutable)
916 * and is never null.
917 * <p>
918 * Attributes which no exhibits have are present in the Map
919 * with zero as the mapped-to value, not null.
920 */
921 public Map<String,Integer> getExhibitCountsByAttribute()
922 {
923 Map<String,Integer> result = exhibitCountsByAttribute;
924 if(null == result) // Recompute...
925 {
926 // Compute somewhat expensively via the full map.
927 // This has the advantage of exactly matching its semantics.
928 final Map<String,Set<Name.ExhibitFull>> m = getExhibitsByAttribute();
929 final Map<String,Integer> r = new HashMap<String, Integer>(1 + 2*m.size());
930 for(final String s : m.keySet())
931 { r.put(s, Integer.valueOf(m.get(s).size())); }
932 exhibitCountsByAttribute = result = Collections.unmodifiableMap(r);
933 }
934 return(result);
935 }
936
937 /**A Map from attribute (String word) to a Set of full names of all exhibits with each attribute; never null though the referent may be.
938 * An exhibit can have a particular attribute either
939 * by having the attribute in the name (in the usual place at the end),
940 * or in some circumstances by the exhibit type or content being examined.
941 * <p>
942 * This is immutable, and is transient and recomputed on first use,
943 * and/or after being flushed due to memory shortage,
944 * being entirely derived from other information in this AEP.
945 * <p>
946 * We hold a private lock while recomputing this information
947 * to allow other processing to continue in parallel with this
948 * possibly slow task.
949 */
950 private transient SoftReference<Map<String,Set<Name.ExhibitFull>>> exhibitsByAttribute = new SoftReference<Map<String,Set<ExhibitFull>>>(null);
951
952 /**A Map from attribute (String word) to a count of full names of all exhibits with each attribute; null until first use.
953 * Set/computed when exhibitsByAttribute is
954 * but not discarded as should require little memory.
955 */
956 private transient volatile Map<String,Integer> exhibitCountsByAttribute;
957
958 /**Private lock for manipulating exhibitsByAttribute; must use construction to create correctly. */
959 private final transient Object _exhibitsByAttribute_lock = new Object();
960
961
962 /**Get case-sensitive Set of all main words (String values) in extant exhibits; never null.
963 * Result will be empty if there are no exhibits.
964 * <p>
965 * Computed on demand (and cached) under a private lock as this may be slow
966 * and we do not want to waste CPU time on redundant computations.
967 */
968 public Set<String> getMainWords()
969 {
970 Set<String> result;
971
972 synchronized(_mainWords_lock)
973 {
974 if(null == (result = mainWords.get()))
975 {
976 final List<Name.ExhibitFull> allNames = aeid.getAllExhibitNamesSorted();
977 final Set<String> allAttrWords = ExhibitAttrUtils.getAttrWords().getAttrWordsSortedSet();
978
979 // Create the result making an initial conservative guess at size.
980 final Set<String> work = new HashSet<String>(3 * allNames.size()); // HashSet for lookup speed.
981
982 for(final Name.ExhibitFull fullName : allNames)
983 {
984 final Enumeration<?> words = ExhibitName.getMainWords(fullName, allAttrWords);
985 while(words.hasMoreElements())
986 { work.add((String) words.nextElement()); }
987 }
988
989 // Cache as an immutable value.
990 result = Collections.unmodifiableSet(work);
991 mainWords = new SoftReference<Set<String>>(result);
992 }
993
994 return(result); // Can safely hand out immutable.
995 }
996 }
997
998 /**Case-sensitive immutable cache (transient, possibly memory-sensitive) of Set of all exhibit main words; never null but referent may be.
999 * Computed on first use, and possibly recomputed after a memory shortage.
1000 * <p>
1001 * Accessed under the _mainWords_lock.
1002 * <p>
1003 * Used, for example, by the upload routines to warn of possibly-misspelt words.
1004 */
1005 private transient SoftReference<Set<String>> mainWords = new SoftReference<Set<String>>(null);
1006
1007 /**Private lock for manipulating mainWords; must use constructor to get set up correctly. */
1008 private final transient Object _mainWords_lock = new Object();
1009
1010
1011 /**Private cache; Map from full exhibit name to ExhibitPropsComputableMutable.
1012 * May or may not be serialised (may be transient to economise on space);
1013 * never null if a constructor has been called.
1014 * <p>
1015 * A thread-safe Map implementation is used;
1016 * one that allows decent concurrency, especially without locking.
1017 */
1018 private transient final ConcurrentMap<Name.ExhibitFull,ExhibitPropsComputableMutable> epcmMap =
1019 _createNewEpcmMap(1001);
1020
1021 /**Recover EPCM (and possibly other details) from old AEP.
1022 * This can save a great deal of recomputation after an AEP update.
1023 * <p>
1024 * Will only use details from old AEP:
1025 * <ul>
1026 * <li>That match current exhibits.
1027 * <li>That are not trivially stale.
1028 * <li>If we have no such data at all
1029 * (to prevent later overwriting of newly-computed data).
1030 * </ul>
1031 * <p>
1032 * This should not be called once threads other than the caller have access
1033 * to this AEP since after performing our initial checks there is a danger
1034 * of races that might leave old data in place.
1035 * <p>
1036 * This does not compute any new entries.
1037 */
1038 public void recoverOldExhibitPropsComputableMutableData(final AllExhibitProperties oldAEP,
1039 final SimpleLoggerIF logger)
1040 {
1041 if(oldAEP == null) { throw new IllegalArgumentException(); }
1042
1043 final ConcurrentMap<Name.ExhibitFull,ExhibitPropsComputableMutable> m = epcmMap;
1044 assert(m != null);
1045
1046 // If map is non-empty then don't accept data from old AEP.
1047 // This avoids accidentally overwriting fresh values with stale ones
1048 // or adding stale/dubious values to a live AEP instance.
1049 if(!m.isEmpty()) { return; }
1050
1051 // If old map is empty then there is nothing to recover.
1052 final ConcurrentMap<Name.ExhibitFull,ExhibitPropsComputableMutable> oldM = oldAEP.epcmMap;
1053 if((oldM == null) || (oldM.isEmpty())) { return; }
1054
1055 if(logger != null) { logger.log("[AllExhibitProperties: recovering EPCM entries: old map size: "+oldM.size()+".]"); }
1056
1057 for(final Name.ExhibitFull exhibitName : aeid.getAllExhibitNamesSorted())
1058 {
1059 final ExhibitPropsComputableMutable oldECPM =
1060 oldM.get(exhibitName);
1061 // No useful ECPM value to retain.
1062 if((oldECPM == null) || oldECPM.isTriviallyStale())
1063 { continue; }
1064 // Retain old value if we have not newly computed another result in the interim.
1065 final ExhibitPropsComputableMutable newlyComputedValue = m.putIfAbsent(exhibitName, oldECPM);
1066 // If the newly-computed result is more stale than the old one,
1067 // try just once more to retain the old value.
1068 if((newlyComputedValue != null) && newlyComputedValue.isTriviallyStale() && !oldECPM.isStale())
1069 { m.replace(exhibitName, newlyComputedValue, oldECPM); }
1070 }
1071
1072 if(logger != null) { logger.log("[AllExhibitProperties: finished recovering EPCM entries: new map size: "+m.size()+".]"); }
1073 }
1074
1075 /**If true, then we may speculatively spin off threads to compute full EPCM values even when stale ones OK. */
1076 private static final boolean COMPUTE_FULL_EPCM_RESULT_SPECULATIVELY_IN_BG = false;
1077
1078 /**Get ExhibitPropsComputableMutable for specified exhibit; null if no such exhibit or no result yet computed.
1079 * Returns whatever is currently available as-is and
1080 * never forces any (re)computation of values
1081 * nor blocks (on any externally-visible lock).
1082 *
1083 * @param exhibitName full name of exhibit for which data is desired; never null
1084 */
1085 public ExhibitPropsComputableMutable getExhibitPropsComputableMutable(final Name.ExhibitFull exhibitName)
1086 {
1087 // Get existing cached value, if any.
1088 return(epcmMap.get(exhibitName));
1089 }
1090
1091 /**Get ExhibitPropsComputableMutable for specified exhibit within given time; null if no such exhibit or out of time.
1092 * If the "allowStale" parameter is false,
1093 * the system will make best efforts to compute an accurate, non-stale value.
1094 * However, this may not be possible for a number of contingent reasons,
1095 * such as transient I/O errors.
1096 * <p>
1097 * Equally, if the "allowStale" flag is true,
1098 * there may be a non-stale value cached that can be returned.
1099 * <p>
1100 * So the "allowStale" parameter should be regarded as a strong hint at most.
1101 * <p>
1102 * This will spend up to the specified amount of time attempting to
1103 * (re)compute the value if needed,
1104 * else will return what (if anything) is already available.
1105 * If there is a shortage of resources (eg threads)
1106 * then this may not succeed in (re)calculation.
1107 *
1108 * @param exhibitName full name of exhibit for which data is desired; never null
1109 * @param allowStale allows "approximate" or "stale" version to be returned
1110 * if need be, ie does not force re-computation of new value,
1111 * useful if site is busy or time is short
1112 * @param gp system properties;
1113 * never null and not default/empty for non-stale result
1114 * @param dataSource source of event and exhibit (meta) data;
1115 * must not be null if a non-stale result is desired
1116 * @param scorers source of exhibit-content based scoring;
1117 * must not be null if a non-stale result is desired
1118 * @param maxTimeMs maximum time (ms) to spend computing result;
1119 * strictly positive
1120 *
1121 * @return non-null result if exhibit exists and we don't run out of time or other resources;
1122 * possibly stale/approximate if allowStale==true or gp/dsb are empty/null
1123 */
1124 public ExhibitPropsComputableMutable getExhibitPropsComputableMutable(final Name.ExhibitFull exhibitName,
1125 final boolean allowStale,
1126 final GenProps gp,
1127 final SimpleExhibitPipelineIF dataSource,
1128 final ScorerCacheIF scorers,
1129 final long maxTimeMs)
1130 {
1131 if(maxTimeMs <= 0) { throw new IllegalArgumentException(); }
1132
1133 // Find what is already available.
1134 final ExhibitPropsComputableMutable initialResult = getExhibitPropsComputableMutable(exhibitName);
1135 // If the caller will accept stale or the extant answer isn't
1136 // then we can return any extant non-null value immediately!
1137 if(initialResult != null)
1138 {
1139 if(allowStale || !initialResult.isStale())
1140 { return(initialResult); }
1141 }
1142
1143 // Spend a capped time attempting recomputation.
1144 // If short of threads then we may not do this at all.
1145 // We must use a pool with a 'discard' rather than 'caller runs' policy
1146 // so that we cannot be blocked nor use excessive resources.
1147 // TODO: may wish to use normal-priority pool in future.
1148 // We don't directly harvest the result: we rely on it being cached.
1149 final Future<?> f = ThreadUtils.lowPriorityThreadPoolDiscardable.submit(new Runnable() {
1150 public void run() { getExhibitPropsComputableMutable(exhibitName, allowStale, gp, dataSource, scorers); }
1151 });
1152 // Don't block indefinitely.
1153 try { f.get(maxTimeMs, TimeUnit.MILLISECONDS); }
1154 catch(final TimeoutException e) { /* Ran out of time, just ignore. */ }
1155 catch(final Exception e) { e.printStackTrace(); /* Unexpected but not fatal. */ }
1156
1157 // Return latest-available value: best efforts only.
1158 return(getExhibitPropsComputableMutable(exhibitName));
1159 }
1160
1161 /**Get ExhibitPropsComputableMutable for specified exhibit; null if no such exhibit else never null.
1162 * If the "allowStale" parameter is false,
1163 * the system will make best efforts to compute an accurate, non-stale value.
1164 * However, this may not be possible for a number of contingent reasons,
1165 * such as transient I/O errors.
1166 * <p>
1167 * Equally, if the "allowStale" flag is true,
1168 * there may be a non-stale value cached that can be returned.
1169 * <p>
1170 * So the "allowStale" parameter should be regarded as a strong hint at most.
1171 *
1172 * @param exhibitName full name of exhibit for which data is desired; never null
1173 * @param allowStale allows "approximate" or "stale" version to be returned
1174 * if need be, ie does not force re-computation of new value,
1175 * useful if site is busy or time is short
1176 * @param gp system properties;
1177 * never null and not default/empty for non-stale result
1178 * @param dataSource source of event and exhibit (meta) data;
1179 * must not be null if a non-stale result is desired
1180 * @param scorers source of exhibit-content based scoring;
1181 * must not be null if a non-stale result is desired
1182 *
1183 * @return non-null result if exhibit exists;
1184 * possibly stale/approximate if allowStale==true or gp/dsb are empty/null
1185 */
1186 public ExhibitPropsComputableMutable getExhibitPropsComputableMutable(final Name.ExhibitFull exhibitName,
1187 final boolean allowStale,
1188 final GenProps gp,
1189 final SimpleExhibitPipelineIF dataSource,
1190 final ScorerCacheIF scorers)
1191 {
1192 // Get internal representation of name to save memory if creating new entry,
1193 // else reject request if exhibit name is not valid/current.
1194 final ExhibitStaticAttr esa = aeid.getStaticAttr(exhibitName);
1195 if(esa == null) { return(null); /* No such exhibit. */ }
1196 final Name.ExhibitFull ex = esa.getExhibitFullName(); // Shared representation of name to save heap.
1197
1198 // Get current map.
1199 final ConcurrentMap<Name.ExhibitFull,ExhibitPropsComputableMutable> m = epcmMap;
1200 assert(m != null) : "epcmMap cannot be null now";
1201
1202 // Get existing cached value, if any.
1203 final ExhibitPropsComputableMutable cachedValue = m.get(ex);
1204 final boolean cachedValueIsNull = (cachedValue == null);
1205
1206 if(allowStale)
1207 {
1208 if(cachedValueIsNull)
1209 {
1210 // Quickly create an approximate and (probably) stale entry.
1211 final ExhibitPropsComputableMutable result = ExhibitPropsComputableMutable.generateFastApproximation(esa, gp);
1212 // Never replace a non-null mapping to avoid overwriting a newer value.
1213 m.putIfAbsent(ex, result);
1214
1215 // If we superficially have all the required data sources available
1216 // (we leave fine sufficiency judgements up to the EPCM calculation routine)
1217 // then attempt to create fully up-to-date value in the background.
1218 // We severely limit the resources consumed by this speculative computation,
1219 // and never block waiting for the computation since it may take a LONG time.
1220 if(COMPUTE_FULL_EPCM_RESULT_SPECULATIVELY_IN_BG &&
1221 result.isStale() &&
1222 (aeid.length != 0) &&
1223 (gp != null) && (gp.timestamp != 0) &&
1224 (dataSource != null) &&
1225 (scorers != null))
1226 {
1227 // Recursively call to try to compute and cache an up-to-date value.
1228 // The allowStale flag is flipped thus preventing further recursion.
1229 ThreadUtils.lowPriorityThreadPoolDiscardable.submit(new Runnable(){
1230 public final void run()
1231 { getExhibitPropsComputableMutable(exhibitName, false, gp, dataSource, scorers); }
1232 });
1233 }
1234
1235 // Return probably stale/incomplete result.
1236 return(result);
1237 }
1238
1239 // Fall through to return non-null cached value.
1240 }
1241 else // Non-stale value requested.
1242 {
1243 // Force our cached entry up-to-date if absent/stale,
1244 // given that stale is not good enough for the caller.
1245 if(cachedValueIsNull || cachedValue.isStale())
1246 {
1247 // Compute full result if possible,
1248 // but fall back to "stale" result if necessary so as to avoid returning null.
1249 try
1250 {
1251 final ExhibitPropsComputableMutable result = ExhibitPropsComputableMutable.compute(
1252 esa, gp, this, dataSource, _vcCache, scorers);
1253 // Avoid a race resulting in older value overwriting a newer concurrently-computed one.
1254 if(cachedValueIsNull) { m.putIfAbsent(ex, result); }
1255 // But don't in any case risk overwriting a non-trivially-stale value
1256 // with a new trivially stale value.
1257 else if(!result.isTriviallyStale()) { m.replace(ex, cachedValue, result); }
1258
1259 // To try to avoid throwing away too much work through over-caution,
1260 // if the original value was replaced while we were working,
1261 // but the value now stored is a stale value (and this new one is not)
1262 // then attempt once more to atomically replace the stored value
1263 // with our non-stale value.
1264 if(!result.isStale())
1265 {
1266 final ExhibitPropsComputableMutable stored = m.get(ex);
1267 assert(stored != null);
1268 if((stored != result) && stored.isStale())
1269 { m.replace(ex, stored, result); }
1270 }
1271
1272 return(result);
1273 }
1274 catch(final Exception e)
1275 {
1276 // If full calculation failed then try for a fast approximation
1277 // iff there is currently nothing present at all.
1278 if(cachedValueIsNull)
1279 {
1280 final ExhibitPropsComputableMutable result = ExhibitPropsComputableMutable.generateFastApproximation(esa, gp);
1281 // Avoid race causing older value to overwrite a newer concurrently-computed one.
1282 m.putIfAbsent(ex, result);
1283 return(result);
1284 }
1285 }
1286 }
1287
1288 // Fall through to return non-null non-stale cached value.
1289 }
1290
1291 assert(cachedValue != null) : "result cannot be null";
1292 return(cachedValue);
1293 }
1294
1295 /**Private vote/correlation cache; never null after construction/deserialisation.
1296 * This state could be serialised to save some recomputation on re-use.
1297 * <p>
1298 * This is thread-safe.
1299 */
1300 private final transient ExhibitPropsComputableMutableVoteCacheIF _vcCache =
1301 new ExhibitPropsComputableMutableVoteCache();
1302
1303 /**Bring up-to-date any vote/correlation data.
1304 * Useful to call from a background thread from time to time
1305 * in order that all correlations will be precomputed when required.
1306 *
1307 * @param vars non-null handle on system variables
1308 * @param noTimeLimit if true, this will try to complete the work in one go
1309 */
1310 public void updateVoteCache(final SimpleVariablePipelineIF vars,
1311 final boolean noTimeLimit)
1312 throws IOException
1313 { _vcCache.update(this, vars, noTimeLimit); }
1314
1315 /**Create new epcmMap; never null.
1316 * This is where we set policy on map segments, load factor, etc.
1317 * <p>
1318 * We use a concurrency-friendly, lock-free Map for lookup and update speed.
1319 * We don't expect lots of concurrent writes,
1320 * so a few segments (fewer than say 10) should be OK.
1321 */
1322 private static ConcurrentHashMap<Name.ExhibitFull, ExhibitPropsComputableMutable> _createNewEpcmMap(final int capacity)
1323 {
1324 return(new ConcurrentHashMap<Name.ExhibitFull, ExhibitPropsComputableMutable>(
1325 capacity, 0.75f, 4));
1326 }
1327
1328
1329 /**Get exhibit Location information, specific or generic, if any; Location.NONE if none, never null.
1330 * This tries first for a "specific" location for this exhibit,
1331 * then tries for a generic location from the pattern-matching mechanism.
1332 *
1333 * @param exhibitName the full name of the exhibit; must not be null
1334 * @return the location information for this exhibit; NONE if none
1335 */
1336 public Location.Base getLocation(final Name.ExhibitFull exhibitName)
1337 {
1338 // Try for "specific" location information first.
1339 final Location.Base ls =
1340 getExhibitPropsLoadable(exhibitName).getLocation();
1341 if((ls != null) &&
1342 (!ls.equals(Location.NONE))) // Possibly redundant.
1343 { return(ls); }
1344
1345 // OK: should find any specific information, so try for generic info.
1346 final Location.Base lg = epgi.getLocationMap().
1347 locLookup(exhibitName, ExhibitAttrUtils.getAttrWords().getAttrWordsSortedSet());
1348 if(lg != null)
1349 { return(lg); }
1350
1351 // No location found.
1352 return(Location.NONE);
1353 }
1354
1355
1356 /**Immutable map from MD5 hash to exhibit full name; never null but referent my be null.
1357 * Volatile for lock-free thread-safe access.
1358 * <p>
1359 * Transient; (re)computed on demand after deserialisation and/or after memory shortage.
1360 */
1361 private volatile transient SoftReference<Map<ROByteArray, Name.ExhibitFull>> hashMD5ToNameSR = new SoftReference<Map<ROByteArray,ExhibitFull>>(null);
1362
1363 /**Immutable Set of all exhibits with duplicate or missing MD5 hashes; null until computed on first use.
1364 * Ideally this is empty; assumed to be small.
1365 * <p>
1366 * Volatile for lock-free thread-safe access.
1367 * <p>
1368 * Transient; (re)computed on demand after deserialisation.
1369 */
1370 private volatile transient Set<Name.ExhibitFull> hashMD5Err;
1371
1372 /**Get immutable map from MD5 hash to valid exhibit full name; never null.
1373 * Computed on first use.
1374 * <p>
1375 * Sets internal hashMD5ToNameSR and hashMD5Err on first use as a side-effect.
1376 * Note that hashMD5Err is never cleared again to avoid races in getHashMD5Err().
1377 */
1378 public Map<ROByteArray, Name.ExhibitFull> getHashMD5ToName()
1379 {
1380 Map<ROByteArray, Name.ExhibitFull> hashMD5ToName;
1381
1382 if(null == (hashMD5ToName = hashMD5ToNameSR.get()))
1383 {
1384 // Compute on first use...
1385
1386 // We try not to waste lots of space in this map.
1387 // We use a HashMap for lookup speed.
1388 final HashMap<ROByteArray, Name.ExhibitFull> m = new HashMap<ROByteArray, Name.ExhibitFull>(aeid.length + (aeid.length/4) + 1, 0.81f);
1389
1390 // Collect the "error" list too.
1391 final HashSet<Name.ExhibitFull> e = new HashSet<Name.ExhibitFull>();
1392
1393 for(final Name.ExhibitFull fullName : aeid.getAllExhibitNamesSorted())
1394 {
1395 final ExhibitPropsLoadable exhibitPropsLoadable = getExhibitPropsLoadable(fullName);
1396 final AccessionData accessionMetadata = exhibitPropsLoadable.getAccessionMetadata();
1397 if(accessionMetadata == null)
1398 {
1399 // No meta-data, so this goes in the error list...
1400 e.add(fullName);
1401 continue;
1402 }
1403 final ROByteArray hashMD5 = accessionMetadata.hashMD5;
1404 if(hashMD5 == null)
1405 {
1406 // No MD5 hash, so this goes in the error list...
1407 e.add(fullName);
1408 continue;
1409 }
1410 // Insert into table.
1411 final Name.ExhibitFull prev = m.put(hashMD5, fullName);
1412 if(prev != null)
1413 {
1414 assert(ExhibitName.validNameSyntax(prev));
1415 // Previous value falls into error bucket...
1416 e.add(prev);
1417 }
1418 }
1419
1420 // OK, save results, errors first to avoid races in getHashMD5Err().
1421 if(e.isEmpty()) { hashMD5Err = Collections.<ExhibitFull>emptySet(); }
1422 else { hashMD5Err = Collections.unmodifiableSet(e); }
1423 hashMD5ToName = Collections.unmodifiableMap(m);
1424 hashMD5ToNameSR = new SoftReference<Map<ROByteArray,ExhibitFull>>(hashMD5ToName);
1425 }
1426
1427 return(hashMD5ToName);
1428 }
1429
1430 /**Get immutable Set of all exhibits with duplicate or missing MD5 hashes; never null.
1431 * Ideally this is empty.
1432 */
1433 public Set<Name.ExhibitFull> getHashMD5Err()
1434 {
1435 if(hashMD5Err == null)
1436 {
1437 getHashMD5ToName(); // Get our value recomputed as a side-effect, race-free.
1438 }
1439 final Set<Name.ExhibitFull> result = hashMD5Err;
1440 assert(result != null); // Result is never null.
1441 return(result);
1442 }
1443
1444
1445 /**Filter class for exhibits in AEP.
1446 * If the accept() method returns false then the given exhibit
1447 * is excluded from the results of select().
1448 */
1449 public static interface AEPFilter
1450 {
1451 public boolean accept(AllExhibitProperties aep, Name.ExhibitFull fullExhibitName);
1452 }
1453
1454 /**Filter results optionally starting with an input set of exhibit names; never null.
1455 * This returns a List of full exhibit name results,
1456 * starting with the given input set of names,
1457 * or all exhibits (set up in smart-sorted order) if null is passed in.
1458 * <p>
1459 * This search may be parallelised.
1460 * <p>
1461 * Note that is its up to the filters to decide whether to
1462 * call getExhibitPropsComputable() to guarantee correct answers
1463 * or getExhibitPropsComputableIfPresent() to guarantee fast answers.
1464 * <p>
1465 * If the stopAfter parameter is greater than zero,
1466 * then this routine will stop as soon as possible after stopAfter results,
1467 * and will then truncate to exactly stopAfter results.
1468 * <p>
1469 * The results array is sorted in a `smart'ish way.
1470 * <p>
1471 * The filter argument must not be null.
1472 *
1473 * @param f filter to apply; never null
1474 * @param initialNames initial set of names in non-null, else all names
1475 * @param stopAfter +ve number of results to stop after, or zero for no limit
1476 *
1477 * @return non-null, possibly empty, smart-sorted full exhibit names
1478 */
1479 public Name.ExhibitFull[] select(final AEPFilter f,
1480 List<Name.ExhibitFull> initialNames,
1481 final int stopAfter)
1482 // throws IOException
1483 {
1484 assert(f != null);
1485 if(initialNames == null) { initialNames = aeid.getAllExhibitNamesSorted(); }
1486
1487 // Accumulate results here.
1488 List<Name.ExhibitFull> result = new ArrayList<Name.ExhibitFull>();
1489
1490 // Do the filtering...
1491 for(int i = initialNames.size(); --i >= 0; )
1492 {
1493 final Name.ExhibitFull thisExhibit = initialNames.get(i);
1494 if(!f.accept(this, thisExhibit)) { continue; }
1495 result.add(thisExhibit);
1496
1497 if((stopAfter > 0) && (result.size() >= stopAfter)) { break; }
1498 }
1499
1500 // Truncate results to size of need be.
1501 if((stopAfter > 0) && (result.size() > stopAfter))
1502 { result = result.subList(0, stopAfter); }
1503
1504 // Generate (sorted) array form and return...
1505 final Name.ExhibitFull r[] = new Name.ExhibitFull[result.size()];
1506 result.toArray(r);
1507 Arrays.sort(r, ExhibitName.SIMPLE_SMART_ORDER);
1508 return(r);
1509 }
1510
1511 /**Generate human-readable summary of this instance's internal state. */
1512 @Override
1513 public String toString()
1514 {
1515 final StringBuilder sb = new StringBuilder(128);
1516 sb.append("aeid.length=").append(aeid.length);
1517 sb.append(",epgi=").append(epgi);
1518 return(sb.toString());
1519 }
1520
1521 /**Returns a hash code value for the object; derived from the longHash.
1522 * Guaranteed zero if there are no exhibits, non-zero otherwise.
1523 *
1524 * @return a hash code value for this object.
1525 * @see Object#equals(Object)
1526 * @see Hashtable
1527 */
1528 @Override
1529 public int hashCode()
1530 {
1531 return(((int) (longHash >>> 32)) ^ (int) longHash);
1532 }
1533
1534 /**Indicates whether some other object is "equal to" this one; the underlying data is the same if true.
1535 * The hashNotChangedSince value is excluded from the comparison.
1536 * <p>
1537 * Loaded data is included in this notion of equality;
1538 * computed/derived data is excluded.
1539 * The construction time and "unchanged since" values are also excluded.
1540 * <p>
1541 * This routine is likely to be very slow,
1542 * especially to return a "true" result,
1543 * ie fully verify that two instances are identical in all significant aspects.
1544 *
1545 * @param obj the reference object with which to compare.
1546 * @return <code>true</code> if this object is the same as the obj
1547 * argument; <code>false</code> otherwise.
1548 * @see Boolean#hashCode()
1549 * @see Hashtable
1550 */
1551 @Override
1552 public synchronized boolean equals(final Object obj)
1553 {
1554 if(this == obj) { return(true); }
1555 // Must be of the same type to be equal.
1556 if(!(obj instanceof AllExhibitProperties)) { return(false); }
1557 final AllExhibitProperties other = (AllExhibitProperties) obj;
1558
1559 // We can quickly reject items that don't have the same hash...
1560 // (Else hashCode() may be inconsistent with equals().)
1561 if(longHash != other.longHash) { return(false); }
1562
1563 // Check the underlying immutable and other source data.
1564 if(!aeid.equals(other.aeid)) { return(false); }
1565 if(!epgi.equals(other.epgi)) { return(false); }
1566 if(!loadedProps.equals(other.loadedProps)) { return(false); }
1567 if(!computedProps.equals(other.computedProps)) { return(false); }
1568
1569 // OK, seems to actually be equal.
1570 return(true);
1571 }
1572
1573 /**Compact the internal representation of this instance (and its sub-objects) if possible.
1574 * This has no effect on the logical content of this instance in-memory or serialised,
1575 * is guaranteed to be safe to run concurrently with other uses of this instance
1576 * (and will take any locks as needed to work incrementally),
1577 * and may do nothing but consume some CPU cycles.
1578 * <p>
1579 * This may be able to convert some state to a more memory-efficient representation
1580 * after construction or deserialisation,
1581 * and is suitable to call in a background thread.
1582 * <p>
1583 * We don't prevent multiple concurrent calls to this routine,
1584 * since they are at worst wasteful of CPU but not unsafe.
1585 * <p>
1586 * (Should not be called on a partially-constructed object.)
1587 */
1588 public void compact()
1589 {
1590 // ASAP discard easily/quickly recomputable state if memory is stressed.
1591 if(MemoryTools.isMemoryStressed())
1592 {
1593 exhibitsByAttribute.clear();
1594 hashMD5ToNameSR.clear();
1595 mainWords.clear();
1596 }
1597
1598 // Anything beyond this point is not worth running more than once.
1599 // (It's not unsafe to run concurrently, so we don't lock out multiple threads.)
1600 if(_compacted) { return; }
1601
1602 // Iterate over all compact()able per-exhibit data...
1603 // Only effective the first time through, so (usually) only run once per instance.
1604 for(final Name.ExhibitFull exhibitName : aeid.getAllExhibitNamesSorted())
1605 {
1606 try
1607 {
1608 final ExhibitPropsComputable epc = getExhibitPropsComputable(exhibitName);
1609 if(epc != null) { epc.compact(); }
1610 final ExhibitPropsLoadable epl = getExhibitPropsLoadable(exhibitName);
1611 if(epl != null) { epl.compact(); }
1612 }
1613 catch(final RuntimeException e)
1614 {
1615 // Ignore (but log) any transient problems.
1616 e.printStackTrace();
1617 }
1618 }
1619
1620 _compacted = true;
1621 }
1622
/**Set true once compact() has run the first time.
 * Marked volatile for thread-safe lock-free access.
 * <p>
 * compact() reads this flag without taking a lock in order to skip its per-exhibit pass,
 * and sets it once that pass has finished.
 * Being transient, this flag is not part of the default serialised form.
 */
private transient volatile boolean _compacted;
1627
1628 /**For Compactable interface, or null if none. */
1629 public String getCompactableInstanceName() { return("AEP|"+longHash); }
1630
1631
1632 /**Estimate of bytes taken up by uncompressed serialised form; strictly positive.
1633 * This is only a hand-waving empirical estimate,
1634 * and may not actually inspect the internal state much.
1635 * <p>
1636 * The result is capped at Integer.MAX_VALUE.
1637 */
1638 public int estimateSerialBytes()
1639 {
1640 final long result = 1024 + // Allow for some fixed overhead.
1641 (((long) aeid.length) << 11); // Allow about 2kB per exhibit on average.
1642 assert(result > 0);
1643 if(result > Integer.MAX_VALUE) { return(Integer.MAX_VALUE); }
1644 return((int) result);
1645 }
1646
1647 /**Deserialise: use constructor for validation, defensive copying, renormalisation, etc.
1648 */
1649 protected Object readResolve()
1650 // throws ObjectStreamException
1651 {
1652 // Construct new instance of object in normal defensive way.
1653 // Note: allow the hash to be recomputed to suit any algorithm changes;
1654 // we keep the hashUnchangedSince value iff the hash doesn't change on recomputation.
1655 return(new AllExhibitProperties(this, epgi, aeid, loadedProps, computedProps, 0));
1656 }
1657
1658
1659 /**Validate fields/state.
1660 * Called in the constructor and possibly after de-serialising.
1661 * <p>
1662 * Barf if something bad is found.
1663 * (Maybe allow some extra info in debug version.)
1664 */
1665 public void validateObject()
1666 throws InvalidObjectException
1667 {
1668 // Check that all components are sane and safe.
1669 if(longHash < 0)
1670 { throw new InvalidObjectException("bad object: longHash < 0"); }
1671 if(constructedAt < 0) // We could have a tighter check, eg > 2001/01/01.
1672 { throw new InvalidObjectException("bad object: constructedAt < 0"); }
1673 if(hashNotChangedSince < 0) // We could have a tighter check, eg > 2001/01/01.
1674 { throw new InvalidObjectException("bad object: hashNotChangedSince < 0"); }
1675 if(aeid == null)
1676 { throw new InvalidObjectException("bad object: aeid == null"); }
1677 if(aeid.timestamp < 0) // Sheer nosiness!
1678 { throw new InvalidObjectException("bad object: aeid.timestamp < 0"); }
1679 if(epgi == null)
1680 { throw new InvalidObjectException("bad object: epgi == null"); }
1681 // Make sure that loadedProps maps valid exhibits to ExhibitPropsLoadable items.
1682 if(loadedProps == null)
1683 { throw new InvalidObjectException("bad object: loadedProps == null"); }
1684 for(final Iterator<?> it = loadedProps.keySet().iterator(); it.hasNext(); )
1685 {
1686 final Object key = it.next();
1687 // Allow String keys for deserialising old-format objects ONLY...
1688 if(!(key instanceof String) && !(key instanceof Name.ExhibitFull))
1689 { throw new InvalidObjectException("bad object: loadedProps: non-String/non-FullName key"); }
1690 if(!aeid.isPresent((CharSequence) key))
1691 { throw new InvalidObjectException("bad object: loadedProps: non-exhibit key"); }
1692 if(!(loadedProps.get(key) instanceof ExhibitPropsLoadable))
1693 { throw new InvalidObjectException("bad object: loadedProps: non-ExhibitPropsLoadable value"); }
1694 }
1695 // Make sure that computedProps maps valid exhibits to ExhibitPropsComputable items.
1696 if(computedProps == null)
1697 { throw new InvalidObjectException("bad object: computedProps == null"); }
1698 synchronized(computedProps)
1699 {
1700 for(final Iterator<?> it = computedProps.keySet().iterator(); it.hasNext(); )
1701 {
1702 final Object key = it.next();
1703 // Allow String keys for deserialising old-format objects ONLY...
1704 if(!(key instanceof String) && !(key instanceof Name.ExhibitFull))
1705 { throw new InvalidObjectException("bad object: computedProps: non-String/non=ExhibitFull key"); }
1706 if(!aeid.isPresent((CharSequence) key))
1707 { throw new InvalidObjectException("bad object: computedProps: non-exhibit key"); }
1708 if(!(computedProps.get(key) instanceof ExhibitPropsComputable))
1709 { throw new InvalidObjectException("bad object: computedProps: non-ExhibitPropsComputable value"); }
1710 }
1711 }
1712 // Make sure that any epcmMap maps valid exhibits to ExhibitPropsComputableMutable items.
1713 if(epcmMap != null)
1714 {
1715 for(final Iterator<?> it = epcmMap.keySet().iterator(); it.hasNext(); )
1716 {
1717 final Object key = it.next();
1718 if(!(key instanceof Name.ExhibitFull))
1719 { throw new InvalidObjectException("bad object: epcmMap: non-Name.ExhibitFull key"); }
1720 if(!aeid.isPresent((Name.ExhibitFull) key))
1721 { throw new InvalidObjectException("bad object: epcmMap: non-exhibit key"); }
1722 if(!(epcmMap.get(key) instanceof ExhibitPropsComputableMutable))
1723 { throw new InvalidObjectException("bad object: epcmMap: non-ExhibitPropsComputableMutable value"); }
1724 }
1725 }
1726 }
1727
/**Our serial version: the explicit version identifier used by Java serialisation for this class. */
private static final long serialVersionUID = -2563720785711714753L;
1730 }