001 /*
002 Copyright (c) 1996-2011, Damon Hart-Davis
003 All rights reserved.
004
005 Redistribution and use in source and binary forms, with or without
006 modification, are permitted provided that the following conditions are
007 met:
008
009 * Redistributions of source code must retain the above copyright
010 notice, this list of conditions and the following disclaimer.
011
012 * Redistributions in binary form must reproduce the above copyright
013 notice, this list of conditions and the following disclaimer in the
014 documentation and/or other materials provided with the
015 distribution.
016
017 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
018 IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
019 TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
020 PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
021 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
022 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
023 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
024 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
025 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
026 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
027 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
028 */
029
030 package org.hd.d.pg2k.svrCore;
031
032 import java.io.IOException;
033 import java.io.InterruptedIOException;
034 import java.io.InvalidObjectException;
035 import java.io.ObjectInputValidation;
036 import java.io.ObjectStreamException;
037 import java.io.Serializable;
038 import java.util.ArrayList;
039 import java.util.Arrays;
040 import java.util.BitSet;
041 import java.util.Collections;
042 import java.util.Enumeration;
043 import java.util.HashSet;
044 import java.util.Hashtable;
045 import java.util.Iterator;
046 import java.util.List;
047 import java.util.Set;
048 import java.util.SortedSet;
049 import java.util.concurrent.ConcurrentHashMap;
050 import java.util.concurrent.ExecutionException;
051 import java.util.concurrent.Future;
052 import java.util.concurrent.Semaphore;
053 import java.util.concurrent.atomic.AtomicInteger;
054 import java.util.concurrent.locks.ReentrantLock;
055
056 import org.hd.d.pg2k.svrCore.Name.ExhibitFull;
057 import org.hd.d.pg2k.svrCore.MIME.ExhibitMIME;
058 import org.hd.d.pg2k.svrCore.vars.BasicVarMgrInterface;
059 import org.hd.d.pg2k.svrCore.vars.EventPeriod;
060 import org.hd.d.pg2k.svrCore.vars.EventVariableValue;
061 import org.hd.d.pg2k.svrCore.vars.SimpleVariableDefinition;
062 import org.hd.d.pg2k.svrCore.vars.SystemVariables;
063
064 import ORG.hd.d.IsDebug;
065
066 /**
067 * Created by IntelliJ IDEA.
068 * User: DHD
069 * Date: 02-Jul-2006
070 * Time: 16:40:48
071 */
072
073 /**Class to cache vote computations and correlated values.
074 * In particular this aims to avoid repeated expensive recomputation
075 * of underlying vote values so as global correlations can be
076 * efficiently computed.
077 */
078 public final class ExhibitPropsComputableMutableVoteCache implements ExhibitPropsComputableMutableVoteCacheIF,
079 Serializable, ObjectInputValidation
080 {
081 /**Construct default empty cache. */
082 public ExhibitPropsComputableMutableVoteCache() { }
083
084 /**Reconstruct object, eg during deserialisation.
085 * We take a defensive copy of mutable state, etc.
086 * <p>
087 * We also take the opportunity to strip out any invalid/stale state.
088 * <p>
089 * We hold a lock on the map parameter while copying its state.
090 */
091 private ExhibitPropsComputableMutableVoteCache(final Hashtable<Name, Tuple.Pair<Long, ExhibitPropsComputableMutable.Factor>> map)
092 throws InvalidObjectException
093 {
094 assert(map != null);
095
096 // Copy in legitimate state.
097 synchronized(map) { voteCorrCacheMap.putAll(map); }
098 // Clear stale entries from those supplied.
099 _clearStaleState(voteCorrCacheMap);
100
101 validateObject();
102 }
103
104 /**Key-separator character; not a valid exhibit-name character. */
105 private static final char KEY_SEPARATOR_CHAR = ':';
106
107 /**Correlation types.
108 * By default most correlation types/data is "dense",
109 * ie there should be many examples for and against each value.
110 * <p>
111 * Some types are "sparse",
112 * ie where we usually expect only one or two examples for a given value,
113 * and so we want to filter these more gently than usual.
114 */
115 public static enum CorrType
116 {
117 /**By variation on same exhibit (sparse). */ VARIANT(true),
118 /**By similar exhibit (sparse). */ SIMILAR(true),
119 /**By exhibit size (bytes). */ SIZE,
120 /**By exhibit attribute word(s). */ ATTRIBUTE,
121 /**By exhibit resolution (eg pixels). */ RESOLUTION,
122 /**By exhibit main word. */ MAIN_WORD,
123 /**By exhibit category. */ CATEGORY,
124 /**By exhibit type. */ TYPE,
125 /**By author. */ AUTHOR,
126 /**By MIME primary type component. */ MIME1,
127 /**By prefixes. */ PREFIX,
128 /**By all stem words. */ STEM_WORDS;
129
130 /**By default correlations are not expected to be sparse. */
131 private CorrType() { sparse = false; }
132 /*Construct correlation with specified sparseness. */
133 private CorrType(final boolean isSparse) { sparse = isSparse; }
134
135 /**Is this correlation "sparse"?. */
136 private final boolean sparse;
137
138 /**Get the "sparseness" flag. */
139 public final boolean isSparse() { return(sparse); }
140
141 /**Make the appropriate cache lookup key(s) for a given value and full valid exhibit name; never null.
142 * Add zero-or-more, non-null, non-empty cache key(s) of the form:
143 * <samp>ENUM : value</samp> eg <samp>SIZE:16</samp>
144 * @param corrType the correlation type; never null
145 * @param esa exhibit details; never null
146 * @param aep never null
147 * @param result non-null, mutable Set to which result is added
148 *
149 * @throws IOException if key cannot be constructed
150 */
151 static void makeCacheKey(final CorrType corrType,
152 final ExhibitStaticAttr esa,
153 final AllExhibitProperties aep,
154 final Set<Name> result)
155 throws IOException
156 {
157 assert(corrType != null);
158 assert(esa != null);
159 assert(aep != null);
160
161 switch(corrType)
162 {
163 case AUTHOR:
164 { result.add(Name.create(corrType.toString() + KEY_SEPARATOR_CHAR + ExhibitName.getAuthorComponent(esa.getCharSequence()))); return; }
165
166 case TYPE:
167 { result.add(Name.create(corrType.toString() + KEY_SEPARATOR_CHAR + ExhibitName.getExtensionComponent(esa.getCharSequence()))); return; }
168
169 case CATEGORY:
170 { result.add(Name.create(corrType.toString() + KEY_SEPARATOR_CHAR + ExhibitName.getCategoryComponent(esa.getCharSequence()))); return; }
171
172 case MAIN_WORD:
173 {
174 // Just the main word matches, case-sensitively.
175 final Name.ExhibitShort f = esa.getExhibitFullName().getShortName();
176 final int dash = TextUtils.indexOf(f, ExhibitName.WORD_SEP);
177 final CharSequence mainWord = f.subSequence(0, dash);
178 result.add(Name.create(corrType.toString() + KEY_SEPARATOR_CHAR + mainWord));
179 return;
180 }
181
182 case MIME1:
183 {
184 final ExhibitMIME.ExhibitTypeParameters type = (ExhibitMIME.getInputFileType(esa.getCharSequence()));
185 if(type == null) { return; }
186 final int slash = type.mimeType.indexOf('/');
187 if(slash < 1) { return; }
188 final String primaryComponent = type.mimeType.substring(0, slash);
189 result.add(Name.create(corrType.toString() + KEY_SEPARATOR_CHAR + primaryComponent));
190 return;
191 }
192
193 case SIMILAR:
194 {
195 // "Similar" means the main-words component matches
196 // case-insensitively.
197 result.add(Name.create(corrType.toString() + KEY_SEPARATOR_CHAR + ExhibitName.getMainWordsComponent(
198 esa.getCharSequence(),
199 ExhibitAttrUtils.getAttrWords().getAttrWordsSortedSet()).toString().toLowerCase()));
200 return;
201 }
202
203 case VARIANT:
204 {
205 // "Variant" means all components match except:
206 // * the category
207 // * the attribute words
208 // * the extension
209 // All matching is case-sensitive.
210 final CharSequence cs = esa.getCharSequence();
211 final CharSequence mainWords = ExhibitName.getMainWordsComponent(
212 cs, ExhibitAttrUtils.getAttrWords().getAttrWordsSortedSet());
213 final int num = ExhibitName.getNumberInSeriesComponent(cs);
214 final CharSequence auth = ExhibitName.getAuthorComponent(cs).toString();
215 final StringBuilder sb = new StringBuilder(29 + mainWords.length());
216 sb.append(corrType.toString()).append(KEY_SEPARATOR_CHAR);
217 sb.append(mainWords).append('-');
218 sb.append(num).append('-');
219 sb.append(auth);
220 result.add(Name.create(sb));
221 return;
222 }
223
224 case SIZE: // Size in bytes grouped into buckets by exponent.
225 { result.add(Name.create(corrType.toString() + KEY_SEPARATOR_CHAR + GenUtils.log2Approx(esa.length))); return; }
226
227 case RESOLUTION:
228 {
229 final StringBuilder sb = new StringBuilder();
230 sb.append(corrType.toString()).append(KEY_SEPARATOR_CHAR);
231
232 // Some notion of the raw information content of the exhibit.
233 // Zero-or-more items extracted from meta-data.
234 final ExhibitPropsComputable ePC =
235 aep.getExhibitPropsComputable(esa.getExhibitFullName());
236 if(ePC != null)
237 {
238 final java.awt.Dimension xyDim = ePC.getXyDimensions();
239 if(xyDim != null)
240 {
241 // Add pixel resolution for images.
242 sb.append("px");
243 sb.append(GenUtils.log2Approx(xyDim.width * (long) xyDim.height));
244 result.add(Name.create(sb));
245 }
246
247 // TODO: should have sample rate for audio, etc.
248 // final Node md = ePC.getMetadata();
249 }
250
251 // We will not have created any key
252 // if no useful metadata was available.
253 return;
254 }
255
256 case ATTRIBUTE:
257 {
258 // Get the exhibit's attribute words, if any.
259 final Set<String> aws = ExhibitName.getAttributeWordsComponentSortedSet(
260 esa.getCharSequence(), ExhibitAttrUtils.getAttrWords().getAttrWordsSortedSet());
261 // Prefix them and add them to the result.
262 for(final String aw : aws)
263 { result.add(Name.create(corrType.toString() + KEY_SEPARATOR_CHAR + aw)); }
264 return;
265 }
266
267 case STEM_WORDS:
268 {
269 // By all (mono-cased) stem words.
270 final Enumeration<?> mainWords = ExhibitName.getMainWords(
271 esa.getCharSequence(), ExhibitAttrUtils.getAttrWords().getAttrWordsSortedSet());
272 while(mainWords.hasMoreElements())
273 { result.add(Name.create(corrType.toString() + KEY_SEPARATOR_CHAR + ((String) mainWords.nextElement()).toLowerCase())); }
274 return;
275 }
276
277 case PREFIX:
278 {
279 // By all stem prefixes of the "virtual" name.
280 final Enumeration<?> mainWords = ExhibitName.getMainWords(
281 esa.getCharSequence(), ExhibitAttrUtils.getAttrWords().getAttrWordsSortedSet());
282 final StringBuilder sb = new StringBuilder(esa.getCharSequence().length() - 4);
283 sb.append(ExhibitName.getCategoryComponent(esa.getCharSequence()));
284 sb.append('/');
285 while(mainWords.hasMoreElements())
286 {
287 final String word = ((String) mainWords.nextElement());
288 sb.append(word).append(ExhibitName.WORD_SEP);
289 result.add(Name.create(corrType.toString() + KEY_SEPARATOR_CHAR + sb));
290 }
291 return;
292 }
293
294 default:
295 { throw new Error("Cannot generate key for correlation type " + corrType); }
296 }
297 }
298
299 /**Make all cache keys for a given exhibit; never null.
300 * This may return a variable number of keys,
301 * and may ignore those that cannot currently be made and
302 * are unlikely ever to be made,
303 * but is as consistent as possible for any one exhibit.
304 *
305 * @return non-null, possibly-empty, set of cache keys
306 */
307 static Set<Name> makeCacheKeys(final ExhibitStaticAttr esa,
308 final AllExhibitProperties aep)
309 {
310 final HashSet<Name> result = new HashSet<Name>();
311 for(final CorrType ct : CorrType.values())
312 {
313 try { makeCacheKey(ct, esa, aep, result); }
314 catch(final IOException e) { /* Ignore ungeneratable keys. */ }
315 }
316 return(result);
317 }
318 }
319
320
321 /**Map from vote/correlation key to compute Factor and period for which it was computed; never null after construction/deserialisation.
322 * Since all our calculations depend on VLONG events,
323 * cache entries need only be invalidated when the period number
324 * changes.
325 * <p>
326 * One valid key format is a full, valid exhibit name as a Name.ExhibitFull
327 * <p>
328 * Other keys are of the form of a CorrType name followed by a colon
329 * followed by the rest of the key.
330 * <p>
331 * This state can be serialised; it is also recomputed on demand.
332 * <p>
333 * We use a Hashtable to be thread-safe.
334 */
335 private final Hashtable<Name, Tuple.Pair<Long, ExhibitPropsComputableMutable.Factor>> voteCorrCacheMap =
336 new Hashtable<Name, Tuple.Pair<Long, ExhibitPropsComputableMutable.Factor>>();
337
338 /**Compute the vote or retrieve from cache; never null.
339 * A computed/cached value is valid until the VLONG period changes.
340 * <p>
341 * The returned factor's goodness can range from -1 to +1,
342 * and confidence from 0 to +1;
343 * any scaling required will have to be applied elsewhere.
344 *
345 * @param exhibitName full, valid exhibit name; never null
346 * @param aep never null
347 * @param vars never null
348 * @return vote Factor for the given exhibit; never null.
349 * @throws java.io.IOException
350 */
351 public final ExhibitPropsComputableMutable.Factor calcVoteFactor(
352 final ExhibitFull exhibitName,
353 final AllExhibitProperties aep,
354 final BasicVarMgrInterface vars)
355 throws IOException
356 {
357 if(!ExhibitName.validNameSyntaxBasic(exhibitName))
358 { throw new IllegalArgumentException(); }
359 if((aep == null) || (vars == null))
360 { throw new IllegalArgumentException(); }
361
362 // Avoid clogging up our cache for non-existent exhibits...
363 if(!aep.aeid.isPresent(exhibitName))
364 { throw new IllegalArgumentException("no such exhibit"); }
365
366 // Compute the current period.
367 final long currentPeriod = EventPeriod.VLONG.getIntervalNumber(System.currentTimeMillis());
368
369 // Get the current cached value, if any.
370 final Tuple.Pair<Long, ExhibitPropsComputableMutable.Factor> cached =
371 voteCorrCacheMap.get(exhibitName);
372 if(cached != null)
373 {
374 // If it is not null, but is stale,
375 // then clear the entry (and go on to recompute it in a moment).
376 // (There is a small race risk here,
377 // but the worst that can happen is some wasted
378 // recomputations, not a wrong result.)
379 final long l = cached.first.longValue();
380 // If the value is still current then return it!
381 if(l == currentPeriod)
382 { return(cached.second); }
383 // // Clear stale cached value.
384 // else
385 // { voteCorrCacheMap.remove(exhibitName); }
386 }
387
388 // OK no cached value, or stale, so (re)compute it,
389 // and store the new value in the cache.
390 final ExhibitPropsComputableMutable.Factor result =
391 ExhibitPropsComputableMutable.calcVoteFactor(exhibitName, aep, vars, 0);
392 voteCorrCacheMap.put(exhibitName,
393 new Tuple.Pair<Long, ExhibitPropsComputableMutable.Factor>(
394 MemoryTools.intern(new Long(currentPeriod)), result));
395
396 // Return our result...
397 return(result);
398 }
399
400 /**Get correlates for specified exhibit; never null but result may be empty.
401 * This is based on votes of other "related" exhibits.
402 * <p>
403 * The goodness of each Factor is either -1 or +1,
404 * with the correlation/confidence ranging between 0 and 1.
405 * <p>
406 * This will return values computed up to one period ago if need be,
407 * to help avoid a sudden splurge of CPU effort as we tick from one period
408 * to the next.
409 * <p>
410 * No particular ordering of the results is guaranteed,
411 * but there will be no duplicates and no nulls.
412 *
413 * @param force if true, force complete computation if need be,
414 * else we will just return what we have in cache;
415 * complete recomputation may be expensive but should last a long time
416 * and we will abort with an IOException if we cannot complete
417 * the recomputation in a reasonable time
418 *
419 * @throws IOException cannot extract required correlates
420 */
421 public ExhibitPropsComputableMutable.Factor[] getCorrelates(
422 final ExhibitStaticAttr esa,
423 final AllExhibitProperties aep,
424 final BasicVarMgrInterface vars,
425 final boolean force)
426 throws IOException
427 {
428 if((esa == null) || (aep == null) || (vars == null))
429 { throw new IllegalArgumentException(); }
430
431 // Avoid clogging up our cache for non-existent exhibits...
432 if(!aep.aeid.isPresent(esa.getCharSequence()))
433 { return(NO_CORRELATES); }
434
435 // Compute the current period.
436 final long currentPeriod = EventPeriod.VLONG.getIntervalNumber(System.currentTimeMillis());
437
438 // Non-stale cached values available.
439 final List<ExhibitPropsComputableMutable.Factor> result =
440 new ArrayList<ExhibitPropsComputableMutable.Factor>();
441
442 // Check all correlation types.
443 // Accept any cached value however ancient, unless "forcing".
444 _findCorrelates(esa, aep, currentPeriod, result, force);
445
446 // If we got no valid correlates for this exhibit at all,
447 // then try for a full recomputation and re-extract our factors.
448 if(force && (result.size() == 0))
449 {
450 // Update/recompute values if needed.
451 update(aep, vars, false);
452
453 // Re-do our factor extraction...
454 _findCorrelates(esa, aep, currentPeriod, result, force);
455 }
456
457 if(result.size() == 0)
458 { return(NO_CORRELATES); /* Efficient way to return empty list. */ }
459
460 final ExhibitPropsComputableMutable.Factor[] r =
461 new ExhibitPropsComputableMutable.Factor[result.size()];
462 result.toArray(r);
463 return(r);
464 }
465
466 /**Find out if a category is rated "good"/popular or not.
467 * Returns TRUE if rated good, FALSE if bad,
468 * null if not significantly either or if not known.
469 *
470 * @param categoryDir the initial directory component of an extant exhibit
471 * @param force if true may force (expensive) computation to give
472 * a more accurate answer,
473 * else may return a more approximate or stale answer,
474 * or none at all (null)
475 */
476 public Boolean isCategoryGood(final CharSequence categoryDir,
477 final AllExhibitProperties aep,
478 final BasicVarMgrInterface vars,
479 final boolean force)
480 throws IOException
481 {
482 if(!ExhibitName.validNameInitialComponentSyntax(categoryDir))
483 { throw new IllegalArgumentException(); }
484
485 //System.out.println("***Looking for category goodness: " + categoryDir);
486
487 // Make a syntactically-correct fake name/attrs from the category.
488 final ExhibitStaticAttr esa = new ExhibitStaticAttr(
489 categoryDir + "/a-A.a", 1, System.currentTimeMillis());
490
491 // Compute the correlate name for this category.
492 final Set<Name> keys = new HashSet<Name>();
493 ExhibitPropsComputableMutableVoteCache.CorrType.makeCacheKey(
494 ExhibitPropsComputableMutableVoteCache.CorrType.CATEGORY,
495 esa, aep, keys);
496 assert(keys.size() == 1); /* We expect to get exactly one key. */
497 final Name key = keys.iterator().next();
498
499 //System.out.println("***Looking for key: " + key);
500
501 // Ensure data is current, if so requested.
502 if(force) { update(aep, vars, true); }
503
504 final Tuple.Pair<Long, ExhibitPropsComputableMutable.Factor> f =
505 voteCorrCacheMap.get(key);
506 // If no Factor or too old then return null immediately.
507 if(f == null)
508 { return(null); }
509 if(force &&
510 (f.first.longValue() < EventPeriod.VLONG.getIntervalNumber(System.currentTimeMillis())-1))
511 { return(null); }
512
513 //System.out.println("***Got: " + f.second);
514
515 // If Factor not definite/strong enough then return null...
516 if(Math.abs(f.second.goodness * f.second.correlation) < ExhibitPropsComputableMutable.GOODBAD_LIMIT)
517 { return(null); }
518
519 // Return a definite good/bad result.
520 return((f.second.goodness >= 0) ? Boolean.TRUE : Boolean.FALSE);
521 }
522
523 /**Append to the result List any factors we find for this exhibit.
524 * This ignores any keys that cannot be constructed.
525 *
526 * @param esa the exhibit name/attr; never null
527 * @param currentPeriod current VLONG period; strictly positive
528 * @param result found correlates are appended to this value
529 * @param force if false then accept any extant value;
530 * if true then only accept values up to one period old
531 */
532 private void _findCorrelates(final ExhibitStaticAttr esa,
533 final AllExhibitProperties aep,
534 final long currentPeriod,
535 final List<ExhibitPropsComputableMutable.Factor> result,
536 final boolean force)
537 {
538 // Make all cache keys for this exhibit...
539 final Set<Name> keys = CorrType.makeCacheKeys(esa, aep);
540 // and update all keyed entries all with this exhibit's rating.
541 for(final Name key : keys)
542 {
543 final Tuple.Pair<Long, ExhibitPropsComputableMutable.Factor> f =
544 voteCorrCacheMap.get(key);
545 if(f == null) { continue; }
546 if(!force)
547 { result.add(f.second); }
548 else if(f.first.longValue() >= currentPeriod-1)
549 { result.add(f.second); }
550 // else
551 // { result.remove(key); /* Remove stale value. */ }
552 }
553 }
554
555 /**Immutable empty correlates list. */
556 private static final ExhibitPropsComputableMutable.Factor[] NO_CORRELATES =
557 new ExhibitPropsComputableMutable.Factor[0];
558
559 /**Significance confidence threshold; non-negative.
560 * Computed correlations of confidence less than +/- this
561 * are discarded to save space.
562 * <p>
563 * A value between 0.001 (0.1%) and 0.1 (10%) is probably reasonable.
564 */
565 private static final float SIGNIFICANCE_CONF_THRESHOLD = 0.003f;
566
567 /**Significance count threshold for non-sparse correlations; non-negative.
568 * Computed non-sparse correlations with counts less than this
569 * are discarded to save space and time.
570 * <p>
571 * A value between 2 and 10 is probably reasonable;
572 * an odd value avoids recording a "neutral" score at minimum count.
573 */
574 private static final int SIGNIFICANCE_COUNT_THRESHOLD = 3;
575
576 /**The class in which we accumulate stats while recomputing correlations. */
577 private static final class Accum
578 {
579 /**Number of samples that have contributed to this value; strictly positive. */
580 final AtomicInteger count = new AtomicInteger();
581
582 /**Accumulated (sum) confidence value with negative goodness being treated as a negative confidence.
583 * All access protected by the instance lock.
584 */
585 private double totalConfidence;
586
587 synchronized double getTotalConfidence()
588 { return(totalConfidence); }
589
590 synchronized void addToTotalConfidence(final double delta)
591 { totalConfidence += delta; }
592 }
593
594 /**Time limit for update() in ms; strictly positive.
595 * A value of the order of a second or so is probably right.
596 * This allows some significant work to get done and cached,
597 * but should not block anything (such as page generation)
598 * for an unbearable amount of time even to a visitor.
599 * <p>
600 * This should still let us get a reasonable amount of work done
601 * so that we can rebuild the data incrementally if necessary.
602 */
603 private final int UPDATE_TIME_LIMIT_MS = 1023;
604
605 /**Private lock to prevent more than one thread at once expending effort in the core part of update(). */
606 private final ReentrantLock _uCoreLock = new ReentrantLock();
607
608 /**Maximum number of threads to allow in update(); strictly positive.
609 * It is probably worth allowing into update() a few threads
610 * to try to parallelise probably-I/O-bound sections at the start
611 * and to do CPU-bound computations concurrently on multi-CPU systems,
612 * but using too many concurrent threads will possibly just waste resources
613 * and overload I/O subsystems (eg back-end HTTP connections).
614 */
615 private static final int _computeMaxConcurrentUpdateThreads = Math.min(7,
616 1 + Runtime.getRuntime().availableProcessors());
617
618 /**Counting semaphore to limit update() first-phase concurrency; never null. */
619 private final Semaphore _uStartSem = new Semaphore(_computeMaxConcurrentUpdateThreads);
620
621 /**If true then try to load/compute early the scores of voted-for-exhibits.
622 * An eager set-up attempts to start its computations
623 * while part of its data (one of the "all" buckets) is still being fetched.
624 * <p>
625 * An eager setup should be able to compute the correlations more quickly,
626 * but at increased risk of using part/all somewhat/very stale values,
627 * and thus generating a less good result.
628 */
629 private static final boolean EAGER_VOTE_LOAD = true;
630
631 /**If true then try to use the "all" votes buckets rather than summing the other buckets.
632 * Using the "all" bucket may less accurate/comprehensive,
633 * and may delay initialisation if it has to be fetched from upstream.
634 */
635 private static final boolean USE_ALL_BUCKET = false;
636
637 /**Bring correlations data up to date.
638 * This may also perform housekeeping such as clearing stale state.
639 * <p>
640 * This may be a relatively expensive call,
641 * though if values are up-to-date then this should be quite cheap.
642 * <p>
643 * This is thread-safe and will use multiple caller threads to help fetch
644 * vote data and concurrently recompute exhibit scores,
645 * though second and subsequent concurrent calling threads
646 * will return immediately upon reaching the core correlation computations
647 * in order to prevent wasted duplicate effort.
648 * <p>
649 * If this routine cannot get values it needs then it throws an IOException.
650 * <p>
651 * No work will be done for an empty AEP.
652 * <p>
653 * This will abort with an IOException if taking too long
654 * (of the order of many seconds) or cannot otherwise ensure that
655 * the correlations data is up-to-date.
656 * Thus if this returns without throwing an IOException then
657 * the correlations data can be assumed to be up-to-date.
658 *
659 * @param aep current exhibit properties; never null
660 * @param vars handle on system variables; never null
661 * @param noTimeLimit if true, this runs until complete if possible
662 *
663 * @throws IOException if the correlations data cannot be guaranteed
664 * to be up-to-date
665 * and/or a problem was found fetching required data
666 * and/or the routine was taking to long to complete the calculations
667 */
668 public void update(final AllExhibitProperties aep,
669 final BasicVarMgrInterface vars,
670 final boolean noTimeLimit)
671 throws IOException
672 {
673 if((aep == null) || (vars == null))
674 { throw new IllegalArgumentException(); }
675
676 // Nothing to be done for an empty set of exhibits.
677 if(aep.aeid.length == 0) { return; }
678
679 // If we can find our marker key,
680 // and it is for the current period,
681 // then we can assume that computations are up-to-date.
682 final long currentPeriod = EventPeriod.VLONG.getIntervalNumber(System.currentTimeMillis());
683 final Tuple.Pair<Long, ExhibitPropsComputableMutable.Factor> entry =
684 voteCorrCacheMap.get(MARKER_KEY);
685 if((entry != null) && (entry.first.longValue() == currentPeriod))
686 { return; /* Apparently completely up-to-date! */ }
687
688 if(_uCoreLock.isLocked())
689 { throw new IOException("update() already busy in core phase; vote cache data is stale"); }
690
691
692 // The complete set of voted-for exhibits (pro and con).
693 // Guess size based on a few votes per sample period (ie per day).
694 // Note that these values are seen only by this thread.
695 final Set<Name.ExhibitFull> votedForNames = new HashSet<Name.ExhibitFull>(8 * SystemVariables.EVENT_SAMPLES_RETAINED);
696 final long currentPeriodL;
697
698 // If lots of external threads are needing the update() to be done
699 // then allow a few of them in to help with the work.
700 if(!_uStartSem.tryAcquire())
701 { throw new IOException("update() already busy in first phase; vote cache data is stale"); }
702 try
703 {
704 // Are we probably the first thread into this section?
705 // (It doesn't matter if we get this wrong occasionally.)
706 final boolean firstThreadIn = _uStartSem.availablePermits() == (_computeMaxConcurrentUpdateThreads-1);
707
708 // We are now really getting started...
709 final long startTime = System.currentTimeMillis();
710 // Note the (target) limit time;
711 // if really out of date then allow MUCH longer before timing out
712 // (unless conserving power)
713 // But in any case don't quit until this has made some progress
714 // to ensure that this does eventually complete.
715 final boolean reasonablyUpToDate =
716 ((entry != null) && (entry.first.longValue() == currentPeriod-1));
717 final long stopBy = startTime +
718 Rnd.fastRnd.nextInt(UPDATE_TIME_LIMIT_MS) +
719 ((reasonablyUpToDate || GenUtils.mustConservePower()) ?
720 (UPDATE_TIME_LIMIT_MS>>1) : (UPDATE_TIME_LIMIT_MS<<1));
721
722 if(IsDebug.isDebug) { System.out.println("[ExhibitPropsComputableMutableVoteCache.update(): cache size: " + voteCorrCacheMap.size() + ".]"); }
723
724
725 // Request/fetch the current-period event sets now,
726 // so that they may be ready later to be added into the mix
727 // if not immediately available.
728 final EventVariableValue pcur = vars.getEventValue(SystemVariables.VOTE_PRO, EventPeriod.VLONG, true);
729 final EventVariableValue ccur = vars.getEventValue(SystemVariables.VOTE_CON, EventPeriod.VLONG, true);
730
731 final List<EventVariableValue> availableALLBuckets = new ArrayList<EventVariableValue>(2);
732 if(USE_ALL_BUCKET)
733 {
734 // Get pro and con votes' "all" period event set.
735 // This is a heuristic to abort if we cannot get vote "all" buckets,
736 // and to try to encourage fetching of such data by the cache,
737 // or to continue quietly if what we currently have is OK,
738 // even empty if there seems to have been no recent voting.
739 //
740 // If we get null/stale/empty "all" buckets
741 // then abort and hope that they will be present/fresh
742 // the next time that we are called
743 // (eg allowing for asynchronous fetch/cacheing of these values).
744 final EventVariableValue pevvs[] = vars.getEventValues(
745 SystemVariables.VOTE_PRO, EventPeriod.VLONG, 0, null);
746 final EventVariableValue cevvs[] = vars.getEventValues(
747 SystemVariables.VOTE_CON, EventPeriod.VLONG, 0, null);
748 if((pevvs.length != 0) && (pevvs[0] != null)) { availableALLBuckets.add(pevvs[0]); }
749 if((cevvs.length != 0) && (cevvs[0] != null)) { availableALLBuckets.add(cevvs[0]); }
750 // If not eager and we don't have both "all" buckets right now
751 // then give up quickly, without burning much CPU time.
752 if(availableALLBuckets.size() < (EAGER_VOTE_LOAD ? 1 : 2))
753 { throw new IOException("missing vote one or both all buckets (early)"); }
754 }
755
756 // List of buckets from which we will extract (short) exhibit names.
757 // Starts with the pro/con 'all' buckets.
758 final List<EventVariableValue> buckets = new ArrayList<EventVariableValue>(availableALLBuckets);
759
760 // If not using the "all" vote buckets or
761 // if we are using "all" buckets but either is completely empty
762 // then we may wish to resort
763 // to explicitly fetching all/most available recent event periods
764 // and computing the union of their values.
765 if(!USE_ALL_BUCKET ||
766 ((availableALLBuckets.size() == 2) &&
767 ((availableALLBuckets.get(0).getTotalEventCount() == 0) ||
768 (availableALLBuckets.get(1).getTotalEventCount() == 0))))
769 {
770 if(IsDebug.isDebug && USE_ALL_BUCKET) { System.err.println("[ExhibitPropsComputableMutableVoteCache.update(): WARNING: at least one 'all' bucket is empty: collecting vote data the slow way...]"); }
771
772 // Remove extant dubious 'all' values.
773 if(USE_ALL_BUCKET) { buckets.clear(); }
774
775 // Forcing alternative (slow) mechanism...
776 // Get events for all retained slots.
777 final BitSet whichValues = new BitSet(SystemVariables.EVENT_SAMPLES_RETAINED);
778 whichValues.set(0, SystemVariables.EVENT_SAMPLES_RETAINED);
779 final SimpleVariableDefinition[] defs = new SimpleVariableDefinition[]{ SystemVariables.VOTE_PRO, SystemVariables.VOTE_CON };
780 // Have extra threads try for values in a different order for concurrency...
781 if(!firstThreadIn) { Collections.shuffle(Arrays.asList(defs), Rnd.goodRnd); }
782 for(final SimpleVariableDefinition def : defs)
783 {
784 final EventVariableValue evvsAll[] = vars.getEventValues(
785 def, EventPeriod.VLONG, currentPeriod, whichValues);
786 for(final EventVariableValue evv : evvsAll)
787 {
788 if((evv != null) && (evv.getTotalEventCount() != 0))
789 { buckets.add(evv); }
790 }
791 }
792 }
793
794 // Throw in current-slot votes at the last moment.
795 // Try and fetch them again if they were empty when we last tried.
796 // These data are nice-to-have if we can get them,
797 // since they make the correlation values as fresh as possible.
798 buckets.add((pcur.getTotalEventCount() != 0) ? pcur :
799 vars.getEventValue(SystemVariables.VOTE_PRO, EventPeriod.VLONG, true));
800 buckets.add((ccur.getTotalEventCount() != 0) ? ccur :
801 vars.getEventValue(SystemVariables.VOTE_CON, EventPeriod.VLONG, true));
802
803
804 // If eager then get/compute votes/scores ASAP,
805 // even when we don't have both "all" buckets.
806 //
807 // We convert from short name to long/full form
808 // dropping any exhibit (name) no longer extant.
809 final long calcVoteFactorStart = System.currentTimeMillis();
810 currentPeriodL = MemoryTools.intern(new Long(currentPeriod));
811 // Scramble order of buckets to help ensure that
812 // parallel threads don't waste too much time redoing the same work.
813 if(!firstThreadIn) { Collections.shuffle(buckets, Rnd.fastRnd); }
814 int checked = 0; // To enable some sort of progress report...
815 for(final EventVariableValue evv : buckets)
816 {
817 ++checked; // Track progress...
818
819 // Bucket entries should never be null.
820 assert(evv != null);
821
822 List<Object> so = evv.getDistinctValuesInRankOrder();
823 // Again, shuffle work order to help avoid threads colliding.
824 if(!firstThreadIn)
825 {
826 so = new ArrayList<Object>(so);
827 Collections.shuffle(so, Rnd.fastRnd);
828 }
829 for(final Object key : so)
830 {
831 if(!(key instanceof String)) { continue; }
832 final String shortName = (String) key;
833 final Name.ExhibitFull exhibitName = aep.aeid.getFullName(shortName);
834 if(exhibitName == null) { continue; }
835 // Skip if we've already handled/added this exhibitName.
836 if(!votedForNames.add(exhibitName)) { continue; }
837
838 // If eagerly fetching/computing vote values then do it now,
839 // possibly in parallel with fetching one of the "all" buckets.
840 if(EAGER_VOTE_LOAD)
841 {
842 // ----------------------------------------------------
843 // Bring at least one vote up to date
844 // if not all yet up-to-date.
845
846 // Get any extant cached vote value for this exhibit.
847 final Tuple.Pair<Long, ExhibitPropsComputableMutable.Factor> oldVVB =
848 voteCorrCacheMap.get(exhibitName);
849 if((oldVVB != null) && (oldVVB.first.longValue() == currentPeriod))
850 { continue; /* Already up-to-date. */ }
851
852 // Compute a new/current vote value for this exhibit.
853 final ExhibitPropsComputableMutable.Factor result =
854 ExhibitPropsComputableMutable.calcVoteFactor(exhibitName, aep, vars, 0);
855 voteCorrCacheMap.put(exhibitName,
856 new Tuple.Pair<Long, ExhibitPropsComputableMutable.Factor>(
857 currentPeriodL, result));
858
859 // Abort if we've run out of time.
860 if(!noTimeLimit && (System.currentTimeMillis() > stopBy))
861 {
862 /* if(IsDebug.isDebug) */ { System.out.println("[ExhibitPropsComputableMutableVoteCache.update(): ABORTED calcVoteFactor: ran out of time (checked "+checked+"/"+buckets.size()+") @ "+(System.currentTimeMillis()-calcVoteFactorStart)+"ms]"); }
863 throw new IOException("ran out of time in update(); data not brought up-to-date yet because not all voted-for-exhibits up-to-date yet");
864 }
865
866 // Abort if another thread has already made it as far as
867 // the core section (so we can't get in).
868 if(_uCoreLock.isLocked())
869 {
870 /* if(IsDebug.isDebug) */ { System.out.println("[ExhibitPropsComputableMutableVoteCache.update(): ABORTED (another thread already in update() core) calcVoteFactor @ time "+(System.currentTimeMillis()-calcVoteFactorStart)+"ms]"); }
871 throw new IOException("another thread already in update() core");
872 }
873 }
874 }
875 }
876 final long calcVoteFactorEnd = System.currentTimeMillis();
877 /* if(IsDebug.isDebug) */ { System.out.println("[ExhibitPropsComputableMutableVoteCache.update(): calcVoteFactor time "+(calcVoteFactorEnd-calcVoteFactorStart)+"ms]"); }
878
879 // If we have not yet managed to get both buckets
880 // and thus all the exhibit names that they contain
881 // then we cannot go any further for the moment.
882 if(EAGER_VOTE_LOAD && USE_ALL_BUCKET && (availableALLBuckets.size() < 2))
883 { throw new IOException("too few all vote buckets (EAGER)"); }
884 if(USE_ALL_BUCKET) { assert(availableALLBuckets.size() == 2) : "must have at least both pro and con `all' buckets or full set of pro/con buckets by now"; }
885
886 if(IsDebug.isDebug) { System.out.println("[ExhibitPropsComputableMutableVoteCache.update(): votedForNames size: " + votedForNames.size() + ".]"); }
887 }
888 finally
889 { _uStartSem.release(); }
890
891 // Only allow at most one (external) thread at once
892 // to consume CPU in the core part of update()
893 // so as to avoid wasting resources
894 // though we will inject local parallelism directly for speed.
895 if(!_uCoreLock.tryLock()) { throw new IOException("update() already busy in core; vote cache data is stale"); }
896 try
897 {
898 // Double-check that the work was not completed by another thread
899 // while we were in an earlier section!
900 final Tuple.Pair<Long, ExhibitPropsComputableMutable.Factor> e2 =
901 voteCorrCacheMap.get(MARKER_KEY);
902 if((e2 != null) && (e2.first.longValue() == currentPeriod))
903 { return; /* Apparently completely up-to-date! */ }
904
905 // For every voted-for exhibit,
906 // and for every correlation type that we can compute,
907 // accumulate correlation stats.
908 // We then discard any that are insignificant,
909 // and insert the remaining significant values into the cache.
910 //
911 // Also compute the mean count per non-sparse value
912 // as a baseline against which non-sparse values with lower counts
913 // can be rated for confidence/certainty.
914 //
915 // We optimistically assume that in the typical case
916 // we can get through this entire operation quickly
917 // (especially since we parallelise it)
918 // so we don't bother splitting it into phases.
919 //
920 // We expect this work to be mainly compute-bound.
921 final SortedSet<String> attrWords = ExhibitAttrUtils.getAttrWords().getAttrWordsSortedSet();
922
923 final AtomicInteger totalNonSparseEntries = new AtomicInteger();
924 final AtomicInteger totalNonSparseCount = new AtomicInteger();
925 final ConcurrentHashMap<Name,Accum> working = new ConcurrentHashMap<Name, Accum>(17 + 19*votedForNames.size());
926 final long beforeCompute = System.currentTimeMillis();
927
928 // List of results that we can wait for completion of.
929 final List<Future<?>> tasks = new ArrayList<Future<?>>(votedForNames.size());
930
931 // Post tasks for each of the exhibits that has been voted for.
932 for(final Name.ExhibitFull exhibitName : votedForNames)
933 {
934 tasks.add(ThreadUtils.computeIntensiveThreadPool.submit(new Runnable(){
935 public final void run()
936 {
937 final ExhibitStaticAttr esa = aep.aeid.getStaticAttr(exhibitName);
938 // If exhibit appears not to exist any more then we're done...
939 // (This is in practice very rare.)
940 if(esa == null) { return; }
941
942 // Get/compute the current/new vote value for this exhibit.
943 final ExhibitPropsComputableMutable.Factor vote;
944 try { vote = calcVoteFactor(exhibitName, aep, vars); }
945 catch(final IOException e)
946 {
947 // If we get an IOException here
948 // then we probably have to abort the entire calculation
949 // and restart later.
950 e.printStackTrace();
951 throw new IllegalStateException("unexpected IOException: will have to restart", e);
952 }
953 assert(vote != null);
954
955 // Make all cache keys for this exhibit.
956 final Set<Name> keys = CorrType.makeCacheKeys(esa, aep);
957 // Count the words in this exhibit for scaling...
958 final int nWords = ExhibitName.getMainWordsCount(esa.getCharSequence(), attrWords);
959 // Update all keyed entries with this exhibit's rating.
960 for(final Name key : keys)
961 {
962 // Is this a "sparse" correlation type?
963 final CorrType corrType = CorrType.valueOf(key.subSequence(0, TextUtils.indexOf(key, KEY_SEPARATOR_CHAR)).toString());
964 final boolean isSparse = corrType.isSparse();
965
966 // Is this an especially "dense" term that needs scaling
967 // so as not to swamp other results.
968 final boolean needsScalingByWordCount =
969 (corrType == CorrType.STEM_WORDS) ||
970 (corrType == CorrType.PREFIX);
971
972 // Retrieve existing accumulator, or create a new one.
973 Accum ac;
974 while(null == (ac = working.get(key)))
975 {
976 // Create new all-zeros accumulator
977 // if one not already inserted.
978 // (This is race-free c/o putIfAbsent().)
979 working.putIfAbsent(key, new Accum());
980 }
981 // Update the accumulator.
982 final int count = ac.count.incrementAndGet();
983 final float corr = (!needsScalingByWordCount) ?
984 vote.correlation : (vote.correlation / nWords);
985 if(vote.goodness < 0) { ac.addToTotalConfidence(-corr); }
986 else if(vote.goodness > 0) { ac.addToTotalConfidence(+corr); }
987
988 // Note non-sparse entries that have a significant count.
989 if(!isSparse && (count >= SIGNIFICANCE_COUNT_THRESHOLD))
990 {
991 if(count == SIGNIFICANCE_COUNT_THRESHOLD)
992 { totalNonSparseEntries.incrementAndGet(); /* Note new (non-sparse) entry. */ }
993 totalNonSparseCount.incrementAndGet();
994 }
995 }
996 }
997 }));
998 }
999
1000 // Wait for all the tasks to complete...
1001 assert(tasks.size() == votedForNames.size());
1002 for(final Future<?> task : tasks)
1003 {
1004 try { task.get(); }
1005 catch(final InterruptedException e)
1006 {
1007 e.printStackTrace();
1008 final IOException et = new InterruptedIOException("unexpected interrupt during calculation");
1009 et.initCause(e);
1010 throw et;
1011 }
1012 catch(final ExecutionException e)
1013 {
1014 e.printStackTrace();
1015 final IOException et = new IOException("unexpected exception during calculation");
1016 et.initCause(e);
1017 throw et;
1018 }
1019 }
1020
1021 final long afterCompute = System.currentTimeMillis();
1022 /*if(IsDebug.isDebug) */ { System.out.println("[ExhibitPropsComputableMutableVoteCache.update(): *** correlation factors compute time "+(afterCompute-beforeCompute)+"ms.]"); }
1023
1024 final int tNSE = totalNonSparseEntries.get();
1025 final double meanNonSparseCount = (tNSE < 1) ? 0.0
1026 : (totalNonSparseCount.get() / (double) tNSE);
1027 final double mNSC_min = Math.max(meanNonSparseCount,
1028 SIGNIFICANCE_COUNT_THRESHOLD);
1029
1030 /* if(IsDebug.isDebug) */ { System.out.println("[ExhibitPropsComputableMutableVoteCache.update(): votedForNames.size()="+votedForNames.size()+", working.size()="+working.size()+", meanNonSparseCount="+meanNonSparseCount + ".]"); }
1031
1032 // Use the time when we started this whole update
1033 // to take a conservative view of the age of our source data,
1034 // eg in case the period ticked over while we were working.
1035 final Long cp = currentPeriodL;
1036
1037 // Go through the working set,
1038 // converting significant results into main cache entries.
1039 final long finalComputePhaseStart = System.currentTimeMillis();
1040 for(final Name key : working.keySet())
1041 {
1042 // Is this a "sparse" correlation type?
1043 final boolean isSparse =
1044 CorrType.valueOf(key.subSequence(0, TextUtils.indexOf(key, KEY_SEPARATOR_CHAR)).toString()).isSparse();
1045
1046 final Accum ac = working.get(key);
1047 assert(ac != null);
1048
1049 // Ignore values with very small counts
1050 // (except for explicit "sparse" types)
1051 // as potentially unreliable (or at least very noisy).
1052 final int count = ac.count.get();
1053 if(!isSparse && (count < SIGNIFICANCE_COUNT_THRESHOLD))
1054 { continue; }
1055
1056 assert(ac.count.get() > 0);
1057 assert(!Double.isInfinite(ac.getTotalConfidence()));
1058 assert(!Double.isNaN(ac.getTotalConfidence()));
1059 // Compute mean/normalised confidence
1060 // scaling as appropriate for below-mean counts.
1061 // We scale by the expected extra variance of a low sample size.
1062 final double meanConf = Math.abs(ac.getTotalConfidence() / count);
1063 final double scaledConf =
1064 (/* isSparse || */ (count >= mNSC_min)) ? meanConf :
1065 (meanConf * Math.sqrt(count / mNSC_min));
1066
1067 // If too small to be significant, then don't use it.
1068 // Test in such a way as to reject NaN values too,
1069 // ie unless a test succeeds, since all tests against NaN fail.
1070 if(!(scaledConf > SIGNIFICANCE_CONF_THRESHOLD)) { continue; }
1071
1072 // Make our factor with a helpful comment.
1073 final String comment = key + " count=" + count;
1074 final ExhibitPropsComputableMutable.Factor f = new ExhibitPropsComputableMutable.Factor(
1075 (ac.getTotalConfidence() < 0) ? -1 : +1,
1076 scaledConf,
1077 comment);
1078
1079 // Insert the new Factor into the cache.
1080 voteCorrCacheMap.put(MemoryTools.intern(key), // Reduce old-heap churn.
1081 new Tuple.Pair<Long,ExhibitPropsComputableMutable.Factor>(cp, f));
1082 }
1083 final long finalComputePhaseEnd = System.currentTimeMillis();
1084 /* if(IsDebug.isDebug) */ { System.out.println("[ExhibitPropsComputableMutableVoteCache.update(): final phase compute time "+(finalComputePhaseEnd-finalComputePhaseStart)+"ms.]"); }
1085
1086 // Insert a distinguished marker key
1087 // so that it is quick to check when we last recomputed values here.
1088 // We do this only when all updates are complete.
1089 voteCorrCacheMap.put(MARKER_KEY,
1090 new Tuple.Pair<Long,ExhibitPropsComputableMutable.Factor>(cp, ExhibitPropsComputableMutable.FACTOR_ZERO));
1091
1092 // Remove any stale state since we now have up-to-date values.
1093 _clearStaleState(voteCorrCacheMap);
1094
1095 /* if(IsDebug.isDebug) */ { System.out.println("[ExhibitPropsComputableMutableVoteCache.update(): final cache size: " + voteCorrCacheMap.size() + ".]"); }
1096 }
1097 finally { _uCoreLock.unlock(); }
1098
1099 // Dump the correlations table for debug purposes,
1100 // but outside the lock so as not to hold things up unduly...
1101 // Only show the non-sparse, non-exhibit entries.
1102 if(IsDebug.isDebug)
1103 {
1104 // Nicely dump the table in sorted order...
1105 final List<Name> keys = new ArrayList<Name>(voteCorrCacheMap.size());
1106 // Capture the key set atomically...
1107 synchronized(voteCorrCacheMap) { keys.addAll(voteCorrCacheMap.keySet()); }
1108 // ... and sort our copy to iterate over.
1109 Collections.sort(keys);
1110 // Make a re-usable working buffer.
1111 final StringBuilder sb = new StringBuilder(255);
1112
1113 for(final Name key : keys)
1114 {
1115 // Skip the raw exhibit votes.
1116 if(TextUtils.indexOf(key, KEY_SEPARATOR_CHAR) == -1) { continue; }
1117 // Skip the sparse entries (there may be *lots* of them)...
1118 if(CorrType.valueOf(key.subSequence(0, TextUtils.indexOf(key, KEY_SEPARATOR_CHAR)).toString()).isSparse()) { continue; }
1119
1120 sb.setLength(0); // Clear the buffer.
1121 sb.append(" "); // Indent...
1122 sb.append(key).append(" = ");
1123 final Tuple.Pair<Long, ExhibitPropsComputableMutable.Factor> p = voteCorrCacheMap.get(key);
1124 if(p != null)
1125 {
1126 sb.append((p.second.goodness < 0) ? '-' : '+');
1127 sb.append(p.second.correlation);
1128 }
1129 System.out.println(sb);
1130 }
1131 }
1132 }
1133
1134 /**Special marker key value to indicate when we last computed correlations.
1135 * Legal key, but never conflicts with any normal usage.
1136 */
1137 private static final Name MARKER_KEY = Name.create(CorrType.AUTHOR.toString() + KEY_SEPARATOR_CHAR);
1138
1139
1140 /**Serialise: strip out stale information before serialising. */
1141 protected Object writeReplace()
1142 // throws ObjectStreamException
1143 {
1144 // Clear stale entries before saving.
1145 _clearStaleState(voteCorrCacheMap);
1146
1147 // Save the object as-is.
1148 return(this);
1149 }
1150
1151 /**Remove stale state from the map passed in.
1152 * A lock is held on the map while this is done
1153 * to prevent it being altered unexpectedly.
1154 * <p>
1155 * We only remove things more than one period old
1156 * so that we can gently recompute values in the background
1157 * rather than being bounced into it as we roll into a new period.
1158 *
1159 * @param map non-null map.
1160 */
1161 static private void _clearStaleState(final Hashtable<Name,Tuple.Pair<Long,ExhibitPropsComputableMutable.Factor>> map)
1162 {
1163 synchronized(map)
1164 {
1165 // Compute the current period.
1166 final long currentPeriod = EventPeriod.VLONG.getIntervalNumber(System.currentTimeMillis());
1167
1168 for(final Iterator<Tuple.Pair<Long,ExhibitPropsComputableMutable.Factor>> it = map.values().iterator(); it.hasNext(); )
1169 {
1170 final Tuple.Pair<Long,ExhibitPropsComputableMutable.Factor> tp = it.next();
1171 // Simply weed out stale entries,
1172 // ie anything not in the current VLONG period or the one before.
1173 if(tp.first.longValue() < currentPeriod-1)
1174 { it.remove(); continue; }
1175 }
1176 }
1177 }
1178
1179
1180 /**Deserialise: use constructor for validation, defensive copying, etc.
1181 */
1182 protected Object readResolve()
1183 throws ObjectStreamException
1184 {
1185 // Construct new instance of object in normal defensive way.
1186 return(new ExhibitPropsComputableMutableVoteCache(voteCorrCacheMap));
1187 }
1188
1189
1190 /**Validates the object.
1191 *
1192 * @throws java.io.InvalidObjectException If the object cannot validate itself
1193 */
1194 public void validateObject() throws InvalidObjectException
1195 {
1196 if(voteCorrCacheMap == null)
1197 { throw new InvalidObjectException("bad object: null voteCorrCacheMap"); }
1198
1199 synchronized(voteCorrCacheMap)
1200 {
1201 for(final Iterator<?> it = voteCorrCacheMap.keySet().iterator(); it.hasNext(); )
1202 {
1203 // Weed out any invalid entries...
1204 final Object ok = it.next();
1205 if(!(ok instanceof String))
1206 { throw new InvalidObjectException("bad object: invalid voteCorrCacheMap key type"); }
1207
1208 // A key can be a valid exhibit name...
1209 // or a CorrType + ':' + rest of key.
1210 final String sk = (String) ok;
1211 if(!ExhibitName.validNameSyntax(sk))
1212 {
1213 final int cPos = sk.indexOf(KEY_SEPARATOR_CHAR);
1214 if(cPos < 1)
1215 { throw new InvalidObjectException("bad object: invalid voteCorrCacheMap key syntax"); }
1216
1217 // This first part must be a CorrType name.
1218 final String firstPart = sk.substring(0, cPos);
1219 if(null == CorrType.valueOf(firstPart))
1220 { throw new InvalidObjectException("bad object: invalid voteCorrCacheMap composite key syntax"); }
1221 }
1222
1223 final Object ov = voteCorrCacheMap.get(ok);
1224 if(!(ok instanceof Tuple.Pair))
1225 { throw new InvalidObjectException("bad object: invalid voteCorrCacheMap value type"); }
1226 final Tuple.Pair tp = (Tuple.Pair) ov;
1227 if(!(tp.first instanceof Long) ||
1228 !(tp.second instanceof ExhibitPropsComputableMutable.Factor))
1229 { throw new InvalidObjectException("bad object: invalid voteCorrCacheMap value sub-type"); }
1230 }
1231 }
1232 }
1233
1234 /**Unique Serialisation class ID generated by http://random.hd.org/. */
1235 private static final long serialVersionUID = 7048481653530545311L;
1236 }