001 /*
002 Copyright (c) 1996-2012, Damon Hart-Davis
003 All rights reserved.
004
005 Redistribution and use in source and binary forms, with or without
006 modification, are permitted provided that the following conditions are
007 met:
008
009 * Redistributions of source code must retain the above copyright
010 notice, this list of conditions and the following disclaimer.
011
012 * Redistributions in binary form must reproduce the above copyright
013 notice, this list of conditions and the following disclaimer in the
014 documentation and/or other materials provided with the
015 distribution.
016
017 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
018 IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
019 TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
020 PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
021 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
022 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
023 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
024 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
025 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
026 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
027 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
028 */
029
030 package org.hd.d.pg2k.svrCore;
031
032 import java.io.IOException;
033 import java.io.InterruptedIOException;
034 import java.util.ArrayList;
035 import java.util.Arrays;
036 import java.util.BitSet;
037 import java.util.Collections;
038 import java.util.EnumMap;
039 import java.util.Enumeration;
040 import java.util.HashSet;
041 import java.util.Hashtable;
042 import java.util.Iterator;
043 import java.util.List;
044 import java.util.Set;
045 import java.util.SortedSet;
046 import java.util.concurrent.ConcurrentHashMap;
047 import java.util.concurrent.ExecutionException;
048 import java.util.concurrent.Future;
049 import java.util.concurrent.Semaphore;
050 import java.util.concurrent.atomic.AtomicInteger;
051 import java.util.concurrent.locks.ReentrantLock;
052
053 import org.apache.log4j.Logger;
054 import org.hd.d.pg2k.svrCore.Name.ExhibitFull;
055 import org.hd.d.pg2k.svrCore.MIME.ExhibitMIME;
056 import org.hd.d.pg2k.svrCore.vars.BasicVarMgrInterface;
057 import org.hd.d.pg2k.svrCore.vars.EventPeriod;
058 import org.hd.d.pg2k.svrCore.vars.EventVariableValue;
059 import org.hd.d.pg2k.svrCore.vars.SimpleVariableDefinition;
060 import org.hd.d.pg2k.svrCore.vars.SystemVariables;
061
062 import ORG.hd.d.IsDebug;
063
064 /**
065 * Created by IntelliJ IDEA.
066 * User: DHD
067 * Date: 02-Jul-2006
068 * Time: 16:40:48
069 */
070
071 /**Class to cache vote computations and correlated values.
072 * In particular this aims to avoid repeated expensive recomputation
073 * of underlying vote values so as global correlations can be
074 * efficiently computed.
075 */
076 public final class ExhibitPropsComputableMutableVoteCache implements ExhibitPropsComputableMutableVoteCacheIF
077 {
078 private static final Logger logger = Logger.getLogger(ExhibitPropsComputableMutableVoteCache.class);
079
080 /**Construct default empty cache. */
081 public ExhibitPropsComputableMutableVoteCache() { }
082
083 /**Correlation types.
084 * By default most correlation types/data is "dense",
085 * ie there should be many examples for and against each value.
086 * <p>
087 * Some types are "sparse",
088 * ie where we usually expect only one or two examples for a given value,
089 * and so we want to filter these more gently than usual.
090 * <p>
091 * TODO: accession year and month
092 * <p>
093 * TODO: EXIF/JAI values
094 */
095 public static enum CorrType
096 {
097 /**By variation on same exhibit (sparse). */ VARIANT(true),
098 /**By similar exhibit (sparse). */ SIMILAR(true),
099 /**By exhibit size (log bytes). */ SIZE,
100 /**By exhibit attribute word(s). */ ATTRIBUTE,
101 /**By exhibit resolution (eg log pixels). */ RESOLUTION,
102 /**By exhibit main word. */ MAIN_WORD,
103 /**By exhibit category. */ CATEGORY,
104 /**By exhibit type. */ TYPE,
105 /**By author. */ AUTHOR,
106 /**By MIME primary type component. */ MIME1,
107 /**By prefixes. */ PREFIX,
108 /**By all stem words. */ STEM_WORD;
109
110 /**By default correlations are not expected to be sparse. */
111 private CorrType() { sparse = false; }
112 /**Construct correlation with specified sparseness. */
113 private CorrType(final boolean isSparse) { sparse = isSparse; }
114
115 /**Is this correlation "sparse"? */
116 private final boolean sparse;
117 /**Get the "sparseness" flag. */
118 public final boolean isSparse() { return(sparse); }
119
120 /**Unique (one-character) prefix, intern()ed for good luck; not null. */
121 private final String keyPrefix = Integer.toString(ordinal(), Character.MAX_RADIX).intern();
122 /**Make unique cache key prefix; never null nor empty.
123 * Either fixed length or followed by a ':'.
124 */
125 public final String makeUniqueKeyPrefix() { return(keyPrefix); }
126 /**Verify that all prefixes will be exactly one character long. */
127 static { if(IsDebug.isDebug) { assert(CorrType.values().length <= Character.MAX_RADIX); } }
128 /**Extract key type from prefix of key as created by makeUniqueKeyPrefix(); never null. */
129 public static CorrType extractKeyTypeFromPrefix(final CS8Bit key)
130 { return(CorrType.values()[Character.digit(key.charAt(0), Character.MAX_RADIX)]); }
131 /**Remove prefix from (non-null, non-empty) key returning the tail; never null. */
132 public static CharSequence extractKeyTail(final CS8Bit key)
133 { return(key.subSequence(1, key.length())); }
134
135 /**Make the appropriate cache lookup key(s) for a given value and full valid exhibit name; never null.
136 * Add zero-or-more, non-null, non-empty cache key(s) of the form:
137 * <samp>uniqueprefixforenumvalue value</samp> eg <samp>SIZE:16</samp> or <samp>y16</samp>
138 * <p>
139 * For keys whose plain form would be bulky (and thus represent significant memory footprint)
140 * their hash may be stored instead, with the attendant minuscule chance of false collision with another key value.
141 *
142 * @param corrType the correlation type; never null
143 * @param esa exhibit details; never null
144 * @param aep never null
145 * @param result non-null, mutable Set to which result is added
146 *
147 * @throws IOException if key cannot be constructed
148 */
149 static void makeCacheKey(final CorrType corrType,
150 final ExhibitStaticAttr esa,
151 final AllExhibitProperties aep,
152 final Set<CS8Bit> result)
153 throws IOException
154 {
155 assert(corrType != null);
156 assert(esa != null);
157 assert(aep != null);
158
159 final ExhibitFull exhibitFullName = esa.getExhibitFullName();
160 switch(corrType)
161 {
162 case AUTHOR:
163 { result.add(new CS8Bit(corrType.makeUniqueKeyPrefix() + ExhibitName.getAuthorComponent(exhibitFullName))); return; }
164
165 case TYPE:
166 { result.add(new CS8Bit(corrType.makeUniqueKeyPrefix() + ExhibitName.getExtensionComponent(exhibitFullName))); return; }
167
168 case CATEGORY:
169 { result.add(new CS8Bit(corrType.makeUniqueKeyPrefix() + ExhibitName.getCategoryComponent(exhibitFullName))); return; }
170
171 case MAIN_WORD:
172 {
173 // Just the main word.
174 final Name.ExhibitShort f = exhibitFullName.getShortName();
175 final int dash = TextUtils.indexOf(f, ExhibitName.WORD_SEP);
176 final CharSequence mainWord = f.subSequence(0, dash);
177 result.add(new CS8Bit(corrType.makeUniqueKeyPrefix() + mainWord));
178 return;
179 }
180
181 case MIME1:
182 {
183 final ExhibitMIME.ExhibitTypeParameters type = (ExhibitMIME.getInputFileType(exhibitFullName));
184 if(type == null) { return; }
185 final int slash = type.mimeType.indexOf('/');
186 if(slash < 1) { return; }
187 final String primaryComponent = type.mimeType.substring(0, slash);
188 result.add(new CS8Bit(corrType.makeUniqueKeyPrefix() + primaryComponent));
189 return;
190 }
191
192 case SIMILAR:
193 {
194 // "Similar" means the main-words component matches.
195 // Can be long, so stored by hash code.
196 result.add(new CS8Bit(corrType.makeUniqueKeyPrefix() +
197 TextUtils.hashCodeHexString(exhibitFullName.getShortName().getMainWordsComponent(ExhibitAttrUtils.getAttrWords().getAttrWordsSortedSet()))));
198 return;
199 }
200
201 case VARIANT:
202 {
203 // Can be long, so stored by hash code.
204 final CharSequence mainWords = exhibitFullName.getShortName().getMainWordsComponent(
205 ExhibitAttrUtils.getAttrWords().getAttrWordsSortedSet());
206 final int num = ExhibitName.getNumberInSeriesComponent(exhibitFullName);
207 final CharSequence auth = ExhibitName.getAuthorComponent(exhibitFullName);
208 final StringBuilder sb = new StringBuilder(8 + mainWords.length());
209 sb.append(mainWords).append('-');
210 sb.append(num).append('-');
211 sb.append(auth);
212 result.add(new CS8Bit(corrType.makeUniqueKeyPrefix() + TextUtils.hashCodeHexString(sb)));
213 return;
214 }
215
216 case SIZE: // Size in bytes grouped into buckets by exponent.
217 { result.add(new CS8Bit(corrType.makeUniqueKeyPrefix() + GenUtils.log2Approx(esa.length))); return; }
218
219 case RESOLUTION:
220 {
221 final StringBuilder sb = new StringBuilder();
222 sb.append(corrType.makeUniqueKeyPrefix());
223
224 // Some notion of the raw information content of the exhibit.
225 // Zero-or-more items extracted from meta-data.
226 final ExhibitPropsComputable ePC =
227 aep.getExhibitPropsComputable(exhibitFullName);
228 if(ePC != null)
229 {
230 final java.awt.Dimension xyDim = ePC.getXyDimensions();
231 if(xyDim != null)
232 {
233 // Add pixel resolution for images.
234 sb.append("px");
235 sb.append(GenUtils.log2Approx(xyDim.width * (long) xyDim.height));
236 result.add(new CS8Bit(sb));
237 }
238
239 // TODO: should have sample rate for audio, etc.
240 // final Node md = ePC.getMetadata();
241 }
242
243 // We will not have created any key
244 // if no useful metadata was available.
245 return;
246 }
247
248 case ATTRIBUTE:
249 {
250 // Get the exhibit's attribute words, if any.
251 final Set<String> aws = ExhibitName.getAttributeWordsComponentSortedSet(
252 exhibitFullName, ExhibitAttrUtils.getAttrWords().getAttrWordsSortedSet());
253 // Prefix them and add them to the result.
254 for(final String aw : aws)
255 { result.add(new CS8Bit(corrType.makeUniqueKeyPrefix() + aw)); }
256 return;
257 }
258
259 case STEM_WORD:
260 {
261 // By all (mono-cased) stem words.
262 final Enumeration<?> mainWords = ExhibitName.getMainWords(
263 exhibitFullName, ExhibitAttrUtils.getAttrWords().getAttrWordsSortedSet());
264 while(mainWords.hasMoreElements())
265 { result.add(new CS8Bit(corrType.makeUniqueKeyPrefix() + ((String) mainWords.nextElement()).toLowerCase())); }
266 return;
267 }
268
269 case PREFIX:
270 {
271 // By all stem prefixes of the "virtual" name.
272 // Can be long, so stored by hash code.
273 final Enumeration<?> mainWords = ExhibitName.getMainWords(
274 exhibitFullName, ExhibitAttrUtils.getAttrWords().getAttrWordsSortedSet());
275 final StringBuilder sb = new StringBuilder(exhibitFullName.length() - 4);
276 sb.append(ExhibitName.getCategoryComponent(exhibitFullName));
277 sb.append('/');
278 while(mainWords.hasMoreElements())
279 {
280 final String word = ((String) mainWords.nextElement());
281 sb.append(word).append(ExhibitName.WORD_SEP);
282 result.add(new CS8Bit(corrType.makeUniqueKeyPrefix() + TextUtils.hashCodeHexString(sb)));
283 }
284 return;
285 }
286
287 default:
288 { throw new Error("Cannot generate key for correlation type " + corrType); }
289 }
290 }
291
292 /**Make all cache keys for a given exhibit; never null.
293 * This may return a variable number of keys,
294 * and may ignore those that cannot currently be made and
295 * are unlikely ever to be made,
296 * but is as consistent as possible for any one exhibit.
297 *
298 * @return non-null, possibly-empty, set of cache keys
299 */
300 static Set<CS8Bit> makeCacheKeys(final ExhibitStaticAttr esa,
301 final AllExhibitProperties aep)
302 {
303 final HashSet<CS8Bit> result = new HashSet<CS8Bit>();
304 for(final CorrType ct : CorrType.values())
305 {
306 try { makeCacheKey(ct, esa, aep, result); }
307 catch(final IOException e) { /* Ignore ungeneratable keys. */ }
308 }
309 return(result);
310 }
311 }
312
313 /**Immutable records of computed Factor and the period for/in which it was computed. */
314 static final class PeriodAndFactor
315 {
316 /**VLONG period number. */
317 final long period;
318 /**Computed Factor; never null. */
319 final ExhibitPropsComputableMutable.Factor f;
320 /**Make an instance. */
321 PeriodAndFactor(final long period, final ExhibitPropsComputableMutable.Factor f)
322 {
323 assert(null != f);
324 this.period = period;
325 this.f = f;
326 }
327 }
328
329 /**Map from vote/correlation key to computed Factor and period for which it was computed; never null after construction/deserialisation.
330 * Since all our calculations depend on VLONG events,
331 * cache entries need only be invalidated when the period number changes.
332 * <p>
333 * The key format is a CorrType name followed by a colon
334 * followed by the rest of the key.
335 * <p>
336 * This state can be serialised; it is also recomputed on demand.
337 * <p>
338 * We use a Hashtable to be thread-safe.
339 */
340 private final Hashtable<CS8Bit, PeriodAndFactor> voteCorrCacheMap = new Hashtable<CS8Bit, PeriodAndFactor>();
341
342 /**Map from full exhibit name to computed Factor and period for which it was computed; never null after construction/deserialisation.
343 * Since all our calculations depend on VLONG events,
344 * cache entries need only be invalidated when the period number changes.
345 * <p>
346 * This state can be serialised; it is also recomputed on demand.
347 * <p>
348 * We use a Hashtable to be thread-safe.
349 */
350 private final Hashtable<Name.ExhibitFull, PeriodAndFactor> voteCorrCacheMapByFullName = new Hashtable<Name.ExhibitFull, PeriodAndFactor>();
351
352 /**Compute the vote or retrieve from cache; never null.
353 * A computed/cached value is valid until the VLONG period changes.
354 * <p>
355 * The returned factor's goodness can range from -1 to +1,
356 * and confidence from 0 to +1;
357 * any scaling required will have to be applied elsewhere.
358 *
359 * @param exhibitName full, valid exhibit name; never null
360 * @param aep never null
361 * @param vars never null
362 * @return vote Factor for the given exhibit; never null.
363 * @throws java.io.IOException
364 */
365 public final ExhibitPropsComputableMutable.Factor calcVoteFactor(
366 final ExhibitFull exhibitName,
367 final AllExhibitProperties aep,
368 final BasicVarMgrInterface vars)
369 throws IOException
370 {
371 if((null == exhibitName) || (aep == null) || (vars == null)) { throw new IllegalArgumentException(); }
372
373 // Avoid clogging up our cache for non-existent exhibits...
374 if(!aep.aeid.isPresent(exhibitName))
375 { throw new IllegalArgumentException("no such exhibit"); }
376
377 // Compute the current period.
378 final long currentPeriod = EventPeriod.VLONG.getIntervalNumber(System.currentTimeMillis());
379
380 // Get the current cached value, if any.
381 final PeriodAndFactor cached =
382 voteCorrCacheMapByFullName.get(exhibitName);
383 if(cached != null)
384 {
385 // If it is not null, but is stale,
386 // then fall through to recompute the entry.
387 // (There is a small race risk here,
388 // but the worst that can happen is some wasted
389 // recomputations, not a wrong result.)
390 final long l = cached.period;
391 // If the value is still current then return it!
392 if(l == currentPeriod)
393 { return(cached.f); }
394 // // Clear stale cached value.
395 // else
396 // { voteCorrCacheMapByFullName.remove(exhibitName); }
397 }
398
399 // No cached value, or stale, so (re)compute it,
400 // and store the new value in the cache.
401 final ExhibitPropsComputableMutable.Factor result =
402 ExhibitPropsComputableMutable.calcVoteFactor(exhibitName, aep, vars, 0);
403 voteCorrCacheMapByFullName.put(exhibitName,
404 new PeriodAndFactor(currentPeriod, result));
405
406 // Return our result...
407 return(result);
408 }
409
410 /**Get correlates for specified exhibit; never null but result may be empty.
411 * This is based on votes of other "related" exhibits.
412 * <p>
413 * The goodness of each Factor is either -1 or +1,
414 * with the correlation/confidence ranging between 0 and 1.
415 * <p>
416 * This will return values computed up to one period ago if need be,
417 * to help avoid a sudden splurge of CPU effort as we tick from one period
418 * to the next.
419 * <p>
420 * No particular ordering of the results is guaranteed,
421 * but there will be no duplicates and no nulls.
422 *
423 * @param force if true, force complete computation if need be,
424 * else we will just return what we have in cache;
425 * complete recomputation may be expensive but should last a long time
426 * and we will abort with an IOException if we cannot complete
427 * the recomputation in a reasonable time
428 *
429 * @throws IOException cannot extract required correlates
430 */
431 public ExhibitPropsComputableMutable.Factor[] getCorrelates(
432 final ExhibitStaticAttr esa,
433 final AllExhibitProperties aep,
434 final BasicVarMgrInterface vars,
435 final boolean force)
436 throws IOException
437 {
438 if((esa == null) || (aep == null) || (vars == null))
439 { throw new IllegalArgumentException(); }
440
441 // Avoid clogging up our cache for non-existent exhibits...
442 if(!aep.aeid.isPresent(esa.getExhibitFullName()))
443 { return(NO_CORRELATES); }
444
445 // Compute the current period.
446 final long currentPeriod = EventPeriod.VLONG.getIntervalNumber(System.currentTimeMillis());
447
448 // Non-stale cached values available.
449 final List<ExhibitPropsComputableMutable.Factor> result =
450 new ArrayList<ExhibitPropsComputableMutable.Factor>();
451
452 // Check all correlation types.
453 // Accept any cached value however ancient, unless "forcing".
454 _findCorrelates(esa, aep, currentPeriod, result, force);
455
456 // If we got no valid correlates for this exhibit at all,
457 // then try for a full recomputation and re-extract our factors.
458 if(force && (result.size() == 0))
459 {
460 // Update/recompute values if needed.
461 update(aep, vars, false);
462
463 // Re-do our factor extraction...
464 _findCorrelates(esa, aep, currentPeriod, result, force);
465 }
466
467 if(IsDebug.isDebug) { logger.info("Correlates size " + result.size() + " for " + esa.getCharSequence()); }
468
469 if(result.size() == 0)
470 { return(NO_CORRELATES); /* Efficient way to return empty list. */ }
471
472 final ExhibitPropsComputableMutable.Factor[] r =
473 new ExhibitPropsComputableMutable.Factor[result.size()];
474 result.toArray(r);
475 return(r);
476 }
477
478 /**Find out if a category is rated "good"/popular or not.
479 * Returns TRUE if rated good, FALSE if bad,
480 * null if not significantly either or if not known.
481 *
482 * @param categoryDir the initial directory component of an extant exhibit
483 * @param force if true may force (expensive) computation to give
484 * a more accurate answer,
485 * else may return a more approximate or stale answer,
486 * or none at all (null)
487 */
488 public Boolean isCategoryGood(final CharSequence categoryDir,
489 final AllExhibitProperties aep,
490 final BasicVarMgrInterface vars,
491 final boolean force)
492 throws IOException
493 {
494 if(!ExhibitName.validNameInitialComponentSyntax(categoryDir))
495 { throw new IllegalArgumentException(); }
496
497 //System.out.println("***Looking for category goodness: " + categoryDir);
498
499 // Make a syntactically-correct fake name/attrs from the category.
500 final ExhibitStaticAttr esa = new ExhibitStaticAttr(
501 categoryDir + "/a-A.a", 1, System.currentTimeMillis());
502
503 // Compute the correlate name for this category.
504 final Set<CS8Bit> keys = new HashSet<CS8Bit>();
505 ExhibitPropsComputableMutableVoteCache.CorrType.makeCacheKey(
506 ExhibitPropsComputableMutableVoteCache.CorrType.CATEGORY,
507 esa, aep, keys);
508 assert(keys.size() == 1); /* We expect to get exactly one key. */
509 final CS8Bit key = keys.iterator().next();
510
511 //System.out.println("***Looking for key: " + key);
512
513 // Ensure data is current, if so requested.
514 if(force) { update(aep, vars, true); }
515
516 final PeriodAndFactor pf = voteCorrCacheMap.get(key);
517 // If no Factor or too old then return null immediately.
518 if(pf == null)
519 { return(null); }
520 if(force &&
521 (pf.period < EventPeriod.VLONG.getIntervalNumber(System.currentTimeMillis())-1))
522 { return(null); }
523
524 //System.out.println("***Got: " + f.second);
525
526 // If Factor not definite/strong enough then return null...
527 if(Math.abs(pf.f.goodness * pf.f.correlation) < ExhibitPropsComputableMutable.GOODBAD_LIMIT)
528 { return(null); }
529
530 // Return a definite good/bad result.
531 return((pf.f.goodness >= 0) ? Boolean.TRUE : Boolean.FALSE);
532 }
533
534 /**Append to the result List any factors we find for this exhibit.
535 * This ignores any keys that cannot be constructed.
536 *
537 * @param esa the exhibit name/attr; never null
538 * @param currentPeriod current VLONG period; strictly positive
539 * @param result found correlates are appended to this value
540 * @param force if false then accept any extant value;
541 * if true then only accept values up to one period old
542 */
543 private void _findCorrelates(final ExhibitStaticAttr esa,
544 final AllExhibitProperties aep,
545 final long currentPeriod,
546 final List<ExhibitPropsComputableMutable.Factor> result,
547 final boolean force)
548 {
549 // Make all cache keys for this exhibit...
550 final Set<CS8Bit> keys = CorrType.makeCacheKeys(esa, aep);
551 // and update all keyed entries all with this exhibit's rating.
552 for(final CS8Bit key : keys)
553 {
554 final PeriodAndFactor pf = voteCorrCacheMap.get(key);
555 if(pf == null) { continue; }
556 if(!force)
557 { result.add(pf.f); }
558 else if(pf.period >= currentPeriod-1)
559 { result.add(pf.f); }
560 // else
561 // { result.remove(key); /* Remove stale value. */ }
562 }
563 }
564
565 /**Immutable empty correlates list. */
566 private static final ExhibitPropsComputableMutable.Factor[] NO_CORRELATES =
567 new ExhibitPropsComputableMutable.Factor[0];
568
569 /**Significance confidence threshold; non-negative.
570 * Computed correlations of confidence less than +/- this
571 * are discarded to save space.
572 * <p>
573 * A value between 0.001 (0.1%) and 0.1 (10%) is probably reasonable.
574 */
575 private static final float SIGNIFICANCE_CONF_THRESHOLD = 0.003f;
576
577 /**Significance count threshold for non-sparse correlations; non-negative.
578 * Computed non-sparse correlations with counts less than this
579 * are discarded to save space and time.
580 * <p>
581 * A value between 2 and 10 is probably reasonable;
582 * an odd value avoids recording a "neutral" score at minimum count.
583 */
584 private static final int SIGNIFICANCE_COUNT_THRESHOLD = 3;
585
586 /**The class in which we accumulate stats while recomputing correlations. */
587 private static final class Accum
588 {
589 /**Number of samples that have contributed to this value; strictly positive. */
590 final AtomicInteger count = new AtomicInteger();
591
592 /**Accumulated (sum) confidence value with negative goodness being treated as a negative confidence.
593 * All access protected by the instance lock.
594 */
595 private double totalConfidence;
596
597 synchronized double getTotalConfidence()
598 { return(totalConfidence); }
599
600 synchronized void addToTotalConfidence(final double delta)
601 { totalConfidence += delta; }
602 }
603
604 /**Time limit for update() in ms; strictly positive.
605 * A value of the order of a second or so is probably right.
606 * This allows some significant work to get done and cached,
607 * but should not block anything (such as page generation)
608 * for an unbearable amount of time even to a visitor.
609 * <p>
610 * This should still let us get a reasonable amount of work done
611 * so that we can rebuild the data incrementally if necessary.
612 */
613 private final int UPDATE_TIME_LIMIT_MS = 1023;
614
615 /**Private lock to prevent more than one thread at once expending effort in the core part of update(). */
616 private final ReentrantLock _uCoreLock = new ReentrantLock();
617
618 /**Maximum number of threads to allow in update(); strictly positive.
619 * It is probably worth allowing into update() a few threads
620 * to try to parallelise probably-I/O-bound sections at the start
621 * and to do CPU-bound computations concurrently on multi-CPU systems,
622 * but using too many concurrent threads will possibly just waste resources
623 * and overload I/O subsystems (eg back-end HTTP connections).
624 */
625 private static final int _computeMaxConcurrentUpdateThreads = Math.min(7,
626 1 + Runtime.getRuntime().availableProcessors());
627
628 /**Counting semaphore to limit update() first-phase concurrency; never null. */
629 private final Semaphore _uStartSem = new Semaphore(_computeMaxConcurrentUpdateThreads);
630
631 /**If true then try to load/compute early the scores of voted-for-exhibits.
632 * An eager set-up attempts to start its computations
633 * while part of its data (one of the "all" buckets) is still being fetched.
634 * <p>
635 * An eager setup should be able to compute the correlations more quickly,
636 * but at increased risk of using part/all somewhat/very stale values,
637 * and thus generating a less good result.
638 */
639 private static final boolean EAGER_VOTE_LOAD = true;
640
641 /**If true then try to use the "all" votes buckets rather than summing the other buckets.
642 * Using the "all" bucket may less accurate/comprehensive,
643 * and may delay initialisation if it has to be fetched from upstream.
644 */
645 private static final boolean USE_ALL_BUCKET = false;
646
647 /**Bring correlations data up to date.
648 * This may also perform housekeeping such as clearing stale state.
649 * <p>
650 * This may be a relatively expensive call,
651 * though if values are up-to-date then this should be quite cheap.
652 * <p>
653 * This is thread-safe and will use multiple caller threads to help fetch
654 * vote data and concurrently recompute exhibit scores,
655 * though second and subsequent concurrent calling threads
656 * will return immediately upon reaching the core correlation computations
657 * in order to prevent wasted duplicate effort.
658 * <p>
659 * If this routine cannot get values it needs then it throws an IOException.
660 * <p>
661 * No work will be done for an empty AEP.
662 * <p>
663 * This will abort with an IOException if taking too long
664 * (of the order of many seconds) or cannot otherwise ensure that
665 * the correlations data is up-to-date.
666 * Thus if this returns without throwing an IOException then
667 * the correlations data can be assumed to be up-to-date.
668 *
669 * @param aep current exhibit properties; never null
670 * @param vars handle on system variables; never null
671 * @param noTimeLimit if true, this runs until complete if possible
672 *
673 * @throws IOException if the correlations data cannot be guaranteed
674 * to be up-to-date
675 * and/or a problem was found fetching required data
676 * and/or the routine was taking to long to complete the calculations
677 */
678 public void update(final AllExhibitProperties aep,
679 final BasicVarMgrInterface vars,
680 final boolean noTimeLimit)
681 throws IOException
682 {
683 if((aep == null) || (vars == null))
684 { throw new IllegalArgumentException(); }
685
686 // Nothing to be done for an empty set of exhibits.
687 if(aep.aeid.length == 0) { return; }
688
689 // If we can find our marker key,
690 // and it is for the current period,
691 // then we can assume that computations are up-to-date.
692 final long currentPeriod = EventPeriod.VLONG.getIntervalNumber(System.currentTimeMillis());
693 final PeriodAndFactor entry = voteCorrCacheMap.get(MARKER_KEY);
694 if((entry != null) && (entry.period == currentPeriod))
695 { return; /* Apparently completely up-to-date! */ }
696
697 if(_uCoreLock.isLocked())
698 { throw new IOException("update() already busy in core phase; vote cache data is stale"); }
699
700
701 // The complete set of voted-for exhibits (pro and con).
702 // Guess size based on a few votes per sample period (ie per day).
703 // Note that these values are seen only by this thread.
704 final Set<Name.ExhibitFull> votedForNames = new HashSet<Name.ExhibitFull>(8 * SystemVariables.EVENT_SAMPLES_RETAINED);
705 final long currentPeriodL;
706
707 // If lots of external threads are needing the update() to be done
708 // then allow a few of them in to help with the work.
709 if(!_uStartSem.tryAcquire())
710 { throw new IOException("update() already busy in first phase; vote cache data is stale"); }
711 try
712 {
713 // Are we probably the first thread into this section?
714 // (It doesn't matter if we get this wrong occasionally.)
715 final boolean firstThreadIn = _uStartSem.availablePermits() == (_computeMaxConcurrentUpdateThreads-1);
716
717 // We are now really getting started...
718 final long startTime = System.currentTimeMillis();
719 // Note the (target) limit time;
720 // if really out of date then allow MUCH longer before timing out
721 // (unless conserving power right now)
722 // But in any case don't quit until this has made some progress
723 // to ensure that this does eventually complete.
724 final boolean reasonablyUpToDate =
725 ((entry != null) && (entry.period == currentPeriod-1));
726 final long stopBy = startTime +
727 Rnd.fastRnd.nextInt(UPDATE_TIME_LIMIT_MS) +
728 ((reasonablyUpToDate || GenUtils.mustConservePower()) ?
729 (UPDATE_TIME_LIMIT_MS>>1) : (UPDATE_TIME_LIMIT_MS<<1));
730
731 if(IsDebug.isDebug) { logger.info("[ExhibitPropsComputableMutableVoteCache.update(): cache size: " + voteCorrCacheMap.size() + ".]"); }
732
733
734 // Request/fetch the current-period event sets now,
735 // so that they may be ready later to be added into the mix
736 // if not immediately available.
737 final EventVariableValue pcur = vars.getEventValue(SystemVariables.VOTE_PRO, EventPeriod.VLONG, true);
738 final EventVariableValue ccur = vars.getEventValue(SystemVariables.VOTE_CON, EventPeriod.VLONG, true);
739
740 final List<EventVariableValue> availableALLBuckets = new ArrayList<EventVariableValue>(2);
741 if(USE_ALL_BUCKET)
742 {
743 // Get pro and con votes' "all" period event set.
744 // This is a heuristic to abort if we cannot get vote "all" buckets,
745 // and to try to encourage fetching of such data by the cache,
746 // or to continue quietly if what we currently have is OK,
747 // even empty if there seems to have been no recent voting.
748 //
749 // If we get null/stale/empty "all" buckets
750 // then abort and hope that they will be present/fresh
751 // the next time that we are called
752 // (eg allowing for asynchronous fetch/cacheing of these values).
753 final EventVariableValue pevvs[] = vars.getEventValues(
754 SystemVariables.VOTE_PRO, EventPeriod.VLONG, 0, null);
755 final EventVariableValue cevvs[] = vars.getEventValues(
756 SystemVariables.VOTE_CON, EventPeriod.VLONG, 0, null);
757 if((pevvs.length != 0) && (pevvs[0] != null)) { availableALLBuckets.add(pevvs[0]); }
758 if((cevvs.length != 0) && (cevvs[0] != null)) { availableALLBuckets.add(cevvs[0]); }
759 // If not eager and we don't have both "all" buckets right now
760 // then give up quickly, without burning much CPU time.
761 if(availableALLBuckets.size() < (EAGER_VOTE_LOAD ? 1 : 2))
762 { throw new IOException("missing vote one or both all buckets (early)"); }
763 }
764
765 // List of buckets from which we will extract (short) exhibit names.
766 // Starts with the pro/con 'all' buckets.
767 final List<EventVariableValue> buckets = new ArrayList<EventVariableValue>(availableALLBuckets);
768
769 // If not using the "all" vote buckets or
770 // if we are using "all" buckets but either is completely empty
771 // then we may wish to resort
772 // to explicitly fetching all/most available recent event periods
773 // and computing the union of their values.
774 if(!USE_ALL_BUCKET ||
775 ((availableALLBuckets.size() == 2) &&
776 ((availableALLBuckets.get(0).getTotalEventCount() == 0) ||
777 (availableALLBuckets.get(1).getTotalEventCount() == 0))))
778 {
779 if(IsDebug.isDebug && USE_ALL_BUCKET) { logger.warn("[ExhibitPropsComputableMutableVoteCache.update(): WARNING: at least one 'all' bucket is empty: collecting vote data the slow way...]"); }
780
781 // Remove extant dubious 'all' values.
782 if(USE_ALL_BUCKET) { buckets.clear(); }
783
784 // Forcing alternative (slow) mechanism...
785 // Get events for all retained slots.
786 final BitSet whichValues = new BitSet(SystemVariables.EVENT_SAMPLES_RETAINED);
787 whichValues.set(0, SystemVariables.EVENT_SAMPLES_RETAINED);
788 final SimpleVariableDefinition[] defs = new SimpleVariableDefinition[]{ SystemVariables.VOTE_PRO, SystemVariables.VOTE_CON };
789 // Have extra threads try for values in a different order for concurrency...
790 if(!firstThreadIn) { Collections.shuffle(Arrays.asList(defs), GenUtils.mustConservePowerExtreme() ? Rnd.fastRnd : Rnd.goodRnd); }
791 for(final SimpleVariableDefinition def : defs)
792 {
793 final EventVariableValue evvsAll[] = vars.getEventValues(
794 def, EventPeriod.VLONG, currentPeriod, whichValues);
795 for(final EventVariableValue evv : evvsAll)
796 {
797 if((evv != null) && (evv.getTotalEventCount() != 0))
798 { buckets.add(evv); }
799 }
800 }
801 }
802
803 // Throw in current-slot votes at the last moment.
804 // Try and fetch them again if they were empty when we last tried.
805 // These data are nice-to-have if we can get them,
806 // since they make the correlation values as fresh as possible.
807 buckets.add((pcur.getTotalEventCount() != 0) ? pcur :
808 vars.getEventValue(SystemVariables.VOTE_PRO, EventPeriod.VLONG, true));
809 buckets.add((ccur.getTotalEventCount() != 0) ? ccur :
810 vars.getEventValue(SystemVariables.VOTE_CON, EventPeriod.VLONG, true));
811
812
813 // If eager then get/compute votes/scores ASAP,
814 // even when we don't have both "all" buckets.
815 //
816 // We convert from short name to long/full form
817 // dropping any exhibit (name) no longer extant.
818 final long calcVoteFactorStart = System.currentTimeMillis();
819 currentPeriodL = MemoryTools.intern(Long.valueOf(currentPeriod));
820 // Scramble order of buckets to help ensure that
821 // parallel threads don't waste too much time redoing the same work.
822 if(!firstThreadIn) { Collections.shuffle(buckets, Rnd.fastRnd); }
823 int checked = 0; // To enable some sort of progress report...
824 for(final EventVariableValue evv : buckets)
825 {
826 ++checked; // Track progress...
827
828 // Bucket entries should never be null.
829 assert(evv != null);
830
831 List<Object> so = evv.getDistinctValuesInRankOrder();
832 // Again, shuffle work order to help avoid threads colliding.
833 if(!firstThreadIn)
834 {
835 so = new ArrayList<Object>(so);
836 Collections.shuffle(so, Rnd.fastRnd);
837 }
838 for(final Object key : so)
839 {
840 if(!(key instanceof String)) { continue; }
841 final String shortName = (String) key;
842 final Name.ExhibitFull exhibitName = aep.aeid.getFullName(shortName);
843 if(exhibitName == null) { continue; }
844 // Skip if we've already handled/added this exhibitName.
845 if(!votedForNames.add(exhibitName)) { continue; }
846
847 // If eagerly fetching/computing vote values then do it now,
848 // possibly in parallel with fetching one of the "all" buckets.
849 if(EAGER_VOTE_LOAD)
850 {
851 // ----------------------------------------------------
852 // Bring at least one vote up to date
853 // if not all yet up-to-date.
854
855 // Get any extant cached vote value for this exhibit.
856 final PeriodAndFactor oldVVB = voteCorrCacheMapByFullName.get(exhibitName);
857 if((oldVVB != null) && (oldVVB.period == currentPeriod))
858 { continue; /* Already up-to-date. */ }
859
860 // Compute a new/current vote value for this exhibit.
861 final ExhibitPropsComputableMutable.Factor result =
862 ExhibitPropsComputableMutable.calcVoteFactor(exhibitName, aep, vars, 0);
863 voteCorrCacheMapByFullName.put(exhibitName,
864 new PeriodAndFactor(currentPeriodL, result));
865
866 // Abort if we've run out of time.
867 if(!noTimeLimit && (System.currentTimeMillis() > stopBy))
868 {
869 /* if(IsDebug.isDebug) */ { logger.info("[ExhibitPropsComputableMutableVoteCache.update(): ABORTED calcVoteFactor: ran out of time (checked "+checked+"/"+buckets.size()+") @ "+(System.currentTimeMillis()-calcVoteFactorStart)+"ms]"); }
870 throw new IOException("ran out of time in update(); data not brought up-to-date yet because not all voted-for-exhibits up-to-date yet");
871 }
872
873 // Abort if another thread has already made it as far as
874 // the core section (so we can't get in).
875 if(_uCoreLock.isLocked())
876 {
877 /* if(IsDebug.isDebug) */ { logger.info("[ExhibitPropsComputableMutableVoteCache.update(): ABORTED (another thread already in update() core) calcVoteFactor @ time "+(System.currentTimeMillis()-calcVoteFactorStart)+"ms]"); }
878 throw new IOException("another thread already in update() core");
879 }
880 }
881 }
882 }
883 final long calcVoteFactorEnd = System.currentTimeMillis();
884 /* if(IsDebug.isDebug) */ { System.out.println("[ExhibitPropsComputableMutableVoteCache.update(): calcVoteFactor time "+(calcVoteFactorEnd-calcVoteFactorStart)+"ms]"); }
885
886 // If we have not yet managed to get both buckets
887 // and thus all the exhibit names that they contain
888 // then we cannot go any further for the moment.
889 if(EAGER_VOTE_LOAD && USE_ALL_BUCKET && (availableALLBuckets.size() < 2))
890 { throw new IOException("too few all vote buckets (EAGER)"); }
891 if(USE_ALL_BUCKET) { assert(availableALLBuckets.size() == 2) : "must have at least both pro and con `all' buckets or full set of pro/con buckets by now"; }
892
893 if(IsDebug.isDebug) { System.out.println("[ExhibitPropsComputableMutableVoteCache.update(): votedForNames size: " + votedForNames.size() + ".]"); }
894 }
895 finally
896 { _uStartSem.release(); }
897
898 // Only allow at most one (external) thread at once
899 // to consume CPU in the core part of update()
900 // so as to avoid wasting resources
901 // though we will inject local parallelism directly for speed.
902 if(!_uCoreLock.tryLock()) { throw new IOException("update() already busy in core; vote cache data is stale"); }
903 try
904 {
905 // Double-check that the work was not completed by another thread
906 // while we were in an earlier section!
907 final PeriodAndFactor e2 = voteCorrCacheMap.get(MARKER_KEY);
908 if((e2 != null) && (e2.period == currentPeriod))
909 { return; /* Apparently completely up-to-date! */ }
910
911 // For every voted-for exhibit,
912 // and for every correlation type that we can compute,
913 // accumulate correlation stats.
914 // We then discard any that are insignificant,
915 // and insert the remaining significant values into the cache.
916 //
917 // Also compute the mean count per non-sparse value
918 // as a baseline against which non-sparse values with lower counts
919 // can be rated for confidence/certainty.
920 //
921 // We optimistically assume that in the typical case
922 // we can get through this entire operation quickly
923 // (especially since we parallelise it)
924 // so we don't bother splitting it into phases.
925 //
926 // We expect this work to be mainly compute-bound.
927 final SortedSet<String> attrWords = ExhibitAttrUtils.getAttrWords().getAttrWordsSortedSet();
928
929 final AtomicInteger totalNonSparseEntries = new AtomicInteger();
930 final AtomicInteger totalNonSparseCount = new AtomicInteger();
931 final ConcurrentHashMap<CS8Bit,Accum> working = new ConcurrentHashMap<CS8Bit, Accum>(17 + 19*votedForNames.size());
932 final long beforeCompute = System.currentTimeMillis();
933
934 // List of results that we can wait for completion of.
935 final List<Future<?>> tasks = new ArrayList<Future<?>>(votedForNames.size());
936
937 // Post tasks for each of the exhibits that has been voted for.
938 for(final Name.ExhibitFull exhibitName : votedForNames)
939 {
940 tasks.add(ThreadUtils.computeIntensiveThreadPool.submit(new Runnable(){
941 public final void run()
942 {
943 final ExhibitStaticAttr esa = aep.aeid.getStaticAttr(exhibitName);
944 // If exhibit appears not to exist any more then we're done...
945 // (This is in practice very rare.)
946 if(esa == null) { return; }
947
948 // Get/compute the current/new vote value for this exhibit.
949 final ExhibitPropsComputableMutable.Factor vote;
950 try { vote = calcVoteFactor(exhibitName, aep, vars); }
951 catch(final IOException e)
952 {
953 // If we get an IOException here
954 // then we probably have to abort the entire calculation
955 // and restart later.
956 e.printStackTrace();
957 throw new IllegalStateException("unexpected IOException: will have to restart", e);
958 }
959 assert(vote != null);
960
961 // Make all cache keys for this exhibit.
962 final Set<CS8Bit> keys = CorrType.makeCacheKeys(esa, aep);
963 // Count the words in this exhibit for scaling...
964 final int nWords = ExhibitName.getMainWordsCount(esa.getCharSequence(), attrWords);
965 // Update all keyed entries with this exhibit's rating.
966 for(final CS8Bit key : keys)
967 {
968 // Is this a "sparse" correlation type?
969 final CorrType corrType = CorrType.extractKeyTypeFromPrefix(key);
970 final boolean isSparse = corrType.isSparse();
971
972 // Is this an especially "dense" term that needs scaling
973 // so as not to swamp other results.
974 final boolean needsScalingByWordCount =
975 (corrType == CorrType.STEM_WORD) ||
976 (corrType == CorrType.PREFIX);
977
978 // Retrieve existing accumulator, or create a new one.
979 Accum ac;
980 while(null == (ac = working.get(key)))
981 {
982 // Create new all-zeros accumulator
983 // if one not already inserted.
984 // (This is race-free c/o putIfAbsent().)
985 working.putIfAbsent(key, new Accum());
986 }
987 // Update the accumulator.
988 final int count = ac.count.incrementAndGet();
989 final float corr = (!needsScalingByWordCount) ?
990 vote.correlation : (vote.correlation / nWords);
991 if(vote.goodness < 0) { ac.addToTotalConfidence(-corr); }
992 else if(vote.goodness > 0) { ac.addToTotalConfidence(+corr); }
993
994 // Note non-sparse entries that have a significant count.
995 if(!isSparse && (count >= SIGNIFICANCE_COUNT_THRESHOLD))
996 {
997 if(count == SIGNIFICANCE_COUNT_THRESHOLD)
998 { totalNonSparseEntries.incrementAndGet(); /* Note new (non-sparse) entry. */ }
999 totalNonSparseCount.incrementAndGet();
1000 }
1001 }
1002 }
1003 }));
1004 }
1005
1006 // Wait for all the tasks to complete...
1007 assert(tasks.size() == votedForNames.size());
1008 for(final Future<?> task : tasks)
1009 {
1010 try { task.get(); }
1011 catch(final InterruptedException e)
1012 {
1013 e.printStackTrace();
1014 final IOException et = new InterruptedIOException("unexpected interrupt during calculation");
1015 et.initCause(e);
1016 throw et;
1017 }
1018 catch(final ExecutionException e)
1019 {
1020 e.printStackTrace();
1021 final IOException et = new IOException("unexpected exception during calculation");
1022 et.initCause(e);
1023 throw et;
1024 }
1025 }
1026
1027 final long afterCompute = System.currentTimeMillis();
1028 /*if(IsDebug.isDebug) */ { logger.info("[ExhibitPropsComputableMutableVoteCache.update(): *** correlation factors compute time "+(afterCompute-beforeCompute)+"ms.]"); }
1029
1030 final int tNSE = totalNonSparseEntries.get();
1031 final double meanNonSparseCount = (tNSE < 1) ? 0.0
1032 : (totalNonSparseCount.get() / (double) tNSE);
1033 final double mNSC_min = Math.max(meanNonSparseCount,
1034 SIGNIFICANCE_COUNT_THRESHOLD);
1035
1036 /* if(IsDebug.isDebug) */ { logger.info("[ExhibitPropsComputableMutableVoteCache.update(): votedForNames.size()="+votedForNames.size()+", working.size()="+working.size()+", meanNonSparseCount="+meanNonSparseCount + ".]"); }
1037
1038 // Go through the working set,
1039 // converting significant results into main cache entries.
1040 final boolean doComment = !IsDebug.isDebug; // MemoryTools.lotsFree() && !GenUtils.mustConservePowerExtreme();
1041 final long finalComputePhaseStart = System.currentTimeMillis();
1042 for(final CS8Bit key : working.keySet())
1043 {
1044 // Is this a "sparse" correlation type?
1045 final boolean isSparse = CorrType.extractKeyTypeFromPrefix(key).isSparse();
1046
1047 final Accum ac = working.get(key);
1048 assert(ac != null);
1049
1050 // Ignore values with very small counts
1051 // (except for explicit "sparse" types)
1052 // as potentially unreliable (or at least very noisy).
1053 final int count = ac.count.get();
1054 if(!isSparse && (count < SIGNIFICANCE_COUNT_THRESHOLD))
1055 { continue; }
1056
1057 assert(ac.count.get() > 0);
1058 assert(!Double.isInfinite(ac.getTotalConfidence()));
1059 assert(!Double.isNaN(ac.getTotalConfidence()));
1060 // Compute mean/normalised confidence
1061 // scaling as appropriate for below-mean counts.
1062 // We scale by the expected extra variance of a low sample size.
1063 final double meanConf = Math.abs(ac.getTotalConfidence() / count);
1064 final double scaledConf =
1065 (/* isSparse || */ (count >= mNSC_min)) ? meanConf :
1066 (meanConf * Math.sqrt(count / mNSC_min));
1067
1068 // If too small to be significant, then don't use it.
1069 // Test in such a way as to reject NaN values too,
1070 // ie unless a test succeeds, since all tests against NaN fail.
1071 if(!(scaledConf > SIGNIFICANCE_CONF_THRESHOLD)) { continue; }
1072
1073 // Make our factor with a helpful comment.
1074 final String comment = doComment ? (key + " count=" + count) : null;
1075 final ExhibitPropsComputableMutable.Factor f = new ExhibitPropsComputableMutable.Factor(
1076 (ac.getTotalConfidence() < 0) ? -1 : +1,
1077 scaledConf);
1078
1079 // Insert the new Factor into the cache.
1080 // There will be many, necessarily-unique, keys,
1081 // so don't needlessly add overhead and inflate memory by intern()ing them.
1082 voteCorrCacheMap.put(key, new PeriodAndFactor(currentPeriodL, f));
1083 }
1084 final long finalComputePhaseEnd = System.currentTimeMillis();
1085 /* if(IsDebug.isDebug) */ { logger.info("[ExhibitPropsComputableMutableVoteCache.update(): final phase compute time "+(finalComputePhaseEnd-finalComputePhaseStart)+"ms.]"); }
1086
1087 // Insert a distinguished marker key
1088 // so that it is quick to check when we last recomputed values here.
1089 // We do this only when all updates are complete.
1090 voteCorrCacheMap.put(MARKER_KEY,
1091 new PeriodAndFactor(currentPeriodL, ExhibitPropsComputableMutable.FACTOR_ZERO));
1092
1093 // Remove any stale state since we now have up-to-date values.
1094 _clearStaleState(voteCorrCacheMap);
1095
1096 /* if(IsDebug.isDebug) */ { logger.info("update(): final cache size: " + voteCorrCacheMap.size() + ", voteCorrCacheMapByFullName.size()="+voteCorrCacheMapByFullName.size()); }
1097 }
1098 finally { _uCoreLock.unlock(); }
1099
1100 // Dump the correlations table for debug purposes,
1101 // but outside the lock so as not to hold things up unduly...
1102 // Only show the non-sparse entries.
1103 if(IsDebug.isDebug)
1104 {
1105 // Nicely dump the table in sorted order...
1106 final List<CS8Bit> keys = new ArrayList<CS8Bit>(voteCorrCacheMap.size());
1107 // Capture the key set atomically...
1108 synchronized(voteCorrCacheMap) { keys.addAll(voteCorrCacheMap.keySet()); }
1109 // ... and sort our copy to iterate over.
1110 Collections.sort(keys);
1111 // Make a re-usable working buffer.
1112 final StringBuilder sb = new StringBuilder(255);
1113
1114 // Count of key types.
1115 final EnumMap<CorrType, Integer> tc = new EnumMap<ExhibitPropsComputableMutableVoteCache.CorrType, Integer>(CorrType.class);
1116
1117 for(final CS8Bit key : keys)
1118 {
1119 final CorrType keyType = CorrType.extractKeyTypeFromPrefix(key);
1120 if(!tc.containsKey(keyType)) { tc.put(keyType, 0); } else { tc.put(keyType, 1 + tc.get(keyType)); }
1121
1122 // Skip the sparse entries (there may be *lots* of them)...
1123 if(keyType.isSparse()) { continue; }
1124
1125 sb.setLength(0); // Clear the buffer.
1126 sb.append(" "); // Indent...
1127 sb.append(keyType).append(':').append(CorrType.extractKeyTail(key)).append(" = ");
1128 final PeriodAndFactor pf = voteCorrCacheMap.get(key);
1129 if(pf != null)
1130 {
1131 sb.append((pf.f.goodness < 0) ? '-' : '+');
1132 sb.append(pf.f.correlation);
1133 }
1134 logger.info(sb);
1135 }
1136
1137 logger.info("count by key type: " + tc);
1138
1139 // As as 2012/03/15:
1140 // final cache size: 2039
1141 // count by key type: {VARIANT=365, SIMILAR=326, SIZE=10, ATTRIBUTE=13, RESOLUTION=9, MAIN_WORD=62, CATEGORY=21, TYPE=2, AUTHOR=22, MIME1=0, PREFIX=456, STEM_WORDS=741}
1142 }
1143 }
1144
1145 /**Special marker key value to indicate when correlations last computed; not null.
1146 * Legal key, and never conflicts with any normal usage.
1147 */
1148 private static final CS8Bit MARKER_KEY = new CS8Bit(CorrType.AUTHOR.makeUniqueKeyPrefix());
1149
1150
1151 /**Remove stale state from the map passed in.
1152 * A lock is held on the map while this is done
1153 * to prevent it being altered unexpectedly.
1154 * <p>
1155 * Only remove things more than one period old
1156 * so that it is possible to gently recompute values in the background
1157 * rather than being bounced into it on the roll into a new period.
1158 *
1159 * @param map non-null map.
1160 */
1161 static private void _clearStaleState(final Hashtable<?, PeriodAndFactor> map)
1162 {
1163 synchronized(map)
1164 {
1165 // Compute the current period.
1166 final long currentPeriod = EventPeriod.VLONG.getIntervalNumber(System.currentTimeMillis());
1167
1168 for(final Iterator<PeriodAndFactor> it = map.values().iterator(); it.hasNext(); )
1169 {
1170 final PeriodAndFactor tp = it.next();
1171 // Simply weed out stale entries,
1172 // ie anything not in the current VLONG period or the one before.
1173 if(tp.period < currentPeriod-1)
1174 { it.remove(); continue; }
1175 }
1176 }
1177 }
1178 }