001    /*
002    Copyright (c) 1996-2012, Damon Hart-Davis
003    All rights reserved.
004    
005    Redistribution and use in source and binary forms, with or without
006    modification, are permitted provided that the following conditions are
007    met:
008    
009      * Redistributions of source code must retain the above copyright
010        notice, this list of conditions and the following disclaimer.
011    
012      * Redistributions in binary form must reproduce the above copyright
013        notice, this list of conditions and the following disclaimer in the
014        documentation and/or other materials provided with the
015        distribution.
016    
017    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
018    IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
019    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
020    PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
021    OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
022    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
023    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
024    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
025    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
026    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
027    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
028    */
029    
030    package org.hd.d.pg2k.svrCore;
031    
032    import java.io.IOException;
033    import java.io.InterruptedIOException;
034    import java.util.ArrayList;
035    import java.util.Arrays;
036    import java.util.BitSet;
037    import java.util.Collections;
038    import java.util.EnumMap;
039    import java.util.Enumeration;
040    import java.util.HashSet;
041    import java.util.Hashtable;
042    import java.util.Iterator;
043    import java.util.List;
044    import java.util.Set;
045    import java.util.SortedSet;
046    import java.util.concurrent.ConcurrentHashMap;
047    import java.util.concurrent.ExecutionException;
048    import java.util.concurrent.Future;
049    import java.util.concurrent.Semaphore;
050    import java.util.concurrent.atomic.AtomicInteger;
051    import java.util.concurrent.locks.ReentrantLock;
052    
053    import org.apache.log4j.Logger;
054    import org.hd.d.pg2k.svrCore.Name.ExhibitFull;
055    import org.hd.d.pg2k.svrCore.MIME.ExhibitMIME;
056    import org.hd.d.pg2k.svrCore.vars.BasicVarMgrInterface;
057    import org.hd.d.pg2k.svrCore.vars.EventPeriod;
058    import org.hd.d.pg2k.svrCore.vars.EventVariableValue;
059    import org.hd.d.pg2k.svrCore.vars.SimpleVariableDefinition;
060    import org.hd.d.pg2k.svrCore.vars.SystemVariables;
061    
062    import ORG.hd.d.IsDebug;
063    
064    /**
065     * Created by IntelliJ IDEA.
066     * User: DHD
067     * Date: 02-Jul-2006
068     * Time: 16:40:48
069     */
070    
071    /**Class to cache vote computations and correlated values.
072     * In particular this aims to avoid repeated expensive recomputation
073     * of underlying vote values so as global correlations can be
074     * efficiently computed.
075     */
076    public final class ExhibitPropsComputableMutableVoteCache implements ExhibitPropsComputableMutableVoteCacheIF
077        {
078        private static final Logger logger = Logger.getLogger(ExhibitPropsComputableMutableVoteCache.class);
079    
080        /**Construct default empty cache. */
081        public ExhibitPropsComputableMutableVoteCache() { }
082    
083        /**Correlation types.
084         * By default most correlation types/data is "dense",
085         * ie there should be many examples for and against each value.
086         * <p>
087         * Some types are "sparse",
088         * ie where we usually expect only one or two examples for a given value,
089         * and so we want to filter these more gently than usual.
090         * <p>
091         * TODO: accession year and month
092         * <p>
093         * TODO: EXIF/JAI values
094         */
095        public static enum CorrType
096            {
097            /**By variation on same exhibit (sparse). */        VARIANT(true),
098            /**By similar exhibit (sparse). */                  SIMILAR(true),
099            /**By exhibit size (log bytes). */                  SIZE,
100            /**By exhibit attribute word(s). */                 ATTRIBUTE,
101            /**By exhibit resolution (eg log pixels). */        RESOLUTION,
102            /**By exhibit main word. */                         MAIN_WORD,
103            /**By exhibit category. */                          CATEGORY,
104            /**By exhibit type. */                              TYPE,
105            /**By author. */                                    AUTHOR,
106            /**By MIME primary type component. */               MIME1,
107            /**By prefixes. */                                  PREFIX,
108            /**By all stem words. */                            STEM_WORD;
109    
110            /**By default correlations are not expected to be sparse. */
111            private CorrType() { sparse = false; }
112            /**Construct correlation with specified sparseness. */
113            private CorrType(final boolean isSparse) { sparse = isSparse; }
114    
115            /**Is this correlation "sparse"? */
116            private final boolean sparse;
117            /**Get the "sparseness" flag. */
118            public final boolean isSparse() { return(sparse); }
119    
120            /**Unique (one-character) prefix, intern()ed for good luck; not null. */
121            private final String keyPrefix = Integer.toString(ordinal(), Character.MAX_RADIX).intern();
122            /**Make unique cache key prefix; never null nor empty.
123             * Either fixed length or followed by a ':'.
124             */
125            public final String makeUniqueKeyPrefix() { return(keyPrefix); }
126            /**Verify that all prefixes will be exactly one character long. */
127            static { if(IsDebug.isDebug) { assert(CorrType.values().length <= Character.MAX_RADIX); } }
128            /**Extract key type from prefix of key as created by makeUniqueKeyPrefix(); never null. */
129            public static CorrType extractKeyTypeFromPrefix(final CS8Bit key)
130                { return(CorrType.values()[Character.digit(key.charAt(0), Character.MAX_RADIX)]); }
131            /**Remove prefix from (non-null, non-empty) key returning the tail; never null. */
132            public static CharSequence extractKeyTail(final CS8Bit key)
133                { return(key.subSequence(1, key.length())); }
134    
135            /**Make the appropriate cache lookup key(s) for a given value and full valid exhibit name; never null.
136             * Add zero-or-more, non-null, non-empty cache key(s) of the form:
137             *     <samp>uniqueprefixforenumvalue value</samp> eg <samp>SIZE:16</samp> or <samp>y16</samp>
138             * <p>
139             * For keys whose plain form would be bulky (and thus represent significant memory footprint)
140             * their hash may be stored instead, with the attendant minuscule chance of false collision with another key value.
141             *
142             * @param corrType  the correlation type; never null
143             * @param esa  exhibit details; never null
144             * @param aep  never null
145             * @param result  non-null, mutable Set to which result is added
146             *
147             * @throws IOException  if key cannot be constructed
148             */
149            static void makeCacheKey(final CorrType corrType,
150                                     final ExhibitStaticAttr esa,
151                                     final AllExhibitProperties aep,
152                                     final Set<CS8Bit> result)
153                throws IOException
154                {
155                assert(corrType != null);
156                assert(esa != null);
157                assert(aep != null);
158    
159                final ExhibitFull exhibitFullName = esa.getExhibitFullName();
160                switch(corrType)
161                    {
162                    case AUTHOR:
163                        { result.add(new CS8Bit(corrType.makeUniqueKeyPrefix() + ExhibitName.getAuthorComponent(exhibitFullName))); return; }
164    
165                    case TYPE:
166                        { result.add(new CS8Bit(corrType.makeUniqueKeyPrefix() + ExhibitName.getExtensionComponent(exhibitFullName))); return; }
167    
168                    case CATEGORY:
169                        { result.add(new CS8Bit(corrType.makeUniqueKeyPrefix() + ExhibitName.getCategoryComponent(exhibitFullName))); return; }
170    
171                    case MAIN_WORD:
172                        {
173                        // Just the main word.
174                        final Name.ExhibitShort f = exhibitFullName.getShortName();
175                        final int dash = TextUtils.indexOf(f, ExhibitName.WORD_SEP);
176                        final CharSequence mainWord = f.subSequence(0, dash);
177                        result.add(new CS8Bit(corrType.makeUniqueKeyPrefix() + mainWord));
178                        return;
179                        }
180    
181                    case MIME1:
182                        {
183                        final ExhibitMIME.ExhibitTypeParameters type = (ExhibitMIME.getInputFileType(exhibitFullName));
184                        if(type == null) { return; }
185                        final int slash = type.mimeType.indexOf('/');
186                        if(slash < 1) { return; }
187                        final String primaryComponent = type.mimeType.substring(0, slash);
188                        result.add(new CS8Bit(corrType.makeUniqueKeyPrefix() + primaryComponent));
189                        return;
190                        }
191    
192                    case SIMILAR:
193                        {
194                        // "Similar" means the main-words component matches.
195                        // Can be long, so stored by hash code.
196                        result.add(new CS8Bit(corrType.makeUniqueKeyPrefix() +
197                                TextUtils.hashCodeHexString(exhibitFullName.getShortName().getMainWordsComponent(ExhibitAttrUtils.getAttrWords().getAttrWordsSortedSet()))));
198                        return;
199                        }
200    
201                    case VARIANT:
202                        {
203                        // Can be long, so stored by hash code.
204                        final CharSequence mainWords = exhibitFullName.getShortName().getMainWordsComponent(
205                            ExhibitAttrUtils.getAttrWords().getAttrWordsSortedSet());
206                        final int num = ExhibitName.getNumberInSeriesComponent(exhibitFullName);
207                        final CharSequence auth = ExhibitName.getAuthorComponent(exhibitFullName);
208                        final StringBuilder sb = new StringBuilder(8 + mainWords.length());
209                        sb.append(mainWords).append('-');
210                        sb.append(num).append('-');
211                        sb.append(auth);
212                        result.add(new CS8Bit(corrType.makeUniqueKeyPrefix() + TextUtils.hashCodeHexString(sb)));
213                        return;
214                        }
215    
216                    case SIZE: // Size in bytes grouped into buckets by exponent.
217                        { result.add(new CS8Bit(corrType.makeUniqueKeyPrefix() + GenUtils.log2Approx(esa.length))); return; }
218    
219                    case RESOLUTION:
220                        {
221                        final StringBuilder sb = new StringBuilder();
222                        sb.append(corrType.makeUniqueKeyPrefix());
223    
224                        // Some notion of the raw information content of the exhibit.
225                        // Zero-or-more items extracted from meta-data.
226                        final ExhibitPropsComputable ePC =
227                            aep.getExhibitPropsComputable(exhibitFullName);
228                        if(ePC != null)
229                            {
230                            final java.awt.Dimension xyDim = ePC.getXyDimensions();
231                            if(xyDim != null)
232                                {
233                                // Add pixel resolution for images.
234                                sb.append("px");
235                                sb.append(GenUtils.log2Approx(xyDim.width * (long) xyDim.height));
236                                result.add(new CS8Bit(sb));
237                                }
238    
239                            // TODO: should have sample rate for audio, etc.
240    //                        final Node md = ePC.getMetadata();
241                            }
242    
243                        // We will not have created any key
244                        // if no useful metadata was available.
245                        return;
246                        }
247    
248                    case ATTRIBUTE:
249                        {
250                        // Get the exhibit's attribute words, if any.
251                        final Set<String> aws = ExhibitName.getAttributeWordsComponentSortedSet(
252                            exhibitFullName, ExhibitAttrUtils.getAttrWords().getAttrWordsSortedSet());
253                        // Prefix them and add them to the result.
254                        for(final String aw : aws)
255                            { result.add(new CS8Bit(corrType.makeUniqueKeyPrefix() + aw)); }
256                        return;
257                        }
258    
259                    case STEM_WORD:
260                        {
261                        // By all (mono-cased) stem words.
262                        final Enumeration<?> mainWords = ExhibitName.getMainWords(
263                            exhibitFullName, ExhibitAttrUtils.getAttrWords().getAttrWordsSortedSet());
264                        while(mainWords.hasMoreElements())
265                            { result.add(new CS8Bit(corrType.makeUniqueKeyPrefix() + ((String) mainWords.nextElement()).toLowerCase())); }
266                        return;
267                        }
268    
269                    case PREFIX:
270                        {
271                        // By all stem prefixes of the "virtual" name.
272                        // Can be long, so stored by hash code.
273                        final Enumeration<?> mainWords = ExhibitName.getMainWords(
274                            exhibitFullName, ExhibitAttrUtils.getAttrWords().getAttrWordsSortedSet());
275                        final StringBuilder sb = new StringBuilder(exhibitFullName.length() - 4);
276                        sb.append(ExhibitName.getCategoryComponent(exhibitFullName));
277                        sb.append('/');
278                        while(mainWords.hasMoreElements())
279                            {
280                            final String word = ((String) mainWords.nextElement());
281                            sb.append(word).append(ExhibitName.WORD_SEP);
282                            result.add(new CS8Bit(corrType.makeUniqueKeyPrefix() + TextUtils.hashCodeHexString(sb)));
283                            }
284                        return;
285                        }
286    
287                    default:
288                        { throw new Error("Cannot generate key for correlation type " + corrType); }
289                    }
290                }
291    
292            /**Make all cache keys for a given exhibit; never null.
293             * This may return a variable number of keys,
294             * and may ignore those that cannot currently be made and
295             * are unlikely ever to be made,
296             * but is as consistent as possible for any one exhibit.
297             *
298             * @return non-null, possibly-empty, set of cache keys
299             */
300            static Set<CS8Bit> makeCacheKeys(final ExhibitStaticAttr esa,
301                                           final AllExhibitProperties aep)
302                {
303                final HashSet<CS8Bit> result = new HashSet<CS8Bit>();
304                for(final CorrType ct : CorrType.values())
305                    {
306                    try { makeCacheKey(ct, esa, aep, result); }
307                    catch(final IOException e) { /* Ignore ungeneratable keys. */ }
308                    }
309                return(result);
310                }
311            }
312    
313        /**Immutable records of computed Factor and the period for/in which it was computed. */
314        static final class PeriodAndFactor
315            {
316            /**VLONG period number. */
317            final long period;
318            /**Computed Factor; never null. */
319            final ExhibitPropsComputableMutable.Factor f;
320            /**Make an instance. */
321            PeriodAndFactor(final long period, final ExhibitPropsComputableMutable.Factor f)
322                {
323                assert(null != f);
324                this.period = period;
325                this.f = f;
326                }
327            }
328    
329        /**Map from vote/correlation key to computed Factor and period for which it was computed; never null after construction/deserialisation.
330         * Since all our calculations depend on VLONG events,
331         * cache entries need only be invalidated when the period number changes.
332         * <p>
333         * The key format is a CorrType name followed by a colon
334         * followed by the rest of the key.
335         * <p>
336         * This state can be serialised; it is also recomputed on demand.
337         * <p>
338         * We use a Hashtable to be thread-safe.
339         */
340        private final Hashtable<CS8Bit, PeriodAndFactor> voteCorrCacheMap = new Hashtable<CS8Bit, PeriodAndFactor>();
341    
342        /**Map from full exhibit name to computed Factor and period for which it was computed; never null after construction/deserialisation.
343         * Since all our calculations depend on VLONG events,
344         * cache entries need only be invalidated when the period number changes.
345         * <p>
346         * This state can be serialised; it is also recomputed on demand.
347         * <p>
348         * We use a Hashtable to be thread-safe.
349         */
350        private final Hashtable<Name.ExhibitFull, PeriodAndFactor> voteCorrCacheMapByFullName = new Hashtable<Name.ExhibitFull, PeriodAndFactor>();
351    
352        /**Compute the vote or retrieve from cache; never null.
353         * A computed/cached value is valid until the VLONG period changes.
354         * <p>
355         * The returned factor's goodness can range from -1 to +1,
356         * and confidence from 0 to +1;
357         * any scaling required will have to be applied elsewhere.
358         *
359         * @param exhibitName  full, valid exhibit name; never null
360         * @param aep  never null
361         * @param vars  never null
362         * @return  vote Factor for the given exhibit; never null.
363         * @throws java.io.IOException
364         */
365        public final ExhibitPropsComputableMutable.Factor calcVoteFactor(
366                final ExhibitFull exhibitName,
367                final AllExhibitProperties aep,
368                final BasicVarMgrInterface vars)
369            throws IOException
370            {
371            if((null == exhibitName) || (aep == null) || (vars == null)) { throw new IllegalArgumentException(); }
372    
373            // Avoid clogging up our cache for non-existent exhibits...
374            if(!aep.aeid.isPresent(exhibitName))
375                { throw new IllegalArgumentException("no such exhibit"); }
376    
377            // Compute the current period.
378            final long currentPeriod = EventPeriod.VLONG.getIntervalNumber(System.currentTimeMillis());
379    
380            // Get the current cached value, if any.
381            final PeriodAndFactor cached =
382                voteCorrCacheMapByFullName.get(exhibitName);
383            if(cached != null)
384                {
385                // If it is not null, but is stale,
386                // then fall through to recompute the entry.
387                // (There is a small race risk here,
388                // but the worst that can happen is some wasted
389                // recomputations, not a wrong result.)
390                final long l = cached.period;
391                // If the value is still current then return it!
392                if(l == currentPeriod)
393                    { return(cached.f); }
394    //            // Clear stale cached value.
395    //            else
396    //                { voteCorrCacheMapByFullName.remove(exhibitName); }
397                }
398    
399            // No cached value, or stale, so (re)compute it,
400            // and store the new value in the cache.
401            final ExhibitPropsComputableMutable.Factor result =
402                ExhibitPropsComputableMutable.calcVoteFactor(exhibitName, aep, vars, 0);
403            voteCorrCacheMapByFullName.put(exhibitName,
404                new PeriodAndFactor(currentPeriod, result));
405    
406            // Return our result...
407            return(result);
408            }
409    
410        /**Get correlates for specified exhibit; never null but result may be empty.
411         * This is based on votes of other "related" exhibits.
412         * <p>
413         * The goodness of each Factor is either -1 or +1,
414         * with the correlation/confidence ranging between 0 and 1.
415         * <p>
416         * This will return values computed up to one period ago if need be,
417         * to help avoid a sudden splurge of CPU effort as we tick from one period
418         * to the next.
419         * <p>
420         * No particular ordering of the results is guaranteed,
421         * but there will be no duplicates and no nulls.
422         *
423         * @param force  if true, force complete computation if need be,
424         *     else we will just return what we have in cache;
425         *     complete recomputation may be expensive but should last a long time
426         *     and we will abort with an IOException if we cannot complete
427         *     the recomputation in a reasonable time
428         *
429         * @throws IOException cannot extract required correlates
430         */
431        public ExhibitPropsComputableMutable.Factor[] getCorrelates(
432                final ExhibitStaticAttr esa,
433                final AllExhibitProperties aep,
434                final BasicVarMgrInterface vars,
435                final boolean force)
436            throws IOException
437            {
438            if((esa == null) || (aep == null) || (vars == null))
439                { throw new IllegalArgumentException(); }
440    
441            // Avoid clogging up our cache for non-existent exhibits...
442            if(!aep.aeid.isPresent(esa.getExhibitFullName()))
443                { return(NO_CORRELATES); }
444    
445            // Compute the current period.
446            final long currentPeriod = EventPeriod.VLONG.getIntervalNumber(System.currentTimeMillis());
447    
448            // Non-stale cached values available.
449            final List<ExhibitPropsComputableMutable.Factor> result =
450                new ArrayList<ExhibitPropsComputableMutable.Factor>();
451    
452            // Check all correlation types.
453            // Accept any cached value however ancient, unless "forcing".
454            _findCorrelates(esa, aep, currentPeriod, result, force);
455    
456            // If we got no valid correlates for this exhibit at all,
457            // then try for a full recomputation and re-extract our factors.
458            if(force && (result.size() == 0))
459                {
460                // Update/recompute values if needed.
461                update(aep, vars, false);
462    
463                // Re-do our factor extraction...
464                _findCorrelates(esa, aep, currentPeriod, result, force);
465                }
466    
467    if(IsDebug.isDebug) { logger.info("Correlates size " + result.size() + " for " + esa.getCharSequence()); }
468    
469            if(result.size() == 0)
470                { return(NO_CORRELATES); /* Efficient way to return empty list. */ }
471    
472            final ExhibitPropsComputableMutable.Factor[] r =
473                new ExhibitPropsComputableMutable.Factor[result.size()];
474            result.toArray(r);
475            return(r);
476            }
477    
478        /**Find out if a category is rated "good"/popular or not.
479         * Returns TRUE if rated good, FALSE if bad,
480         * null if not significantly either or if not known.
481         *
482         * @param categoryDir  the initial directory component of an extant exhibit
483         * @param force  if true may force (expensive) computation to give
484         *     a more accurate answer,
485         *     else may return a more approximate or stale answer,
486         *     or none at all (null)
487         */
488        public Boolean isCategoryGood(final CharSequence categoryDir,
489                                      final AllExhibitProperties aep,
490                                      final BasicVarMgrInterface vars,
491                                      final boolean force)
492            throws IOException
493            {
494            if(!ExhibitName.validNameInitialComponentSyntax(categoryDir))
495                { throw new IllegalArgumentException(); }
496    
497    //System.out.println("***Looking for category goodness: " + categoryDir);
498    
499            // Make a syntactically-correct fake name/attrs from the category.
500            final ExhibitStaticAttr esa = new ExhibitStaticAttr(
501                categoryDir + "/a-A.a", 1, System.currentTimeMillis());
502    
503            // Compute the correlate name for this category.
504            final Set<CS8Bit> keys = new HashSet<CS8Bit>();
505            ExhibitPropsComputableMutableVoteCache.CorrType.makeCacheKey(
506                ExhibitPropsComputableMutableVoteCache.CorrType.CATEGORY,
507                esa, aep, keys);
508            assert(keys.size() == 1); /* We expect to get exactly one key. */
509            final CS8Bit key = keys.iterator().next();
510    
511    //System.out.println("***Looking for key: " + key);
512    
513            // Ensure data is current, if so requested.
514            if(force) { update(aep, vars, true); }
515    
516            final PeriodAndFactor pf = voteCorrCacheMap.get(key);
517            // If no Factor or too old then return null immediately.
518            if(pf == null)
519                { return(null); }
520            if(force &&
521               (pf.period < EventPeriod.VLONG.getIntervalNumber(System.currentTimeMillis())-1))
522                { return(null); }
523    
524    //System.out.println("***Got: " + f.second);
525    
526            // If Factor not definite/strong enough then return null...
527            if(Math.abs(pf.f.goodness * pf.f.correlation) < ExhibitPropsComputableMutable.GOODBAD_LIMIT)
528                { return(null); }
529    
530            // Return a definite good/bad result.
531            return((pf.f.goodness >= 0) ? Boolean.TRUE : Boolean.FALSE);
532            }
533    
534        /**Append to the result List any factors we find for this exhibit.
535         * This ignores any keys that cannot be constructed.
536         *
537         * @param esa  the exhibit name/attr; never null
538         * @param currentPeriod  current VLONG period; strictly positive
539         * @param result  found correlates are appended to this value
540         * @param force  if false then accept any extant value;
541         *     if true then only accept values up to one period old
542         */
543        private void _findCorrelates(final ExhibitStaticAttr esa,
544                                     final AllExhibitProperties aep,
545                                     final long currentPeriod,
546                                     final List<ExhibitPropsComputableMutable.Factor> result,
547                                     final boolean force)
548            {
549            // Make all cache keys for this exhibit...
550            final Set<CS8Bit> keys = CorrType.makeCacheKeys(esa, aep);
551            // and update all keyed entries all with this exhibit's rating.
552            for(final CS8Bit key : keys)
553                {
554                final PeriodAndFactor pf = voteCorrCacheMap.get(key);
555                if(pf == null) { continue; }
556                if(!force)
557                    { result.add(pf.f); }
558                else if(pf.period >= currentPeriod-1)
559                    { result.add(pf.f); }
560    //            else
561    //                { result.remove(key); /* Remove stale value. */ }
562                }
563            }
564    
565        /**Immutable empty correlates list. */
566        private static final ExhibitPropsComputableMutable.Factor[] NO_CORRELATES =
567            new ExhibitPropsComputableMutable.Factor[0];
568    
569        /**Significance confidence threshold; non-negative.
570         * Computed correlations of confidence less than +/- this
571         * are discarded to save space.
572         * <p>
573         * A value between 0.001 (0.1%) and 0.1 (10%) is probably reasonable.
574         */
575        private static final float SIGNIFICANCE_CONF_THRESHOLD = 0.003f;
576    
577        /**Significance count threshold for non-sparse correlations; non-negative.
578         * Computed non-sparse correlations with counts less than this
579         * are discarded to save space and time.
580         * <p>
581         * A value between 2 and 10 is probably reasonable;
582         * an odd value avoids recording a "neutral" score at minimum count.
583         */
584        private static final int SIGNIFICANCE_COUNT_THRESHOLD = 3;
585    
586        /**The class in which we accumulate stats while recomputing correlations. */
587        private static final class Accum
588            {
589            /**Number of samples that have contributed to this value; strictly positive. */
590            final AtomicInteger count = new AtomicInteger();
591    
592            /**Accumulated (sum) confidence value with negative goodness being treated as a negative confidence.
593             * All access protected by the instance lock.
594             */
595            private double totalConfidence;
596    
597            synchronized double getTotalConfidence()
598                { return(totalConfidence); }
599    
600            synchronized void addToTotalConfidence(final double delta)
601                { totalConfidence += delta; }
602            }
603    
604        /**Time limit for update() in ms; strictly positive.
605         * A value of the order of a second or so is probably right.
606         * This allows some significant work to get done and cached,
607         * but should not block anything (such as page generation)
608         * for an unbearable amount of time even to a visitor.
609         * <p>
610         * This should still let us get a reasonable amount of work done
611         * so that we can rebuild the data incrementally if necessary.
612         */
613        private final int UPDATE_TIME_LIMIT_MS = 1023;
614    
615        /**Private lock to prevent more than one thread at once expending effort in the core part of update(). */
616        private final ReentrantLock _uCoreLock = new ReentrantLock();
617    
618        /**Maximum number of threads to allow in update(); strictly positive.
619         * It is probably worth allowing into update() a few threads
620         * to try to parallelise probably-I/O-bound sections at the start
621         * and to do CPU-bound computations concurrently on multi-CPU systems,
622         * but using too many concurrent threads will possibly just waste resources
623         * and overload I/O subsystems (eg back-end HTTP connections).
624         */
625        private static final int _computeMaxConcurrentUpdateThreads = Math.min(7,
626                    1 + Runtime.getRuntime().availableProcessors());
627    
628        /**Counting semaphore to limit update() first-phase concurrency; never null. */
629        private final Semaphore _uStartSem = new Semaphore(_computeMaxConcurrentUpdateThreads);
630    
631        /**If true then try to load/compute early the scores of voted-for-exhibits.
632         * An eager set-up attempts to start its computations
633         * while part of its data (one of the "all" buckets) is still being fetched.
634         * <p>
635         * An eager setup should be able to compute the correlations more quickly,
636         * but at increased risk of using part/all somewhat/very stale values,
637         * and thus generating a less good result.
638         */
639        private static final boolean EAGER_VOTE_LOAD = true;
640    
641        /**If true then try to use the "all" votes buckets rather than summing the other buckets.
642         * Using the "all" bucket may less accurate/comprehensive,
643         * and may delay initialisation if it has to be fetched from upstream.
644         */
645        private static final boolean USE_ALL_BUCKET = false;
646    
647        /**Bring correlations data up to date.
648         * This may also perform housekeeping such as clearing stale state.
649         * <p>
650         * This may be a relatively expensive call,
651         * though if values are up-to-date then this should be quite cheap.
652         * <p>
653         * This is thread-safe and will use multiple caller threads to help fetch
654         * vote data and concurrently recompute exhibit scores,
655         * though second and subsequent concurrent calling threads
656         * will return immediately upon reaching the core correlation computations
657         * in order to prevent wasted duplicate effort.
658         * <p>
659         * If this routine cannot get values it needs then it throws an IOException.
660         * <p>
661         * No work will be done for an empty AEP.
662         * <p>
663         * This will abort with an IOException if taking too long
664         * (of the order of many seconds) or cannot otherwise ensure that
665         * the correlations data is up-to-date.
666         * Thus if this returns without throwing an IOException then
667         * the correlations data can be assumed to be up-to-date.
668         *
669         * @param aep  current exhibit properties; never null
670         * @param vars  handle on system variables; never null
671         * @param noTimeLimit  if true, this runs until complete if possible
672         *
673         * @throws IOException  if the correlations data cannot be guaranteed
674         *     to be up-to-date
675         *     and/or a problem was found fetching required data
676         *     and/or the routine was taking to long to complete the calculations
677         */
678        public void update(final AllExhibitProperties aep,
679                           final BasicVarMgrInterface vars,
680                           final boolean noTimeLimit)
681            throws IOException
682            {
683            if((aep == null) || (vars == null))
684                { throw new IllegalArgumentException(); }
685    
686            // Nothing to be done for an empty set of exhibits.
687            if(aep.aeid.length == 0) { return; }
688    
689            // If we can find our marker key,
690            // and it is for the current period,
691            // then we can assume that computations are up-to-date.
692            final long currentPeriod = EventPeriod.VLONG.getIntervalNumber(System.currentTimeMillis());
693            final PeriodAndFactor entry = voteCorrCacheMap.get(MARKER_KEY);
694            if((entry != null) && (entry.period == currentPeriod))
695                { return; /* Apparently completely up-to-date! */ }
696    
697            if(_uCoreLock.isLocked())
698                { throw new IOException("update() already busy in core phase; vote cache data is stale"); }
699    
700    
701            // The complete set of voted-for exhibits (pro and con).
702            // Guess size based on a few votes per sample period (ie per day).
703            // Note that these values are seen only by this thread.
704            final Set<Name.ExhibitFull> votedForNames = new HashSet<Name.ExhibitFull>(8 * SystemVariables.EVENT_SAMPLES_RETAINED);
705            final long currentPeriodL;
706    
707            // If lots of external threads are needing the update() to be done
708            // then allow a few of them in to help with the work.
709            if(!_uStartSem.tryAcquire())
710                { throw new IOException("update() already busy in first phase; vote cache data is stale"); }
711            try
712                {
713                // Are we probably the first thread into this section?
714                // (It doesn't matter if we get this wrong occasionally.)
715                final boolean firstThreadIn = _uStartSem.availablePermits() == (_computeMaxConcurrentUpdateThreads-1);
716    
717                // We are now really getting started...
718                final long startTime = System.currentTimeMillis();
719                // Note the (target) limit time;
720                // if really out of date then allow MUCH longer before timing out
721                // (unless conserving power right now)
722                // But in any case don't quit until this has made some progress
723                // to ensure that this does eventually complete.
724                final boolean reasonablyUpToDate =
725                    ((entry != null) && (entry.period == currentPeriod-1));
726                final long stopBy = startTime +
727                                    Rnd.fastRnd.nextInt(UPDATE_TIME_LIMIT_MS) +
728                    ((reasonablyUpToDate || GenUtils.mustConservePower()) ?
729                            (UPDATE_TIME_LIMIT_MS>>1) : (UPDATE_TIME_LIMIT_MS<<1));
730    
731    if(IsDebug.isDebug) { logger.info("[ExhibitPropsComputableMutableVoteCache.update(): cache size: " + voteCorrCacheMap.size() + ".]"); }
732    
733    
734                // Request/fetch the current-period event sets now,
735                // so that they may be ready later to be added into the mix
736                // if not immediately available.
737                final EventVariableValue pcur = vars.getEventValue(SystemVariables.VOTE_PRO, EventPeriod.VLONG, true);
738                final EventVariableValue ccur = vars.getEventValue(SystemVariables.VOTE_CON, EventPeriod.VLONG, true);
739    
740                final List<EventVariableValue> availableALLBuckets = new ArrayList<EventVariableValue>(2);
741                if(USE_ALL_BUCKET)
742                    {
743                    // Get pro and con votes' "all" period event set.
744                    // This is a heuristic to abort if we cannot get vote "all" buckets,
745                    // and to try to encourage fetching of such data by the cache,
746                    // or to continue quietly if what we currently have is OK,
747                    // even empty if there seems to have been no recent voting.
748                    //
749                    // If we get null/stale/empty "all" buckets
750                    // then abort and hope that they will be present/fresh
751                    // the next time that we are called
752                    // (eg allowing for asynchronous fetch/cacheing of these values).
753                    final EventVariableValue pevvs[] = vars.getEventValues(
754                        SystemVariables.VOTE_PRO, EventPeriod.VLONG, 0, null);
755                    final EventVariableValue cevvs[] = vars.getEventValues(
756                        SystemVariables.VOTE_CON, EventPeriod.VLONG, 0, null);
757                    if((pevvs.length != 0) && (pevvs[0] != null)) { availableALLBuckets.add(pevvs[0]); }
758                    if((cevvs.length != 0) && (cevvs[0] != null)) { availableALLBuckets.add(cevvs[0]); }
759                    // If not eager and we don't have both "all" buckets right now
760                    // then give up quickly, without burning much CPU time.
761                    if(availableALLBuckets.size() < (EAGER_VOTE_LOAD ? 1 : 2))
762                        { throw new IOException("missing vote one or both all buckets (early)"); }
763                    }
764    
765                // List of buckets from which we will extract (short) exhibit names.
766                // Starts with the pro/con 'all' buckets.
767                final List<EventVariableValue> buckets = new ArrayList<EventVariableValue>(availableALLBuckets);
768    
769                // If not using the "all" vote buckets or
770                // if we are using "all" buckets but either is completely empty
771                // then we may wish to resort
772                // to explicitly fetching all/most available recent event periods
773                // and computing the union of their values.
774                if(!USE_ALL_BUCKET ||
775                   ((availableALLBuckets.size() == 2) &&
776                    ((availableALLBuckets.get(0).getTotalEventCount() == 0) ||
777                     (availableALLBuckets.get(1).getTotalEventCount() == 0))))
778                    {
779    if(IsDebug.isDebug && USE_ALL_BUCKET) { logger.warn("[ExhibitPropsComputableMutableVoteCache.update(): WARNING: at least one 'all' bucket is empty: collecting vote data the slow way...]"); }
780    
781                    // Remove extant dubious 'all' values.
782                    if(USE_ALL_BUCKET) { buckets.clear(); }
783    
784                    // Forcing alternative (slow) mechanism...
785                    // Get events for all retained slots.
786                    final BitSet whichValues = new BitSet(SystemVariables.EVENT_SAMPLES_RETAINED);
787                    whichValues.set(0, SystemVariables.EVENT_SAMPLES_RETAINED);
788                    final SimpleVariableDefinition[] defs = new SimpleVariableDefinition[]{ SystemVariables.VOTE_PRO, SystemVariables.VOTE_CON };
789                    // Have extra threads try for values in a different order for concurrency...
790                    if(!firstThreadIn) { Collections.shuffle(Arrays.asList(defs), GenUtils.mustConservePowerExtreme() ? Rnd.fastRnd : Rnd.goodRnd); }
791                    for(final SimpleVariableDefinition def : defs)
792                        {
793                        final EventVariableValue evvsAll[] = vars.getEventValues(
794                            def, EventPeriod.VLONG, currentPeriod, whichValues);
795                        for(final EventVariableValue evv : evvsAll)
796                            {
797                            if((evv != null) && (evv.getTotalEventCount() != 0))
798                                { buckets.add(evv); }
799                            }
800                        }
801                    }
802    
803                // Throw in current-slot votes at the last moment.
804                // Try and fetch them again if they were empty when we last tried.
805                // These data are nice-to-have if we can get them,
806                // since they make the correlation values as fresh as possible.
807                buckets.add((pcur.getTotalEventCount() != 0) ? pcur :
808                            vars.getEventValue(SystemVariables.VOTE_PRO, EventPeriod.VLONG, true));
809                buckets.add((ccur.getTotalEventCount() != 0) ? ccur :
810                            vars.getEventValue(SystemVariables.VOTE_CON, EventPeriod.VLONG, true));
811    
812    
813                // If eager then get/compute votes/scores ASAP,
814                // even when we don't have both "all" buckets.
815                //
816                // We convert from short name to long/full form
817                // dropping any exhibit (name) no longer extant.
818                final long calcVoteFactorStart = System.currentTimeMillis();
819                currentPeriodL = MemoryTools.intern(Long.valueOf(currentPeriod));
820                // Scramble order of buckets to help ensure that
821                // parallel threads don't waste too much time redoing the same work.
822                if(!firstThreadIn) { Collections.shuffle(buckets, Rnd.fastRnd); }
823                int checked = 0; // To enable some sort of progress report...
824                for(final EventVariableValue evv : buckets)
825                    {
826                    ++checked; // Track progress...
827    
828                    // Bucket entries should never be null.
829                    assert(evv != null);
830    
831                    List<Object> so = evv.getDistinctValuesInRankOrder();
832                    // Again, shuffle work order to help avoid threads colliding.
833                    if(!firstThreadIn)
834                        {
835                        so = new ArrayList<Object>(so);
836                        Collections.shuffle(so, Rnd.fastRnd);
837                        }
838                    for(final Object key : so)
839                        {
840                        if(!(key instanceof String)) { continue; }
841                        final String shortName = (String) key;
842                        final Name.ExhibitFull exhibitName = aep.aeid.getFullName(shortName);
843                        if(exhibitName == null) { continue; }
844                        // Skip if we've already handled/added this exhibitName.
845                        if(!votedForNames.add(exhibitName)) { continue; }
846    
847                        // If eagerly fetching/computing vote values then do it now,
848                        // possibly in parallel with fetching one of the "all" buckets.
849                        if(EAGER_VOTE_LOAD)
850                            {
851                            // ----------------------------------------------------
852                            // Bring at least one vote up to date
853                            // if not all yet up-to-date.
854    
855                            // Get any extant cached vote value for this exhibit.
856                            final PeriodAndFactor oldVVB = voteCorrCacheMapByFullName.get(exhibitName);
857                            if((oldVVB != null) && (oldVVB.period == currentPeriod))
858                                { continue; /* Already up-to-date. */ }
859    
860                            // Compute a new/current vote value for this exhibit.
861                            final ExhibitPropsComputableMutable.Factor result =
862                                ExhibitPropsComputableMutable.calcVoteFactor(exhibitName, aep, vars, 0);
863                            voteCorrCacheMapByFullName.put(exhibitName,
864                                new PeriodAndFactor(currentPeriodL, result));
865    
866                            // Abort if we've run out of time.
867                            if(!noTimeLimit && (System.currentTimeMillis() > stopBy))
868                                {
869    /* if(IsDebug.isDebug) */ { logger.info("[ExhibitPropsComputableMutableVoteCache.update(): ABORTED calcVoteFactor: ran out of time (checked "+checked+"/"+buckets.size()+") @ "+(System.currentTimeMillis()-calcVoteFactorStart)+"ms]"); }
870                                throw new IOException("ran out of time in update(); data not brought up-to-date yet because not all voted-for-exhibits up-to-date yet");
871                                }
872    
873                            // Abort if another thread has already made it as far as
874                            // the core section (so we can't get in).
875                            if(_uCoreLock.isLocked())
876                                {
877    /* if(IsDebug.isDebug) */ { logger.info("[ExhibitPropsComputableMutableVoteCache.update(): ABORTED (another thread already in update() core) calcVoteFactor @ time "+(System.currentTimeMillis()-calcVoteFactorStart)+"ms]"); }
878                                throw new IOException("another thread already in update() core");
879                                }
880                            }
881                        }
882                    }
883                final long calcVoteFactorEnd = System.currentTimeMillis();
884    /* if(IsDebug.isDebug) */ { System.out.println("[ExhibitPropsComputableMutableVoteCache.update(): calcVoteFactor time "+(calcVoteFactorEnd-calcVoteFactorStart)+"ms]"); }
885    
886                // If we have not yet managed to get both buckets
887                // and thus all the exhibit names that they contain
888                // then we cannot go any further for the moment.
889                if(EAGER_VOTE_LOAD && USE_ALL_BUCKET && (availableALLBuckets.size() < 2))
890                    { throw new IOException("too few all vote buckets (EAGER)"); }
891                if(USE_ALL_BUCKET) { assert(availableALLBuckets.size() == 2) : "must have at least both pro and con `all' buckets or full set of pro/con buckets by now"; }
892    
893    if(IsDebug.isDebug) { System.out.println("[ExhibitPropsComputableMutableVoteCache.update(): votedForNames size: " + votedForNames.size() + ".]"); }
894                }
895            finally
896                { _uStartSem.release(); }
897    
898            // Only allow at most one (external) thread at once
899            // to consume CPU in the core part of update()
900            // so as to avoid wasting resources
901            // though we will inject local parallelism directly for speed.
902            if(!_uCoreLock.tryLock()) { throw new IOException("update() already busy in core; vote cache data is stale"); }
903            try
904                {
905                // Double-check that the work was not completed by another thread
906                // while we were in an earlier section!
907                final PeriodAndFactor e2 = voteCorrCacheMap.get(MARKER_KEY);
908                if((e2 != null) && (e2.period == currentPeriod))
909                    { return; /* Apparently completely up-to-date! */ }
910    
911                // For every voted-for exhibit,
912                // and for every correlation type that we can compute,
913                // accumulate correlation stats.
914                // We then discard any that are insignificant,
915                // and insert the remaining significant values into the cache.
916                //
917                // Also compute the mean count per non-sparse value
918                // as a baseline against which non-sparse values with lower counts
919                // can be rated for confidence/certainty.
920                //
921                // We optimistically assume that in the typical case
922                // we can get through this entire operation quickly
923                // (especially since we parallelise it)
924                // so we don't bother splitting it into phases.
925                //
926                // We expect this work to be mainly compute-bound.
927                final SortedSet<String> attrWords = ExhibitAttrUtils.getAttrWords().getAttrWordsSortedSet();
928    
929                final AtomicInteger totalNonSparseEntries = new AtomicInteger();
930                final AtomicInteger totalNonSparseCount = new AtomicInteger();
931                final ConcurrentHashMap<CS8Bit,Accum> working = new ConcurrentHashMap<CS8Bit, Accum>(17 + 19*votedForNames.size());
932                final long beforeCompute = System.currentTimeMillis();
933    
934                // List of results that we can wait for completion of.
935                final List<Future<?>> tasks = new ArrayList<Future<?>>(votedForNames.size());
936    
937                // Post tasks for each of the exhibits that has been voted for.
938                for(final Name.ExhibitFull exhibitName : votedForNames)
939                    {
940                    tasks.add(ThreadUtils.computeIntensiveThreadPool.submit(new Runnable(){
941                        public final void run()
942                            {
943                            final ExhibitStaticAttr esa = aep.aeid.getStaticAttr(exhibitName);
944                            // If exhibit appears not to exist any more then we're done...
945                            // (This is in practice very rare.)
946                            if(esa == null) { return; }
947    
948                            // Get/compute the current/new vote value for this exhibit.
949                            final ExhibitPropsComputableMutable.Factor vote;
950                            try { vote = calcVoteFactor(exhibitName, aep, vars); }
951                            catch(final IOException e)
952                                {
953                                // If we get an IOException here
954                                // then we probably have to abort the entire calculation
955                                // and restart later.
956                                e.printStackTrace();
957                                throw new IllegalStateException("unexpected IOException: will have to restart", e);
958                                }
959                            assert(vote != null);
960    
961                            // Make all cache keys for this exhibit.
962                            final Set<CS8Bit> keys = CorrType.makeCacheKeys(esa, aep);
963                            // Count the words in this exhibit for scaling...
964                            final int nWords = ExhibitName.getMainWordsCount(esa.getCharSequence(), attrWords);
965                            // Update all keyed entries with this exhibit's rating.
966                            for(final CS8Bit key : keys)
967                                {
968                                // Is this a "sparse" correlation type?
969                                final CorrType corrType = CorrType.extractKeyTypeFromPrefix(key);
970                                final boolean isSparse = corrType.isSparse();
971    
972                                // Is this an especially "dense" term that needs scaling
973                                // so as not to swamp other results.
974                                final boolean needsScalingByWordCount =
975                                    (corrType == CorrType.STEM_WORD) ||
976                                    (corrType == CorrType.PREFIX);
977    
978                                // Retrieve existing accumulator, or create a new one.
979                                Accum ac;
980                                while(null == (ac = working.get(key)))
981                                    {
982                                    // Create new all-zeros accumulator
983                                    // if one not already inserted.
984                                    // (This is race-free c/o putIfAbsent().)
985                                    working.putIfAbsent(key, new Accum());
986                                    }
987                                // Update the accumulator.
988                                final int count = ac.count.incrementAndGet();
989                                final float corr = (!needsScalingByWordCount) ?
990                                    vote.correlation : (vote.correlation / nWords);
991                                if(vote.goodness < 0) { ac.addToTotalConfidence(-corr); }
992                                else if(vote.goodness > 0) { ac.addToTotalConfidence(+corr); }
993    
994                                // Note non-sparse entries that have a significant count.
995                                if(!isSparse && (count >= SIGNIFICANCE_COUNT_THRESHOLD))
996                                    {
997                                    if(count == SIGNIFICANCE_COUNT_THRESHOLD)
998                                        { totalNonSparseEntries.incrementAndGet(); /* Note new (non-sparse) entry. */ }
999                                    totalNonSparseCount.incrementAndGet();
1000                                    }
1001                                }
1002                            }
1003                        }));
1004                    }
1005    
1006                // Wait for all the tasks to complete...
1007                assert(tasks.size() == votedForNames.size());
1008                for(final Future<?> task : tasks)
1009                    {
1010                    try { task.get(); }
1011                    catch(final InterruptedException e)
1012                        {
1013                        e.printStackTrace();
1014                        final IOException et = new InterruptedIOException("unexpected interrupt during calculation");
1015                        et.initCause(e);
1016                        throw et;
1017                       }
1018                    catch(final ExecutionException e)
1019                        {
1020                        e.printStackTrace();
1021                        final IOException et = new IOException("unexpected exception during calculation");
1022                        et.initCause(e);
1023                        throw et;
1024                        }
1025                    }
1026    
1027                final long afterCompute = System.currentTimeMillis();
1028    /*if(IsDebug.isDebug) */ { logger.info("[ExhibitPropsComputableMutableVoteCache.update(): *** correlation factors compute time "+(afterCompute-beforeCompute)+"ms.]"); }
1029    
1030                final int tNSE = totalNonSparseEntries.get();
1031                final double meanNonSparseCount = (tNSE < 1) ? 0.0
1032                    : (totalNonSparseCount.get() / (double) tNSE);
1033                final double mNSC_min = Math.max(meanNonSparseCount,
1034                                                 SIGNIFICANCE_COUNT_THRESHOLD);
1035    
1036    /* if(IsDebug.isDebug) */ { logger.info("[ExhibitPropsComputableMutableVoteCache.update(): votedForNames.size()="+votedForNames.size()+", working.size()="+working.size()+", meanNonSparseCount="+meanNonSparseCount + ".]"); }
1037    
1038                // Go through the working set,
1039                // converting significant results into main cache entries.
1040                final boolean doComment = !IsDebug.isDebug; // MemoryTools.lotsFree() && !GenUtils.mustConservePowerExtreme();
1041                final long finalComputePhaseStart = System.currentTimeMillis();
1042                for(final CS8Bit key : working.keySet())
1043                    {
1044                    // Is this a "sparse" correlation type?
1045                    final boolean isSparse = CorrType.extractKeyTypeFromPrefix(key).isSparse();
1046    
1047                    final Accum ac = working.get(key);
1048                    assert(ac != null);
1049    
1050                    // Ignore values with very small counts
1051                    // (except for explicit "sparse" types)
1052                    // as potentially unreliable (or at least very noisy).
1053                    final int count = ac.count.get();
1054                    if(!isSparse && (count < SIGNIFICANCE_COUNT_THRESHOLD))
1055                        { continue; }
1056    
1057                    assert(ac.count.get() > 0);
1058                    assert(!Double.isInfinite(ac.getTotalConfidence()));
1059                    assert(!Double.isNaN(ac.getTotalConfidence()));
1060                    // Compute mean/normalised confidence
1061                    // scaling as appropriate for below-mean counts.
1062                    // We scale by the expected extra variance of a low sample size.
1063                    final double meanConf = Math.abs(ac.getTotalConfidence() / count);
1064                    final double scaledConf =
1065                        (/* isSparse || */ (count >= mNSC_min)) ? meanConf :
1066                        (meanConf * Math.sqrt(count / mNSC_min));
1067    
1068                    // If too small to be significant, then don't use it.
1069                    // Test in such a way as to reject NaN values too,
1070                    // ie unless a test succeeds, since all tests against NaN fail.
1071                    if(!(scaledConf > SIGNIFICANCE_CONF_THRESHOLD)) { continue; }
1072    
1073                    // Make our factor with a helpful comment.
1074                    final String comment = doComment ? (key + " count=" + count) : null;
1075                    final ExhibitPropsComputableMutable.Factor f = new ExhibitPropsComputableMutable.Factor(
1076                        (ac.getTotalConfidence() < 0) ? -1 : +1,
1077                        scaledConf);
1078    
1079                    // Insert the new Factor into the cache.
1080                    // There will be many, necessarily-unique, keys,
1081                    // so don't needlessly add overhead and inflate memory by intern()ing them.
1082                    voteCorrCacheMap.put(key, new PeriodAndFactor(currentPeriodL, f));
1083                    }
1084                final long finalComputePhaseEnd = System.currentTimeMillis();
1085    /* if(IsDebug.isDebug) */ { logger.info("[ExhibitPropsComputableMutableVoteCache.update(): final phase compute time "+(finalComputePhaseEnd-finalComputePhaseStart)+"ms.]"); }
1086    
1087                // Insert a distinguished marker key
1088                // so that it is quick to check when we last recomputed values here.
1089                // We do this only when all updates are complete.
1090                voteCorrCacheMap.put(MARKER_KEY,
1091                                     new PeriodAndFactor(currentPeriodL, ExhibitPropsComputableMutable.FACTOR_ZERO));
1092    
1093                // Remove any stale state since we now have up-to-date values.
1094                _clearStaleState(voteCorrCacheMap);
1095    
1096    /* if(IsDebug.isDebug) */ { logger.info("update(): final cache size: " + voteCorrCacheMap.size() + ", voteCorrCacheMapByFullName.size()="+voteCorrCacheMapByFullName.size()); }
1097                }
1098            finally { _uCoreLock.unlock(); }
1099    
1100            // Dump the correlations table for debug purposes,
1101            // but outside the lock so as not to hold things up unduly...
1102            // Only show the non-sparse entries.
1103            if(IsDebug.isDebug)
1104                {
1105                // Nicely dump the table in sorted order...
1106                final List<CS8Bit> keys = new ArrayList<CS8Bit>(voteCorrCacheMap.size());
1107                // Capture the key set atomically...
1108                synchronized(voteCorrCacheMap) { keys.addAll(voteCorrCacheMap.keySet()); }
1109                // ... and sort our copy to iterate over.
1110                Collections.sort(keys);
1111                // Make a re-usable working buffer.
1112                final StringBuilder sb = new StringBuilder(255);
1113    
1114                // Count of key types.
1115                final EnumMap<CorrType, Integer> tc = new EnumMap<ExhibitPropsComputableMutableVoteCache.CorrType, Integer>(CorrType.class);
1116    
1117                for(final CS8Bit key : keys)
1118                    {
1119                    final CorrType keyType = CorrType.extractKeyTypeFromPrefix(key);
1120                    if(!tc.containsKey(keyType)) { tc.put(keyType, 0); } else { tc.put(keyType, 1 + tc.get(keyType)); }
1121    
1122                    // Skip the sparse entries (there may be *lots* of them)...
1123                    if(keyType.isSparse()) { continue; }
1124    
1125                    sb.setLength(0); // Clear the buffer.
1126                    sb.append("    "); // Indent...
1127                    sb.append(keyType).append(':').append(CorrType.extractKeyTail(key)).append(" = ");
1128                    final PeriodAndFactor pf = voteCorrCacheMap.get(key);
1129                    if(pf != null)
1130                        {
1131                        sb.append((pf.f.goodness < 0) ? '-' : '+');
1132                        sb.append(pf.f.correlation);
1133                        }
1134                    logger.info(sb);
1135                    }
1136    
1137                logger.info("count by key type: " + tc);
1138    
1139                // As as 2012/03/15:
1140                // final cache size: 2039
1141                // count by key type: {VARIANT=365, SIMILAR=326, SIZE=10, ATTRIBUTE=13, RESOLUTION=9, MAIN_WORD=62, CATEGORY=21, TYPE=2, AUTHOR=22, MIME1=0, PREFIX=456, STEM_WORDS=741}
1142                }
1143            }
1144    
1145        /**Special marker key value to indicate when correlations last computed; not null.
1146         * Legal key, and never conflicts with any normal usage.
1147         */
1148        private static final CS8Bit MARKER_KEY = new CS8Bit(CorrType.AUTHOR.makeUniqueKeyPrefix());
1149    
1150    
1151        /**Remove stale state from the map passed in.
1152         * A lock is held on the map while this is done
1153         * to prevent it being altered unexpectedly.
1154         * <p>
1155         * Only remove things more than one period old
1156         * so that it is possible to gently recompute values in the background
1157         * rather than being bounced into it on the roll into a new period.
1158         *
1159         * @param map  non-null map.
1160         */
1161        static private void _clearStaleState(final Hashtable<?, PeriodAndFactor> map)
1162            {
1163            synchronized(map)
1164                {
1165                // Compute the current period.
1166                final long currentPeriod = EventPeriod.VLONG.getIntervalNumber(System.currentTimeMillis());
1167    
1168                for(final Iterator<PeriodAndFactor> it = map.values().iterator(); it.hasNext(); )
1169                    {
1170                    final PeriodAndFactor tp = it.next();
1171                    // Simply weed out stale entries,
1172                    // ie anything not in the current VLONG period or the one before.
1173                    if(tp.period < currentPeriod-1)
1174                        { it.remove(); continue; }
1175                    }
1176                }
1177            }
1178        }