001    /*
002    Copyright (c) 1996-2012, Damon Hart-Davis
003    All rights reserved.
004    
005    Redistribution and use in source and binary forms, with or without
006    modification, are permitted provided that the following conditions are
007    met:
008    
009      * Redistributions of source code must retain the above copyright
010        notice, this list of conditions and the following disclaimer.
011    
012      * Redistributions in binary form must reproduce the above copyright
013        notice, this list of conditions and the following disclaimer in the
014        documentation and/or other materials provided with the
015        distribution.
016    
017    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
018    IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
019    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
020    PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
021    OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
022    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
023    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
024    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
025    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
026    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
027    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
028    */
029    package org.hd.d.pg2k.svrCore;
030    
031    import java.io.IOException;
032    import java.io.InvalidObjectException;
033    import java.io.ObjectInputValidation;
034    import java.io.Serializable;
035    import java.util.ArrayList;
036    import java.util.BitSet;
037    import java.util.Collection;
038    import java.util.Collections;
039    import java.util.Enumeration;
040    import java.util.HashSet;
041    import java.util.Iterator;
042    import java.util.List;
043    import java.util.Random;
044    import java.util.Set;
045    
046    import org.hd.d.pg2k.ai.scorer.ScoreAndConf;
047    import org.hd.d.pg2k.ai.scorer.ScorerCacheIF;
048    import org.hd.d.pg2k.svrCore.Name.ExhibitFull;
049    import org.hd.d.pg2k.svrCore.MIME.ExhibitMIME;
050    import org.hd.d.pg2k.svrCore.location.Location;
051    import org.hd.d.pg2k.svrCore.props.GenProps;
052    import org.hd.d.pg2k.svrCore.vars.BasicVarMgrInterface;
053    import org.hd.d.pg2k.svrCore.vars.EventPeriod;
054    import org.hd.d.pg2k.svrCore.vars.EventVariableValue;
055    import org.hd.d.pg2k.svrCore.vars.SimpleVariableDefinition;
056    import org.hd.d.pg2k.svrCore.vars.SystemVariables;
057    
058    import ORG.hd.d.IsDebug;
059    
060    
061    /**Immutable (and serialisable) store of all ephemeral computable auxiliary properties of a single exhibit.
062     * These are properties that are computed from exhibit and non-exhibit
063     * data and that need to be periodically recomputed,
064     * such as popularity score.
065     * <p>
066     * This is is called "mutable" because although any one instance is immutable,
067     * successive values retrieved for the same exhibit may be different.
068     * <p>
069     * This data has limited lifetime,
070     * but may be somewhat useful even when stale,
071     * so can be persisted if desired.
072     * <p>
073     * This is designed to be efficient and robust on the wire and in memory,
074     * since these details will be held for each and every exhibit.
075     */
076    public final class ExhibitPropsComputableMutable implements Serializable,
077                                                                ObjectInputValidation
078        {
079        /**Limit of goodness*correlation to consider something (eg category) significantly good or bad. */
080        public static final float GOODBAD_LIMIT = 0.4f;
081    
082        /**Limit of goodness*correlation to consider something (eg category) significantly good or bad, is int value. */
083        public static final int GOODBAD_LIMIT_INT = Math.round(GOODBAD_LIMIT * Integer.MAX_VALUE);
084    
085        /**Compute a "quick"/approximate value for a specified exhibit; never null.
086         * This uses only values from the exhibit static data
087         * plus the system properties object
088         * to compute a fast approximation to a full value.
089         * <p>
090         * The value is immediately marked as stale
091         * (unless it is possible to compute the exact value fast),
092         * but is useful to generate something quickly if the system is busy,
093         * or at start-up for example.
094         *
095         * @param esa  the exhibit to compute properties for; never null
096         * @param gp   the system properties; never null
097         *
098         * @return non-null quick approximation
099         */
100        public static ExhibitPropsComputableMutable generateFastApproximation(
101                                            final ExhibitStaticAttr esa,
102                                            final GenProps gp)
103            {
104            return(compute(esa, gp, null, null, null, null));
105            }
106    
107        /**Compute (accurate) value for a specified exhibit; never null.
108         * This uses values from the exhibit static data
109         * plus the system properties object
110         * plus the data source (and thus might take a long time and be expensive)
111         * to compute an accurate/full value.
112         * <p>
113         * Providing the supplied data source is non-null and functional,
114         * and the GenProps does not appear to be empty (zero timestamp)
115         * then the value will generally not become stale for at least MAX_AGE_BEFORE_STALE_MS/2,
116         * and will become stale in no more than MAX_AGE_BEFORE_STALE_MS ms.
117         * If in low-power mode and/or the AI scorers are not available
118         * then this may ignore them and set a much shorter time to stale,
119         * short enough to force recomputation well within 24 hours
120         * if sufficient power becomes available in that time.
121         * <p>
122         * If the data source is not fully available/functional then
123         * an approximation will be returned instead,
124         * and marked stale to allow recalculation later.
125         *
126         * @param esa  the exhibit to compute properties for; never null
127         * @param gp  the system properties; never null
128         * @param aep  the exhibit properties; can be null
129         * @param vars  source of event data; can be null
130         * @param voteCache  vote cache; can be null
131         *
132         * @return non-null full value or approximation
133         *     depending on the available data sources
134         */
135        public static ExhibitPropsComputableMutable compute(final ExhibitStaticAttr esa,
136                                                            final GenProps gp,
137                                                            final AllExhibitProperties aep,
138                                                            final BasicVarMgrInterface vars,
139                                                            final ExhibitPropsComputableMutableVoteCacheIF voteCache,
140                                                            final ScorerCacheIF scorers)
141            {
142            return(new ExhibitPropsComputableMutable(esa, gp, aep, vars, voteCache, scorers));
143            }
144    
145        /**Create an instance with the specified goodness and stale 'best before' time. */
146        private ExhibitPropsComputableMutable(final long staleAfter, final int goodness)
147            {
148            this.staleAfter = staleAfter;
149            this.goodness = goodness;
150    
151            // Verify object state.
152            try { validateObject(); }
153            catch(final InvalidObjectException e)
154                { throw new IllegalArgumentException(e.getMessage()); }
155            }
156    
157        /**Trivially-stale entirely-neutral instance; not null. */
158        public static final ExhibitPropsComputableMutable TRIVIAL_NEUTRAL = new ExhibitPropsComputableMutable(0, 0);
159    
160        /**Create a fully populated ExhibitPropsComputableMutable object.
161         * This is given the data sources from which it can fetch
162         * the exhibit data one or more times to do its computations.
163         * <p>
164         * The data source references are not stored in the object.
165         *
166         * @param esa  the exhibit to compute properties for; never null
167         * @param gp  the system properties; never null
168         * @param aep  the exhibit properties; can be null
169         * @param vars  source of event data; can be null
170         * @param scorers  source of content-based scoring; can be null
171         * @param voteCache  vote cache; can be null
172         */
173        private ExhibitPropsComputableMutable(final ExhibitStaticAttr esa,
174                                              final GenProps gp,
175                                              final AllExhibitProperties aep,
176                                              final BasicVarMgrInterface vars,
177                                              final ExhibitPropsComputableMutableVoteCacheIF voteCache,
178                                              final ScorerCacheIF scorers)
179            {
180            if((esa == null) || (gp == null))
181                { throw new IllegalArgumentException(); }
182    
183            // Initial set of data-dependent Factor items.
184            final List<Factor> components = new ArrayList<Factor>();
185    
186    
187            // Calculate factors that depend on access to event histories and other 'live' data.
188            // If an I/O problem is encountered then generate an approximate (and stale) value
189            // with all results gathered up until then.
190            boolean dataAccessTrouble = true; // Note if 'stale'/partial because data not complete...
191            // Avoid Scorer data in persistent low-power mode...
192            final boolean noResourcesForScorers = GenUtils.mustConserveCPU() || GenUtils.mustConservePowerLongTerm();
193            final boolean noScorers = noResourcesForScorers || (null == scorers);
194            if((aep != null) && (vars != null) && (voteCache != null))
195                {
196                try
197                    {
198                    // Compute synthetic factors based on access stats.
199                    final ExhibitFull exhibitFullName = esa.getExhibitFullName();
200                    components.addAll(calcAccessFactors(exhibitFullName, aep, vars));
201    
202                    // First, compute factor based on explicit voting for this exhibit.
203                    final Factor vote = voteCache.calcVoteFactor(exhibitFullName, aep, vars);
204                    // We use the minimum of the actual and maximum/limit confidence
205                    // to realistically boost the vote contribution as much as possible.
206                    components.add(new Factor(vote.goodness, Math.min(POP_VOTES, vote.correlation)));
207    
208                    // Compute factors based on vote correlations, etc.
209                    components.addAll(calcCorrelationFactors(esa, aep, vars, voteCache));
210    
211                    if(!noScorers)
212                        {
213                        // Compute factor, if any, from AI Scorers, weighting by confidence.
214                        // We allow possibly-slightly-stale Scorer values (assumed still to be useful)
215                        // so as not to be pole-axed whenever the date/period rolls.
216                        // At start-up we may simply not have all Scorers loaded and assessed.
217                        final ScoreAndConf compositeScoreAndConf = scorers.computeCompositeScoreAndConfidence(exhibitFullName, true);
218                        // Only bother even computing the Factor if we have a non-zero confidence...
219                        if(compositeScoreAndConf.confidence != 0)
220                            {
221                            // We use the minimum of the actual and maximum/limit confidence
222                            // to realistically boost the Scorer contribution as much as possible.
223                            final Factor scorerFactor = new Factor(compositeScoreAndConf.score / (float) ScoreAndConf.MAX,
224                                                                   Math.min(POP_AI_SCORER, compositeScoreAndConf.confidence / (float) ScoreAndConf.MAX));
225                            components.add(scorerFactor);
226    if(IsDebug.isDebug) { System.out.println("[EPCM: scorerFactor=" + scorerFactor + " for " +esa.getCharSequence() + ".]"); }
227                            }
228                        }
229    
230                    // Managed to gather all the data required.
231                    dataAccessTrouble = false;
232                    }
233                catch(final IOException e) { /* Absorb error. */ }
234                }
235    
236    
237            // Compute the full goodness value
238            // and scale it to the form we hold it internally.
239            final Factor gf = _computeTotalGoodnessFactor(esa, gp, aep, components);
240            goodness = Math.max(-Integer.MAX_VALUE, Math.min(Integer.MAX_VALUE,
241                Math.round(Integer.MAX_VALUE * gf.goodness)));
242    
243            // Data is marked stale if any data source is null/empty/default
244            // or if we had trouble fetching some data dynamically,
245            // else data is non-stale and we pick a random expiry time between 3/4 and maximum.
246            // Expiry may be further postponed if the system is conserving resources,
247            // eg if it is in low-power mode.
248    
249            // Trivially state if critical data missing or suspect.
250            if(dataAccessTrouble || (aep == null) || (aep.longHash == 0) || (gp.timestamp == 0))
251                { staleAfter = 0; }
252            // Much-reduced lifetime if no Scorer data
253            // unless this system doesn't have the resources to support scorers at all.
254            else if(noScorers && !noResourcesForScorers)
255                { staleAfter = System.currentTimeMillis() + MAX_AGE_SANS_SCORERS_BEFORE_STALE_MS; }
256            // Everything seems to be in place so set a full lifetime.
257            // Vary the lifetime a little to minimise recomputation collisions.
258            else
259                {
260                staleAfter = System.currentTimeMillis() + ((3*MAX_AGE_BEFORE_STALE_MS)/4)
261                                             + Rnd.fastRnd.nextInt(1 + MAX_AGE_BEFORE_STALE_MS/4);
262                }
263    
264            // Verify object state.
265            try { validateObject(); }
266            catch(final InvalidObjectException e)
267                { throw new IllegalArgumentException(e.getMessage()); }
268            }
269    
270        /**If true then force correlation data up to date.
271         * If false then accept somewhat stale or missing data,
272         * since this only changes slowly and a complete absence is neutral.
273         */
274        private static final boolean FORCE_CORR_RECOMP = true;
275    
276        /**Compute factors based on correlations.
277         * These have a confidence pre-scaled in the range 0 to POP_VOTE_CORR.
278         */
279        private static List<Factor> calcCorrelationFactors(final ExhibitStaticAttr esa,
280                        final AllExhibitProperties aep,
281                        final BasicVarMgrInterface vars,
282                        final ExhibitPropsComputableMutableVoteCacheIF voteCache)
283            throws IOException
284            {
285            // Get the raw factors; will need scaling.
286            // Force retrieved values to be (reasonably) up-to-date.
287            final Factor[] rawFactors = voteCache.getCorrelates(esa, aep, vars, FORCE_CORR_RECOMP);
288    
289            // Full set of scaled values.
290            // We scale the certainty down in relation to other factors.
291            final List<Factor> result = new ArrayList<Factor>(rawFactors.length);
292            for(final Factor f : rawFactors)
293                { result.add(new Factor(f.goodness, f.correlation * POP_VOTE_CORR)); }
294    
295            return(result);
296            }
297    
298        /**Time until which this data is considered valid; beyond this time is stale.
299         * If zero, or before the current time of day, the data is stale.
300         * <p>
301         * If the stale date is too far in the future for us to believe,
302         * ie much more than would be allowed with this version of the object,
303         * then we treat the data as stale in case it is from a version of this
304         * object with very different staleness limits or there was a persistence error.
305         */
306        private final long staleAfter;
307    
308        /**If true, the data in this object is stale.
309         * The data may still be correct,
310         * but should be treated as an approximation,
311         * and recomputed as soon as possible.
312         * <p>
313         * It is possible for a item previously non-stale that then became 'stale'
314         * to become non-stale again for a limited time
315         * while the system is conserving energy in low-power mode,
316         * thus avoiding the need for some expensive recomputation.
317         * <p>
318         * Trivially-stale instances,
319         * ie that were never fully computed,
320         * never become non-stale again in this way.
321         */
322        public boolean isStale()
323            {
324            // Object was immediately (trivially) stale.
325            // It cannot become non-stale.
326            if(isTriviallyStale()) { return(true); }
327    
328            final long now = System.currentTimeMillis();
329    
330            // Expired/stale.
331            // It may become/stay non-stale for much longer
332            // iff the system is in low-power (temporary energy saving) mode right now
333            // and even longer for 'bad' exhibits to save wasting too much effort/energy on them.
334            if(staleAfter < now) /* Normally stale. */
335                {
336                final long extraTime = (goodness >= 0) ? CoreConsts.MAX_EXPECTED_LOW_POWER_RUN_MS :
337                    (2 * CoreConsts.MAX_EXPECTED_LOW_POWER_RUN_MS); // Keep a bad 'un down for longer...
338                if((staleAfter + extraTime < now) || !GenUtils.mustConservePower())
339                    { return(true); }
340                }
341    
342            // Suspicious stale date much too far in the future, so treat data as stale.
343            if(staleAfter >= now + 4*MAX_AGE_BEFORE_STALE_MS)
344                { return(true); }
345    
346            // Looks OK!
347            return(false);
348            }
349    
350        /**If true, the data in this object is not only stale; it is only even a fast approximation.
351         * The data is probably not worth persisting, for example,
352         * since it was not fully computed and could probably be very quickly recomputed.
353         */
354        public boolean isTriviallyStale()
355            {
356            // Object was immediately (trivially) stale.
357            // It cannot become non-stale.
358            return(staleAfter == 0);
359            }
360    
361        /**Get the basic 'best-before' time.
362         * Once this has passed the data is potentially stale
363         * and isStale() will be true unless the system is in power-conserving mode,
364         * at which point isStale() can be deferred for some considerable time.
365         * <p>
366         * This value is useful to help schedule recomputations most-stale-first.
367         * <p>
368         * For trivially-stale instances this is always zero.
369         */
370        public final long bestBefore() { return(staleAfter); }
371    
372        /**Maximum age (in ms) of instances before considered them stale; strictly positive.
373         * <em>Slightly</em> (much less than a factor of two) different limit value
374         * between class/JVM/mirror instances to spread out recomputations.
375         * <p>
376         * Each instance of this object should pick a random time
377         * between about half of this value and the full value before going stale
378         * in order to spread out refresh computations over a reasonable interval.
379         * Recomputing these values may prove quite expensive.
380         * <p>
381         * This value is selected so that most exhibits actual scores
382         * will not have changed significantly in the given interval;
383         * something of the order of tens of hours to tens of days is good.
384         */
385        public static final int MAX_AGE_BEFORE_STALE_MS = 64 * 3600 * 1000 + Rnd.fastRnd.nextInt(8 * 3600 * 1000);
386    
387        /**Maximum age (in ms) of instances built with all other than Scorer data before considered stale; strictly positive.
388         * Should be (much) shorter than MAX_AGE_BEFORE_STALE_MS,
389         * and much less than 24 hours to force recomputation in favour of the full value
390         * in a full daily cycle if sufficient energy becomes available.
391         * <p>
392         * This allows us to get some rating and ranking done in the absence of
393         * heavy-weight AI computations when low on power,
394         * with this interval acting as a cache of the partially-computed value.
395         */
396        public static final int MAX_AGE_SANS_SCORERS_BEFORE_STALE_MS = Math.min(12 * 3600 * 1000 + Rnd.fastRnd.nextInt(1 * 3600 * 1000),
397                MAX_AGE_BEFORE_STALE_MS/4);
398    
399    
400        /**The "goodness" score, -MAX_VALUE is maximally bad and MAX_VALUE maximally good, zero is neutral.
401         * This is a composite of many factors,
402         * including a small random element.
403         * <p>
404         * Has enough significant digits to allow a total ordering
405         * even over a very large number of exhibits.
406         * <p>
407         * This is nominally logically thought of in the range [-1.0, +1.0].
408         */
409        private final int goodness;
410    
411        /**Get the "goodness" score, -MAX_VALUE is maximally bad and MAX_VALUE maximally good, zero is neutral.
412         * This gets the value as a raw int value;
413         * useful for fast compares, sorts, etc.
414         */
415        public int getGoodness()
416            { return(goodness); }
417    
418        /**Get the "goodness" score as a normalised float in the range -1 (bad) to +1 (good), 0 is neutral.
419         * This is represented as a float which can lose precision/information,
420         * but may be easier to work with for display and some computations.
421         */
422        public float getGoodnessAsFloat()
423            {
424            return((float) (((double) goodness) / Integer.MAX_VALUE));
425            }
426    
427        /**Find out if this exhibit is rated "good"/popular or not.
428         * Returns TRUE if rated good, FALSE if bad,
429         * null if not significantly either (ie too close to neutral) or if unknown.
430         * <p>
431         * This is based on the computed "goodness" score.
432         */
433        public Boolean isGood()
434            {
435            if((goodness < GOODBAD_LIMIT_INT) && (goodness > -GOODBAD_LIMIT_INT)) { return(null); }
436            return((goodness >= 0) ? Boolean.TRUE : Boolean.FALSE);
437            }
438    
439        /**Maximum weighting for vox pop user votes.
440         * Should be large but possibly not overwhelming,
441         * eg because of pranks to attempt to "fix" the votes.
442         * <p>
443         * The vote element can count for or against an exhibit (ie be +ve or -ve),
444         * and an absence of any votes may tend to dilute goodness back towards neutral.
445         */
446        public static final float POP_VOTES = 0.7f;
447    
448        /**Maximum weighting from correlated voting on related exhibits.
449         * Possibly quite a crude measure,
450         * though will be more valuable with more data available,
451         * and so if all our required samples are in place,
452         * then we'll give it a rating that should be visible to users,
453         * and more significant than just newness, for example.
454         */
455        public static final float POP_VOTE_CORR = 0.1331f;
456    
457        /**Maximum weighting from Scorer judgement of exhibit content.
458         * This AI-based mechanism inspects the content of each exhibit,
459         * and is driven to get the best possible fit with voting data.
460         * <p>
461         * We cap this at a higher weighting than the metadata-driven statistical methods,
462         * but lower than the real votes.
463         */
464        public static final float POP_AI_SCORER = Math.min(2*POP_VOTE_CORR, POP_VOTES/2);
465    
466        /**Maximum weighting for exhibit recently viewed or downloaded.
467         * Possibly quite a crude measure,
468         * though will be more valuable with more data available,
469         * and so if all our required samples are in place,
470         * we'll give it a rating that should be visible to users,
471         * and more significant than just newness, for example.
472         * <p>
473         * We make this a little less important than vote correlations.
474         */
475        public static final float POP_RECENT_ACCESS = POP_VOTE_CORR * 0.73f;
476    
477        /**Weighting/correlation of "newness" component of popularity.
478         * This is only applied as a positive (proportional) factor
479         * for exhibits considered "new",
480         * since newer items are potentially more interesting.
481         * <p>
482         * This factor should be relatively small,
483         * not overwhelming manually-set weightings for example,
484         * and mainly intended as a tie-breaker.
485         */
486        public static final float POP_CORR_NEWNESS = 0.001f;
487    
488        /**Weighting/correlation of "random" component of popularity.
489         * Exists to help break ties and perturb the popularity ordering a little.
490         * Should be very small so as not to outweigh any genuine factor,
491         * but more-or-less force a total ordering.
492         * <p>
493         * This factor falls equally either side of zero, is a +/- limit.
494         */
495        public static final float POP_CORR_RANDOM = 0.00001f;
496    
497        /**Weighting/correlation of has-description component of popularity.
498         * Applied to items that have a description
499         * as slightly more interesting than those without.
500         * <p>
501         * Should generally be greater than the randomness weighting,
502         * and probably less than the newness rating.
503         */
504        public static final float POP_CORR_HASDESC = Math.min(POP_CORR_RANDOM * 10,
505                                                              POP_CORR_NEWNESS / 3);
506    
507        /**Weighting factor for an item with a specific description. */
508        private static final Factor FACTOR_HASDESC =
509                new Factor(1, POP_CORR_HASDESC);
510    
511        /**Weighting factor for an item with a generic description.
512         * Carries less weight than having a specific description.
513         * <p>
514         * May apply to having location or AKA or description text.
515         */
516        private static final Factor FACTOR_HASGENERICDESC =
517                new Factor(1, POP_CORR_HASDESC/2);
518    
519        /**Single (immutable) component of multi-factor value.
520         * Consists of:
521         * <ul>
522         * <li>a goodness value [-1,+1]
523         * <li>a correlation/confidence rating [-1,+1]
524         * </ul>
525         * <p>
526         * The value added into the final value is proportional to:
527         * <samp>(goodness * correlation) / |correlation|</samp>.
528         * <p>
529         * Uses float values since there is probably not even that much
530         * real precision available, and doing so probably saves space and time.
531         * Double values can be passed to the constructors,
532         * but the extra precision is discarded.
533         */
534        static public final class Factor
535            {
536            public Factor(final float goodness, final float correlation)
537                {
538                if((!(goodness >= -1) && (goodness <= +1)) ||
539                   (!(correlation >= -1) && (correlation <= +1)))
540                    { throw new IllegalArgumentException(); }
541                this.goodness = goodness;
542                this.correlation = correlation;
543                }
544            /**Goodness contributed by this factor: range [-1,+1]. */
545            public final float goodness;
546            /**Correlation of this value with overall result: range [-1,+1]. */
547            public final float correlation;
548    
549            /**We will accept double values, but discard the extra precision. */
550            public Factor(final double goodness, final double correlation)
551                { this((float) goodness, (float) correlation); }
552    
553            /**Convert to human-readable form. */
554            @Override
555            public String toString()
556                {
557                final StringBuilder sb = new StringBuilder();
558                sb.append("Factor");
559                sb.append(":goodness=").append(goodness);
560                sb.append(":correlation=").append(correlation);
561                return(sb.toString());
562                }
563            }
564    
565        /**Zero factor; zero weight, zero goodness. */
566        public static final Factor FACTOR_ZERO = new Factor(0, 0);
567    
568        /**Compute random "goodness" factor symmetrical about zero; never null. */
569        private static Factor _computeRandomGoodnessFactor()
570            {
571            return new Factor((Rnd.fastRnd.nextBoolean() ?
572                                  Rnd.fastRnd.nextDouble() : -Rnd.fastRnd.nextDouble()),
573                              POP_CORR_RANDOM);
574            }
575    
576        /**Compute newness bonus (if any); never null.
577         * If not new enough to be considered "new" then a zero factor is returned.
578         * <p>
579         * This is not necessarily precisely aligned with the "isExhibitNew()" result,
580         * though except in race conditions they should be aligned.
581         *
582         * @param esa  exhibit data; never null
583         * @return factor, positive but low weight if exhibit "new",
584         *     else zero with zero weighting
585         */
586        private static Factor _computeNewnessBonusFactor(final ExhibitStaticAttr esa)
587            {
588            // Also for some wobble is server time vs new-exhibit timestamps.
589            final long age = Math.max(0, System.currentTimeMillis() - esa.timestamp);
590            if(age <= CoreConsts.MAX_EXHIBIT_AGE_NEW_MS)
591                {
592                // A very new exhibit has a goodness of nearly 1.0.
593                // An exhibit only just "new" has a goodness of just over 0.0.
594                final double goodness = (CoreConsts.MAX_EXHIBIT_AGE_NEW_MS - age) /
595                        (double) CoreConsts.MAX_EXHIBIT_AGE_NEW_MS;
596                return(new Factor(goodness, POP_CORR_NEWNESS));
597                }
598    
599            // Exhibit not "new", so zero goodness but the same weight.
600            return(_NOT_NEW);
601            }
602    
603        /**Contribution for goodness of non-new exhibits; zero goodness but the same "newness" weight. */
604        private static final Factor _NOT_NEW = new Factor(0, POP_CORR_NEWNESS);
605    
606        /**Compute full goodness factor, using full data source if available; never null.
607         *
608         * @param esa  exhibit data; never null
609         * @param initialComponents  initial set of (data-dependent) Factor elements;
610         *     possibly empty but never null
611         */
612        private static Factor _computeTotalGoodnessFactor(final ExhibitStaticAttr esa,
613                                                          final GenProps gp,
614                                                          final AllExhibitProperties aep,
615                                                          final List<Factor> initialComponents)
616            {
617            // Compute a list of components of goodness Factors.
618            // Leave enough space to avoid need for expansion in most (time-critical) cases.
619            final List<Factor> components = new ArrayList<Factor>(5 + initialComponents.size());
620            components.addAll(initialComponents);
621    
622            // Compute the fast components that do not require a
623            // (potentially slow) data source or any other non-intrinsic data.
624            components.add(_computeRandomGoodnessFactor());
625            components.add(_computeNewnessBonusFactor(esa));
626    
627            // Calculations that require a non-default GenProps to be effective.
628            // Factor in the author weight, if any.
629            final Name.ExhibitFull exhibitName = esa.getExhibitFullName();
630            final Byte aW = gp.getPopWeightForAuth(ExhibitName.getAuthorComponent(exhibitName));
631            if(aW != null)
632                { components.add(new Factor(1, aW.doubleValue() / GenProps.MAX_POPWT_VAL)); }
633            // Factor in the file-type weight, if any.
634            final Byte tW = gp.getPopWeightForType(ExhibitName.getExtensionComponent(exhibitName));
635            if(tW != null)
636                { components.add(new Factor(1, tW.doubleValue() / GenProps.MAX_POPWT_VAL)); }
637            // Penalise one-word exhibits (ignoring number-in-series) for not being descriptive enough.
638            final Set<String> noAttrWords = Collections.emptySet();
639            if(TextUtils.indexOf(exhibitName.getShortName().getMainWordsComponent(noAttrWords), ExhibitName.WORD_SEP) == -1)
640                { components.add(new Factor(-1, POP_CORR_HASDESC)); }
641            // Slight penalty to exhibits that *cannot* show thumbnails, since images are most accessible.
642            if(!(ExhibitMIME.getInputFileType(esa.getCharSequence())).canPossiblyCreateThumbnailOfSameMIMEType())
643                { components.add(new Factor(-1, POP_CORR_HASDESC/3)); }
644    
645            // Do the full (but potentially-expensive) goodness calcs
646            // iff we have an apparently-working data source.
647            if(aep != null)
648                {
649                // Factor in weight from attribute words.
650                final Enumeration<?> aWEn =
651                    ExhibitName.getAttributeWordsComponentEnumeration(exhibitName, ExhibitAttrUtils.getAttrWords().getAttrWordsSortedSet());
652                if(aWEn != null)
653                    {
654                    while(aWEn.hasMoreElements())
655                        {
656                        final String attrWord = (String) aWEn.nextElement();
657                        final Byte attrW = gp.getPopWeightForAttr(attrWord);
658                        if(attrW != null)
659                            { components.add(new Factor(1, attrW.doubleValue() / GenProps.MAX_POPWT_VAL)); }
660                        }
661                    }
662    
663                // Factor in weight from description, if any.
664                final ExhibitPropsLoadable epl = aep.getExhibitPropsLoadable(exhibitName);
665                if(epl.hasDescription())
666                    { components.add(FACTOR_HASDESC); }
667    
668                // Having a location is a generic description...
669                if(Location.NONE != aep.getLocation(exhibitName))
670                    { components.add(FACTOR_HASGENERICDESC); }
671                }
672    
673            // Now compute the weighted goodness value...
674            double g = 0;
675            double w = 0;
676            for(final Iterator<Factor> it = components.iterator(); it.hasNext(); )
677                {
678                final Factor f = it.next();
679                w += Math.abs(f.correlation);
680                g += f.goodness * f.correlation;
681                }
682    
683            // Adjust so that final weight is 1.0,
684            // and the cumulative value is divided by at least that.
685            final double wPrime = Math.max(1.0, w);
686            return(new Factor(g / wPrime, 1.0));
687            }
688    
689    
690        /**Minimum sample period to even out short cycles in Web access patterns, in ms; strictly positive.
691         * Must usually be a multiple (and minimum) of a week to allow for most common
692         * daily and weekly access patterns to be smoothed as well as possible.
693         */
694        private static final long _SAMPLE_CYCLE_PERIOD_MINOR_MS = 7 * 24 * 3600 * 1000L;
695    
696        /**The longest cycle time we will look for in historical data, in ms; strictly positive.
697         * This is usually a year to look for seasonal patterns,
698         * eg what is popular at Christmas or in the summer.
699         */
700        private static final long _SAMPLE_CYCLE_PERIOD_MAJOR_MS = 365242L * 24 * 3600; // 365.242 days in a year.
701    
702        /**Number of VLONG samples corresponding to annual cycles in data. */
703        private static final int _SEASONAL_VLONG_CYCLE_PERIODS = Math.max(1, (int)
704                    Math.round(_SAMPLE_CYCLE_PERIOD_MAJOR_MS / (double) EventPeriod.VLONG.getIntervalMs()));
705    
706        /**Minimum number of VLONG samples to smooth out daily/weekly cycles in data. */
707        private static final int _MIN_VLONG_SAMPLE_PERIODS = Math.max(1, (int)
708                    Math.round(_SAMPLE_CYCLE_PERIOD_MINOR_MS / (double) EventPeriod.VLONG.getIntervalMs()));
709    
710    
711        /**Compute Factor(s) that depend on access data.
712         * If this cannot fetch some of the data it requires due to an IOException
713         * (not the same as fetching data successfully that has no entries)
714         * then the exception is propagated to the caller.
715         * (Normally this results in the EPCM item for which this is called being marked stale
716         * so that we can try again when the data is available.)
717         * <p>
718         * The comment fields of the factors produced are
719         * the event names from which they were generated.
720         * <p>
721         * This is public to assist testing.
722         *
723         * @throws IOException  if this was unable to fetch some data required
724         */
725        public static Collection<Factor> calcAccessFactors(final Name.ExhibitFull exhibitName,
726                                                           final AllExhibitProperties aep,
727                                                           final BasicVarMgrInterface vars)
728            throws IOException
729            {
730            if((exhibitName == null) || (aep == null) || (vars == null))
731                { throw new IllegalArgumentException(); }
732    
733            // Get short name of exhibit as String,
734            // by which most stats are recorded.
735            final String shortNameAsString = exhibitName.getShortName().toString();
736    
737            // Factor in weight from access stats...
738            final int nExhibits = aep.aeid.size();
739    
740            // Threshold at which usage is considered significantly "good".
741            final int rankLimit = nExhibits / 3;
742    
743            // Set of slots/intervals in which we will sample access-pattern stats.
744            // We let the routine force some solid coverage at the near end.
745            final BitSet whichIntervals = _createSampleBitSet(0);
746    
747            // Count the full set of samples that we are looking for.
748            final int desiredSamples = whichIntervals.cardinality();
749    
750            final long prevInterval = EventPeriod.VLONG.getIntervalNumber(System.currentTimeMillis()) - 1;
751    
752    
753    
754            // Access stats event definitions which we want and that record count by short exhibit name.
755            final List<SimpleVariableDefinition> eventVarNames = new ArrayList<SimpleVariableDefinition>(3);
756            eventVarNames.add(SystemVariables.ACCESSPATTERN_CAT_PAGE_VIEW);
757            eventVarNames.add(SystemVariables.ACCESSPATTERN_CLICKTHROUGH);
758            // Only look for downloads if not automatically excluded from being counted.
759            // (Note that this definition may vary between data being collected and being used.)
760            final ExhibitStaticAttr esa = aep.aeid.getStaticAttr(exhibitName);
761            if((esa != null) && !ImageUtils.canBeOwnThumbnail(esa))
762                { eventVarNames.add(SystemVariables.ACCESSPATTERN_COMPLETED_DOWNLOAD); }
763    
764            // Create/size the results container...
765            final List<Factor> components = new ArrayList<Factor>(eventVarNames.size());
766    
767            // Retrieve data for each event...
768            // Currently we we give all intervals the same weighting.
769            // This is especially dodgy for the current interval, which may be very short and unrepresentative.
770            // We give total weighting in proportion to the square root of the number of samples obtained.
771            // Shuffle the order in which we fetch the data to improve concurrency a little.
772            Collections.shuffle(eventVarNames, Rnd.fastRnd);
773            for(final SimpleVariableDefinition def : eventVarNames)
774                {
775                assert(def.isEvent() && (def.getType() == SimpleVariableDefinition.TYPE_STRING));
776                final EventVariableValue evvs[] = vars.getEventValues(def,
777                                                                      EventPeriod.VLONG,
778                                                                      prevInterval,
779                                                                      whichIntervals);
780                assert(evvs != null); // May be empty, but never null.
781                // Count the number of results we actually retrieved.
782                int resultsFound = 0;
783                double totalScore = 0; // Cumulative score so far...
784                for(int i = whichIntervals.nextSetBit(0); i >= 0; i = whichIntervals.nextSetBit(i+1))
785                    {
786                    if(i >= evvs.length) { break; /* Run out of answers... */ }
787    
788                    final EventVariableValue evv = evvs[i];
789    
790                    // Ignore null values, and those with *no* events at all for any value.
791                    if(evv == null) { continue; }
792                    if(evv.getTotalEventCount() < 1) { continue; }
793    
794                    // OK, have a real result; we can see if this exhibit ranks well or not.
795                    ++resultsFound;
796    
797                    // If the event bucket may have "overflowed" (too many distinct event values)
798                    // then we should probably ignore count==1 entries as unreliable/incomplete.
799                    final boolean possibleOverflow =
800                            (evv.getTotalDistinctValues() >= evv.getDef().getMaxDiffEventCount());
801    
802                    final int rank = evv.getRank(shortNameAsString);
803                    if((rank < rankLimit) &&
804                       (!possibleOverflow || (evv.getCount(shortNameAsString) > 1)))
805                        { totalScore += (rankLimit - rank) / (double) rankLimit; }
806                    }
807    
808                // We should always get at least one response.
809                // Maybe we should abort with an error if not.
810    //            if(resultsFound < 1) { throw new IOException("not even current results found"); }
811    
812                // Compute goodness score.
813                final double g = totalScore / Math.max(1, resultsFound);
814                // Compute confidence weighting based on number of samples.
815                final double confidence = Math.sqrt(resultsFound / (double) desiredSamples);
816    //if(IsDebug.isDebug) { System.out.println("***EPCM._calcAccessFactors(): score/resultsFound/confidence/exhibit/event = " +g+", " +resultsFound+", " +confidence+", " +exhibitName+", " +def.getName()); }
817                components.add(new Factor(g, POP_RECENT_ACCESS * confidence));
818                }
819    
820            return(components);
821            }
822    
823        /**If true then include the "all" buckets in our calculations. */
824        private static final boolean USE_ALL_BUCKET = false;
825    
826        /**Cache of semi-fixed interval sample set for power-conserving (low-power) mode; never null.
827         * Cleared (set to null) whenever not in conservation mode.
828         * <p>
829         * Any BitSet stored here is never altered once assigned.
830         * <p>
831         * Used only when conserving and when the requested minNearMs is no larger than our forced level.
832         * <p>
833         * Fixed (small) upper bound on storage required.
834         * <p>
835         * Marked null for atomic lock-free access.
836         */
837        private static transient volatile BitSet cachedLPSampleSet;
838    
839        /**Compute exactly one Factor that depends on explicit user votes for specified exhibit; never null.
840         * Because we expect voting to be sparse,
841         * we total up votes across all sample intervals to reduce noise,
842         * rather than taking each sample separately,
843         * and do various other calculations a little differently to dense measures.
844         * <p>
845         * If this cannot fetch some of the data it requires due to an IOException
846         * (not the same as fetching data successfully that has no entries)
847         * then the exception is propagated to the caller.
848         * (Normally this results in the EPCM item for which this is called being
849         * marked stale so that we can try again when the data is available.)
850         * <p>
851         * This is public to assist testing and computation of correlations.
852         * <p>
853         * The returned factor's goodness can range from -1 to +1,
854         * and confidence from 0 to +1;
855         * any scaling required will have to be applied elsewhere.
856         *
857         * @param minNearMs minimum interval (ms) at near end
858         *     to include all available samples for
859         *
860         * @throws IOException  if this was unable to fetch some data required
861         */
862        public static Factor calcVoteFactor(final Name.ExhibitFull exhibitName,
863                                            final AllExhibitProperties aep,
864                                            final BasicVarMgrInterface vars,
865                                            final long minNearMs)
866            throws IOException
867            {
868            if((exhibitName == null) || (aep == null) || (vars == null))
869                { throw new IllegalArgumentException(); }
870    
871            if(!aep.aeid.isPresent(exhibitName))
872                { throw new IllegalArgumentException("no such exhibit"); }
873    
874            // Get short name of exhibit,
875            // by which most stats are recorded.
876            final String shortName = exhibitName.getShortName().toString();
877    
878            // Set of slots/intervals in which we will sample voting behaviour.
879            // Insist on over a month's worth of near-term votes if possible,
880            // significantly less if temporarily conserving power long term.
881            // (Long term since many system may happen to be in low power mode
882            // at the time of day that this would usually be recalculated,
883            // even if generally fine on energy availability.)
884            final boolean conservingPower = GenUtils.mustConservePowerLongTerm();
885            final long forcedMinNearMs = conservingPower ?
886                    (7 * 24 * 3600 * 1000L) : (33 * 24 * 3600 * 1000L);
887            final BitSet whichIntervals;
888            if(!conservingPower)
889                {
890                // Beefy new set each time when not conserving...
891                whichIntervals = _createSampleBitSet(Math.max(minNearMs, forcedMinNearMs));
892                // Clear any cached (low-power) set...
893                cachedLPSampleSet = null;
894                }
895            else
896                {
897                if(minNearMs > forcedMinNearMs)
898                    {
899                    // Cannot use cached set if requested minimum near set larger than forced near set...
900                    whichIntervals = _createSampleBitSet(minNearMs);
901                    }
902                else
903                    {
904                    BitSet cached;
905                    // Use cached set if extant...
906                    if(null == (cached = cachedLPSampleSet))
907                        {
908                        // ... else create new fixed minimum-size set and cache it.
909                        cachedLPSampleSet = cached = _createSampleBitSet(forcedMinNearMs);
910                        }
911                    assert(null != cached);
912                    whichIntervals = (BitSet) cached.clone(); // Defensive copy to protect the cache.
913                    }
914                }
915    
916            // Count the full set of samples that we are looking for
917            // optionally including the "all" set assumed always available.
918            final int desiredSamples = whichIntervals.cardinality() + (USE_ALL_BUCKET?1:0);
919    
920            // Last available complete interval/period.
921            final long prevInterval = EventPeriod.VLONG.getIntervalNumber(System.currentTimeMillis()) - 1;
922    
923            // Get votes "for" this exhibit.
924            final EventVariableValue vPro[] = vars.getEventValues(SystemVariables.VOTE_PRO,
925                                                                  EventPeriod.VLONG,
926                                                                  prevInterval,
927                                                                  whichIntervals);
928            // Get votes "against" this exhibit.
929            final EventVariableValue vCon[] = vars.getEventValues(SystemVariables.VOTE_CON,
930                                                                  EventPeriod.VLONG,
931                                                                  prevInterval,
932                                                                  whichIntervals);
933    
934            // Set of all event values seen in all sampled slots.
935            // We use a HashSet for speed.
936            final HashSet<Object> allVoteValues = new HashSet<Object>();
937            // Count of all votes in all slots.
938            int allVotesCount = 0;
939    
940            int resultsFound = 0; // Total results usable including "all" slot.
941            int totalProVotesThisEx = 0; // Total votes for this exhibit in the sample slots.
942            int totalConVotesThisEx = 0; // Total votes against this exhibit in the sample slots.
943    
944            if(USE_ALL_BUCKET)
945                {
946                // Get the "all" slot votes "for"...
947                final EventVariableValue[] aPro = vars.getEventValues(SystemVariables.VOTE_PRO,
948                                            EventPeriod.VLONG,
949                                            0,
950                                            null);
951    
952                // Get the "all" slot votes "against"...
953                final EventVariableValue[] aCon = vars.getEventValues(SystemVariables.VOTE_CON,
954                                            EventPeriod.VLONG,
955                                            0,
956                                            null);
957                // Regard the "all" slot as always present if we're using it
958                // (which they logically are, even if not necessarily programmatically).
959                ++resultsFound;
960                if((aPro.length > 0) && (aPro[0] != null))
961                    {
962                    allVoteValues.addAll(aPro[0].getDistinctValuesInRankOrder());
963                    allVotesCount += aPro[0].getTotalEventCount();
964                    totalProVotesThisEx += aPro[0].getCount(shortName);
965                    }
966                if((aCon.length > 0) && (aCon[0] != null))
967                    {
968                    allVoteValues.addAll(aCon[0].getDistinctValuesInRankOrder());
969                    allVotesCount += aCon[0].getTotalEventCount();
970                    totalConVotesThisEx += aCon[0].getCount(shortName);
971                    }
972                }
973    
974            for(int i = whichIntervals.nextSetBit(0); i >= 0; i = whichIntervals.nextSetBit(i+1))
975                {
976                final EventVariableValue pro = (i < vPro.length) ? vPro[i] : null;
977                final EventVariableValue con = (i < vCon.length) ? vCon[i] : null;
978    
979                // Ignore slot where no votes were collected either way for any exhibit at all.
980                final int proTEC = (pro == null) ? 0 : pro.getTotalEventCount();
981                final int conTEC = (con == null) ? 0 : con.getTotalEventCount();
982                final int tEC = proTEC + conTEC;
983                if(tEC < 1) { continue; }
984    
985                // OK, have a real result; we can see if this exhibit has attracted any votes.
986                ++resultsFound;
987    
988                // Capture all the distinct event values in this slot
989                // so that we know all the exhibits that have attracted votes
990                // in the slots that we have sampled.
991                if(pro != null) { allVoteValues.addAll(pro.getDistinctValuesInRankOrder()); }
992                if(con != null) { allVoteValues.addAll(con.getDistinctValuesInRankOrder()); }
993                allVotesCount += tEC;
994    
995                // Sum up the total votes either way for this exhibit.
996                if(pro != null) { totalProVotesThisEx += pro.getCount(shortName); }
997                if(con != null) { totalConVotesThisEx += con.getCount(shortName); }
998                }
999    
1000            // Take a simplistic view that the overall vote
1001            // is simply the mean of all those cast.
1002            final int totalScoreThisEx = totalProVotesThisEx - totalConVotesThisEx;
1003            final int totalVotesCastThisEx = totalProVotesThisEx + totalConVotesThisEx;
1004    
1005            // Compute goodness score.
1006            final double g = Math.min(1, Math.max(-1, totalScoreThisEx / (double) Math.max(1, totalVotesCastThisEx)));
1007    
1008            // Compute average number of votes per voted-for-exhibit
1009            // over all the sampled slots with at least one vote...
1010            // FIXME: should re-do this to allow for "lost" vote events,
1011            // else we end up overestimating the votes-per-voted-for-exhibit value
1012            // and this under-rating even the most-voted-for exhibit(s).
1013            final double meanVotesPerEx = allVotesCount / Math.max(1.0, allVoteValues.size());
1014    
1015    //System.out.println("***EPCM.calcVoteFactor(): allVotesCount = " + allVotesCount);
1016    //System.out.println("***EPCM.calcVoteFactor(): allVoteValues.size() = " + allVoteValues.size());
1017    //System.out.println("***EPCM.calcVoteFactor(): totalVotesCastThisEx = " + totalVotesCastThisEx);
1018    //System.out.println("***EPCM.calcVoteFactor(): totalScoreThisEx = " + totalScoreThisEx);
1019    //System.out.println("***EPCM.calcVoteFactor(): meanVotesPerEx = " + meanVotesPerEx);
1020    //System.out.println("***EPCM.calcVoteFactor(): desiredSamples = " + desiredSamples + " : " + whichIntervals);
1021    
1022            // Confidence based on number of votes received for this exhibit
1023            // compared to the mean number of votes for each voted-for-exhibit,
1024            // using normal variance-reduction (sqrt) policy.
1025            // This value may exceed 1.0 to boost overall result confidence
1026            // where this exhibit has received an unusually high number of votes.
1027            final boolean enoughVotesCast = totalVotesCastThisEx >= meanVotesPerEx;
1028            final double confidenceThisExhibit = enoughVotesCast ? 1 :
1029                Math.sqrt(totalVotesCastThisEx / meanVotesPerEx);
1030            // Compute confidence weighting based on number of usable samples
1031            // and the total number of votes we got compared to the mean vote count per exhibit in sampled slots.
1032            final double confidence = Math.min(1.0, Math.sqrt(resultsFound / (double) desiredSamples) * confidenceThisExhibit);
1033    //if(IsDebug.isDebug) { System.out.println("***EPCM.calcVoteFactor(): score/resultsFound/confidence/exhibit = " +g+", " +resultsFound+", " +confidence+", " +exhibitName); }
1034            // For low-confidence (low-vote) exhibits, tone down the goodness too.
1035            return(new Factor(enoughVotesCast ? g : (g * confidenceThisExhibit), confidence));
1036            }
1037    
1038    
1039        /**If true, use ALL available votes rather than a sampling in _computeSampleBitSet().
1040         * If this is false then a caller of _computeSampleBitSet() can get a similar effect
1041         * with a large minNearMs value.
1042         */
1043        private static final boolean USE_ALL_VOTES = false;
1044    
1045        /**Create BitSet of samples of event history to take; never null nor empty.
1046         * Creates a BitSet of length up to SystemVariables.EVENT_SAMPLES_RETAINED slots,
1047         * intended to be used with the EventPeriod.VLONG interval size,
1048         * with index 0 indicating the <em>previous</em> slot (ie no use of the current period).
1049         * <p>
1050         * This includes:
1051         * <ul>
1052         * <li>near-end samples designed to smooth out short (daily/weekly) cycles,
1053         * <li>annual/seasonal look-ahead to predict what may be of interest,
1054         * <li>a random sampling of points throughout the event history for robustness.
1055         * </ul>
1056         *
1057         * @param minNearMs  if positive, the minimum number of recent ms that
1058         *     the sampling should cover (else an internal minimum will be imposed)
1059         *
1060         * @return non-null, non-empty new private BitSet
1061         */
1062        private static BitSet _createSampleBitSet(final long minNearMs)
1063            {
1064            if(USE_ALL_VOTES)
1065                {
1066                // Sample every available vote history point for maximum accuracy
1067                // at a possible performance cost.
1068                final BitSet whichIntervals = new BitSet(SystemVariables.EVENT_SAMPLES_RETAINED);
1069                whichIntervals.set(0, SystemVariables.EVENT_SAMPLES_RETAINED);
1070                return(whichIntervals);
1071                }
1072    
1073            // Number of our longest access periods needed to cover
1074            // "the minimum cycle" to smooth out short-term access patterns.
1075            // Typically at least 7 days to smooth out daily and weekly cycles.
1076            // We assume that our short cycle *does* fit in our data window.
1077            assert(_MIN_VLONG_SAMPLE_PERIODS < SystemVariables.EVENT_SAMPLES_RETAINED);
1078    
1079            // Number of intervals in our long cycle,
1080            // typically looking for seasonal patterns.
1081            // Typically 365 days to pick up yearly festivals, seasonal patterns, etc.
1082            // We assume that our long cycle *does* fit in our data window.
1083            assert(_SEASONAL_VLONG_CYCLE_PERIODS < SystemVariables.EVENT_SAMPLES_RETAINED);
1084    
1085            // We cut down the number of samples when conserving energy long term.
1086            // This makes our result more noisy, but not biased.
1087            final boolean reduceSampling = GenUtils.mustConservePowerLongTerm();
1088    
1089            // Now calculate a set of sample periods to use; index 0 is the previous interval.
1090            // We factor in:
1091            //   * a window at the short end to catch recent activity.
1092            //   * a window around our long cycle to catch seasonal patterns.
1093            //   * a large random sampling of the full window to be robust.
1094            // We take a little more than the minimum window at the short end to help predictive ability.
1095            // We DO NOT include the current interval,
1096            // since it is of variable quality and may be expensive to fetch.
1097            final BitSet whichIntervals = new BitSet(SystemVariables.EVENT_SAMPLES_RETAINED);
1098            // Near-end samples, NOT including the current period.
1099            // The caller can force this to be extended to cover the period they are interested in.
1100            final int blockSampleSize = reduceSampling ? _MIN_VLONG_SAMPLE_PERIODS : (4*_MIN_VLONG_SAMPLE_PERIODS);
1101            whichIntervals.set(0, Math.min(SystemVariables.EVENT_SAMPLES_RETAINED-1,
1102                    Math.max(blockSampleSize, (int)(minNearMs/SystemVariables.EVENT_INTERVAL_VLONG_TERM_MS)) +
1103                    1+(MAX_AGE_BEFORE_STALE_MS/SystemVariables.EVENT_INTERVAL_VLONG_TERM_MS)));
1104            // Predictive (we hope) seasonal samples; skew this forward for predictive value.
1105            final int seasonOffset = _SEASONAL_VLONG_CYCLE_PERIODS - _MIN_VLONG_SAMPLE_PERIODS;
1106            whichIntervals.set(seasonOffset, seasonOffset + (reduceSampling ? (2*_MIN_VLONG_SAMPLE_PERIODS) : (3*_MIN_VLONG_SAMPLE_PERIODS)));
1107            // One or more random samples;
1108            // take an extensive sampling of points otherwise not covered.
1109            for(int i = 1+Math.max(blockSampleSize, SystemVariables.EVENT_SAMPLES_RETAINED >> (reduceSampling ? 5 : 2)); --i >= 0; )
1110                { _chooseRandomSlot(Rnd.fastRnd, whichIntervals, true); }
1111            // Chose a new one using a good generator with some real entropy in,
1112            // thus hard to guess (but expensive),
1113            // but only if not conserving power (since we're not going to lose money on this!).
1114            if(!reduceSampling && !GenUtils.mustConservePowerExtreme()) { _chooseRandomSlot(Rnd.goodRnd, whichIntervals, true); }
1115    
1116            return(whichIntervals);
1117            }
1118    
1119        /**Set a random bit in the BitSet to take a random sample.
1120         * Sets a bit at random.
1121         * <p>
1122         * It is possible (though may be expensive) to insist that a new (previously false)
1123         * bit is set, but only if (much) less than SystemVariables.EVENT_SAMPLES_RETAINED bits
1124         * are already true.
1125         *
1126         * @param rnd  random number source; never null
1127         * @param whichIntervals  the BitSet to set a random bit in; never null
1128         * @param forceNew  if true, ensure that we set a new bit ie one that was false
1129         *     unless at least half of all possible bits have already been set
1130         */
1131        private static void _chooseRandomSlot(final Random rnd,
1132                                              final BitSet whichIntervals,
1133                                              final boolean forceNew)
1134            {
1135            do
1136                {
1137                final int bitIndex = rnd.nextInt(SystemVariables.EVENT_SAMPLES_RETAINED);
1138                if(forceNew && whichIntervals.get(bitIndex) &&
1139                        (whichIntervals.cardinality() < (SystemVariables.EVENT_SAMPLES_RETAINED/2)))
1140                    { continue; /* Selected bit already set; try again... */ }
1141    
1142                // OK, actually set the bit and stop!
1143                whichIntervals.set(bitIndex);
1144                break;
1145    
1146                } while(forceNew);
1147            }
1148    
1149        /**Our serial version... */
1150        private static final long serialVersionUID = 8518817446315221597L;
1151    
1152    //    /**Deserialise. */
1153    //    private void readObject(ObjectInputStream in)
1154    //        throws IOException, ClassNotFoundException
1155    //        {
1156    //        in.defaultReadObject();
1157    //        validateObject(); // Validate state immediately.
1158    //        }
1159    
1160        /**Deserialise: use constructor for validation, defensive copying, etc.
1161         * Replace all trivially-stale "empty" values with a single shared value.
1162         * <p>
1163         * <strong>NOTE: this may not always preserve all values as expected in future.</strong>
1164         */
1165        protected Object readResolve()
1166            // throws ObjectStreamException
1167            {
1168            // Eliminate redundant deserialised copies of TRIVIAL_NEUTRAL.
1169            if(isTriviallyStale() && (goodness == 0)) { return(TRIVIAL_NEUTRAL); }
1170    
1171            // Construct new instance of object in normal defensive way.
1172            return(new ExhibitPropsComputableMutable(staleAfter, goodness));
1173            }
1174    
1175        /**Serialise: replace all trivially-stale values being serialised with TRIVIAL_NEUTRAL.
1176         * We save all other (non-trivially-stale) values as-is, even if stale,
1177         * because they may have taken significant resources to compute
1178         * and may still be much more accurate than any fast approximation we could compute.
1179         * <p>
1180         * <strong>NOTE: this is effectively a LOSSY compression mechanism.</strong>
1181         */
1182        protected Object writeReplace()
1183            // throws ObjectStreamException
1184            {
1185            // Don't bother saving trivially-stale values to the stream
1186            // and replace them all with a single value in the output.
1187            if(isTriviallyStale()) { return(TRIVIAL_NEUTRAL); }
1188    
1189            // Serialise/persist this non-trivial value as-is.
1190            return(this);
1191            }
1192    
1193        /**Validate fields/state.
1194         * Called in the constructor and possibly after de-serialising.
1195         * <p>
1196         * Barf if something bad is found.
1197         * (Maybe allow some extra info in debug version.)
1198         */
1199        public void validateObject()
1200            throws InvalidObjectException
1201            {
1202            if(staleAfter < 0)
1203                { throw new InvalidObjectException("bad object: invalid 'staleAfter' time"); }
1204            if(goodness < -Integer.MAX_VALUE)
1205                { throw new InvalidObjectException("bad object: invalid 'goodness' value"); }
1206            }
1207        }