001    /*
002    Copyright (c) 1996-2011, Damon Hart-Davis
003    All rights reserved.
004    
005    Redistribution and use in source and binary forms, with or without
006    modification, are permitted provided that the following conditions are
007    met:
008    
009      * Redistributions of source code must retain the above copyright
010        notice, this list of conditions and the following disclaimer.
011    
012      * Redistributions in binary form must reproduce the above copyright
013        notice, this list of conditions and the following disclaimer in the
014        documentation and/or other materials provided with the
015        distribution.
016    
017    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
018    IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
019    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
020    PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
021    OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
022    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
023    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
024    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
025    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
026    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
027    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
028    */
029    package org.hd.d.pg2k.svrCore;
030    
031    import java.io.IOException;
032    import java.io.InvalidObjectException;
033    import java.io.ObjectInputValidation;
034    import java.io.Serializable;
035    import java.util.ArrayList;
036    import java.util.BitSet;
037    import java.util.Collection;
038    import java.util.Collections;
039    import java.util.Enumeration;
040    import java.util.HashSet;
041    import java.util.Iterator;
042    import java.util.List;
043    import java.util.Random;
044    import java.util.Set;
045    
046    import org.hd.d.pg2k.ai.scorer.ScoreAndConf;
047    import org.hd.d.pg2k.ai.scorer.ScorerCacheIF;
048    import org.hd.d.pg2k.svrCore.MIME.ExhibitMIME;
049    import org.hd.d.pg2k.svrCore.location.Location;
050    import org.hd.d.pg2k.svrCore.props.GenProps;
051    import org.hd.d.pg2k.svrCore.vars.BasicVarMgrInterface;
052    import org.hd.d.pg2k.svrCore.vars.EventPeriod;
053    import org.hd.d.pg2k.svrCore.vars.EventVariableValue;
054    import org.hd.d.pg2k.svrCore.vars.SimpleVariableDefinition;
055    import org.hd.d.pg2k.svrCore.vars.SystemVariables;
056    
057    import ORG.hd.d.IsDebug;
058    
059    
060    /**Immutable (and serialisable) store of all ephemeral computable auxiliary properties of a single exhibit.
061     * These are properties that are computed from exhibit and non-exhibit
062     * data and that need to be periodically recomputed,
063     * such as popularity score.
064     * <p>
065     * This is is called "mutable" because although any one instance is immutable,
066     * successive values retrieved for the same exhibit may be different.
067     * <p>
068     * This data has limited lifetime,
069     * but may be somewhat useful even when stale,
070     * so can be persisted if desired.
071     * <p>
072     * This is designed to be efficient and robust on the wire and in memory,
073     * since these details will be held for each and every exhibit.
074     */
075    public final class ExhibitPropsComputableMutable implements Serializable,
076                                                                ObjectInputValidation
077        {
078        /**Limit of goodness*correlation to consider something (eg category) significantly good or bad. */
079        public static final float GOODBAD_LIMIT = 0.4f;
080    
081        /**Limit of goodness*correlation to consider something (eg category) significantly good or bad, is int value. */
082        public static final int GOODBAD_LIMIT_INT = Math.round(GOODBAD_LIMIT * Integer.MAX_VALUE);
083    
084        /**Compute a "quick"/approximate value for a specified exhibit; never null.
085         * This uses only values from the exhibit static data
086         * plus the system properties object
087         * to compute a fast approximation to a full value.
088         * <p>
089         * The value is immediately marked as stale
090         * (unless it is possible to compute the exact value fast),
091         * but is useful to generate something quickly if the system is busy,
092         * or at start-up for example.
093         *
094         * @param esa  the exhibit to compute properties for; never null
095         * @param gp   the system properties; never null
096         *
097         * @return non-null quick approximation
098         */
099        public static ExhibitPropsComputableMutable generateFastApproximation(
100                                            final ExhibitStaticAttr esa,
101                                            final GenProps gp)
102            {
103            return(compute(esa, gp, null, null, null, null));
104            }
105    
106        /**Compute (accurate) value for a specified exhibit; never null.
107         * This uses values from the exhibit static data
108         * plus the system properties object
109         * plus the data source (and thus might take a long time and be expensive)
110         * to compute an accurate/full value.
111         * <p>
112         * Providing the supplied data source is non-null and functional,
113         * and the GenProps does not appear to be empty (zero timestamp)
114         * then the value will generally not become stale for at least MAX_AGE_BEFORE_STALE_MS/2,
115         * and will become stale in no more than MAX_AGE_BEFORE_STALE_MS ms.
116         * If in low-power mode and/or the AI scorers are not available
117         * then this may ignore them and set a much shorter time to stale,
118         * short enough to force recomputation well within 24 hours
119         * if sufficient power becomes available in that time.
120         * <p>
121         * If the data source is not fully available/functional then
122         * an approximation will be returned instead,
123         * and marked stale to allow recalculation later.
124         *
125         * @param esa  the exhibit to compute properties for; never null
126         * @param gp  the system properties; never null
127         * @param aep  the exhibit properties; can be null
128         * @param vars  source of event data; can be null
129         * @param voteCache  vote cache; can be null
130         *
131         * @return non-null full value or approximation
132         *     depending on the available data sources
133         */
134        public static ExhibitPropsComputableMutable compute(final ExhibitStaticAttr esa,
135                                                            final GenProps gp,
136                                                            final AllExhibitProperties aep,
137                                                            final BasicVarMgrInterface vars,
138                                                            final ExhibitPropsComputableMutableVoteCacheIF voteCache,
139                                                            final ScorerCacheIF scorers)
140            {
141            return(new ExhibitPropsComputableMutable(esa, gp, aep, vars, voteCache, scorers));
142            }
143    
144        /**Create an instance with the specified goodness and stale 'best before' time. */
145        private ExhibitPropsComputableMutable(final long staleAfter, final int goodness)
146            {
147            this.staleAfter = staleAfter;
148            this.goodness = goodness;
149    
150            // Verify object state.
151            try { validateObject(); }
152            catch(final InvalidObjectException e)
153                { throw new IllegalArgumentException(e.getMessage()); }
154            }
155    
156        /**Trivially-stale entirely-neutral instance; not null. */
157        public static final ExhibitPropsComputableMutable TRIVIAL_NEUTRAL = new ExhibitPropsComputableMutable(0, 0);
158    
159        /**Create a fully populated ExhibitPropsComputableMutable object.
160         * This is given the data sources from which it can fetch
161         * the exhibit data one or more times to do its computations.
162         * <p>
163         * The data source references are not stored in the object.
164         *
165         * @param esa  the exhibit to compute properties for; never null
166         * @param gp  the system properties; never null
167         * @param aep  the exhibit properties; can be null
168         * @param vars  source of event data; can be null
169         * @param scorers  source of content-based scoring; can be null
170         * @param voteCache  vote cache; can be null
171         */
172        private ExhibitPropsComputableMutable(final ExhibitStaticAttr esa,
173                                              final GenProps gp,
174                                              final AllExhibitProperties aep,
175                                              final BasicVarMgrInterface vars,
176                                              final ExhibitPropsComputableMutableVoteCacheIF voteCache,
177                                              final ScorerCacheIF scorers)
178            {
179            if((esa == null) || (gp == null))
180                { throw new IllegalArgumentException(); }
181    
182            // Initial set of data-dependent Factor items.
183            final List<Factor> components = new ArrayList<Factor>();
184    
185    
186            // Calculate factors that depend on access to event histories and other 'live' data.
187            // If an I/O problem if encountered then generate an approximate (and stale) value
188            // with all results gathered up until then.
189            boolean dataAccessTrouble = true; // Note if 'stale'/partial because data not complete...
190            final boolean noScorers = (null == scorers) || GenUtils.mustConserveCPU(); // Avoid Scorer data in low-power mode...
191            if((aep != null) && (vars != null) && (voteCache != null))
192                {
193                try
194                    {
195                    // Compute synthetic factors based on access stats.
196                    components.addAll(calcAccessFactors(esa.getExhibitFullName(), aep, vars));
197    
198                    // First, compute factor based on explicit voting for this exhibit.
199                    final Factor vote = voteCache.calcVoteFactor(esa.getExhibitFullName(), aep, vars);
200                    // We use the minimum of the actual and maximum/limit confidence
201                    // to realistically boost the vote contribution as much as possible.
202                    components.add(new Factor(vote.goodness, Math.min(POP_VOTES, vote.correlation), vote.comment));
203    
204                    // Compute factors based on vote correlations, etc.
205                    components.addAll(calcCorrelationFactors(esa, aep, vars, voteCache));
206    
207                    if(!noScorers)
208                        {
209                        // Compute factor, if any, from AI Scorers, weighting by confidence.
210                        // We allow possibly-slightly-stale Scorer values (assumed still to be useful)
211                        // so as not to be pole-axed whenever the date/period rolls.
212                        // At start-up we may simply not have all Scorers loaded and assessed.
213                        final ScoreAndConf compositeScoreAndConf = scorers.computeCompositeScoreAndConfidence(esa.getExhibitFullName(), true);
214                        // Only bother even computing the Factor if we have a non-zero confidence...
215                        if(compositeScoreAndConf.confidence != 0)
216                            {
217                            // We use the minimum of the actual and maximum/limit confidence
218                            // to realistically boost the Scorer contribution as much as possible.
219                            final Factor scorerFactor = new Factor(compositeScoreAndConf.score / (float) ScoreAndConf.MAX,
220                                                                   Math.min(POP_AI_SCORER, compositeScoreAndConf.confidence / (float) ScoreAndConf.MAX));
221                            components.add(scorerFactor);
222    if(IsDebug.isDebug) { System.out.println("[EPCM: scorerFactor=" + scorerFactor + " for " +esa.getCharSequence() + ".]"); }
223                            }
224                        }
225    
226                    // Managed to gather all the data required.
227                    dataAccessTrouble = false;
228                    }
229                catch(final IOException e) { /* Absorb error. */ }
230                }
231    
232    
233            // Compute the full goodness value
234            // and scale it to the form we hold it internally.
235            final Factor gf = _computeTotalGoodnessFactor(esa, gp, aep, components);
236            goodness = Math.max(-Integer.MAX_VALUE, Math.min(Integer.MAX_VALUE,
237                Math.round(Integer.MAX_VALUE * gf.goodness)));
238    
239            // Data is marked stale if any data source is null/empty/default
240            // or if we had trouble fetching some data dynamically,
241            // else data is non-stale and we pick a random expiry time between 3/4 and maximum.
242            // Expiry may be further postponed if the system is conserving resources,
243            // eg if it is in low-power mode.
244    
245            // Trivially state if critical data missing or suspect.
246            if(dataAccessTrouble || (aep == null) || (aep.longHash == 0) || (gp.timestamp == 0))
247                { staleAfter = 0; }
248            // Much-reduced lifetime if no Scorer data.
249            else if(noScorers)
250                { staleAfter = System.currentTimeMillis() + MAX_AGE_SANS_SCORERS_BEFORE_STALE_MS; }
251            // Everything seems to be in place so set a full lifetime.
252            // Vary the lifetime a little to minimise recomputation collisions.
253            else
254                {
255                staleAfter = System.currentTimeMillis() + ((3*MAX_AGE_BEFORE_STALE_MS)/4)
256                                             + Rnd.fastRnd.nextInt(1 + MAX_AGE_BEFORE_STALE_MS/4);
257                }
258    
259            // Verify object state.
260            try { validateObject(); }
261            catch(final InvalidObjectException e)
262                { throw new IllegalArgumentException(e.getMessage()); }
263            }
264    
265        /**If true then force correlation data up to date.
266         * If false then accept somewhat stale or missing data,
267         * since this only changes slowly and a complete absence is neutral.
268         */
269        private static final boolean FORCE_CORR_RECOMP = true;
270    
271        /**Compute factors based on correlations.
272         * These have a confidence pre-scaled in the range 0 to POP_VOTE_CORR.
273         */
274        private static List<Factor> calcCorrelationFactors(final ExhibitStaticAttr esa,
275                        final AllExhibitProperties aep,
276                        final BasicVarMgrInterface vars,
277                        final ExhibitPropsComputableMutableVoteCacheIF voteCache)
278            throws IOException
279            {
280            // Get the raw factors; will need scaling.
281            // Force retrieved values to be (reasonably) up-to-date.
282            final Factor[] rawFactors = voteCache.getCorrelates(esa, aep, vars, FORCE_CORR_RECOMP);
283    
284            // Full set of scaled values.
285            // We scale the certainty down in relation to other factors.
286            final List<Factor> result = new ArrayList<Factor>(rawFactors.length);
287            for(final Factor f : rawFactors)
288                { result.add(new Factor(f.goodness, f.correlation * POP_VOTE_CORR, f.comment)); }
289    
290            return(result);
291            }
292    
293        /**Time until which this data is considered valid; beyond this time is stale.
294         * If zero, or before the current time of day, the data is stale.
295         * <p>
296         * If the stale date is too far in the future for us to believe,
297         * ie much more than would be allowed with this version of the object,
298         * then we treat the data as stale in case it is from a version of this
299         * object with very different staleness limits or there was a persistence error.
300         */
301        private final long staleAfter;
302    
303        /**If true, the data in this object is stale.
304         * The data may still be correct,
305         * but should be treated as an approximation,
306         * and recomputed as soon as possible.
307         * <p>
308         * It is possible for a item previously non-stale that then became 'stale'
309         * to become non-stale again for a limited time
310         * while the system is conserving energy in low-power mode,
311         * thus avoiding the need for some expensive recomputation.
312         * <p>
313         * Trivially-stale instances,
314         * ie that were never fully computed,
315         * never become non-stale again in this way.
316         */
317        public boolean isStale()
318            {
319            // Object was immediately (trivially) stale.
320            // It cannot become non-stale.
321            if(isTriviallyStale()) { return(true); }
322    
323            final long now = System.currentTimeMillis();
324    
325            // Expired/stale.
326            // It may become/stay non-stale for much longer
327            // iff the system is in low-power (temporary energy saving) mode
328            // and even longer for 'bad' exhibits to save wasting too much effort/energy on them.
329            if(staleAfter < now) /* Normally stale. */
330                {
331                final long extraTime = (goodness >= 0) ? CoreConsts.MAX_EXPECTED_LOW_POWER_RUN_MS :
332                    (2 * CoreConsts.MAX_EXPECTED_LOW_POWER_RUN_MS); // Keep a bad 'un down for longer...
333                if((staleAfter + extraTime < now) || !GenUtils.mustConservePower())
334                    { return(true); }
335                }
336    
337            // Suspicious stale date much too far in the future, so treat data as stale.
338            if(staleAfter >= now + 4*MAX_AGE_BEFORE_STALE_MS)
339                { return(true); }
340    
341            // Looks OK!
342            return(false);
343            }
344    
345        /**If true, the data in this object is not only stale; it is only even a fast approximation.
346         * The data is probably not worth persisting, for example,
347         * since it was not fully computed and could probably be very quickly recomputed.
348         */
349        public boolean isTriviallyStale()
350            {
351            // Object was immediately (trivially) stale.
352            // It cannot become non-stale.
353            return(staleAfter == 0);
354            }
355    
356        /**Get the basic 'best-before' time.
357         * Once this has passed the data is potentially stale
358         * and isStale() will be true unless the system is in power-conserving mode,
359         * at which point isStale() can be deferred for some considerable time.
360         * <p>
361         * This value is useful to help schedule recomputations most-stale-first.
362         * <p>
363         * For trivially-stale instances this is always zero.
364         */
365        public final long bestBefore() { return(staleAfter); }
366    
367        /**Maximum age (in ms) of instances before considered them stale; strictly positive.
368         * <em>Slightly</em> (much less than a factor of two) different limit value
369         * between class/JVM/mirror instances to spread out recomputations.
370         * <p>
371         * Each instance of this object should pick a random time
372         * between about half of this value and the full value before going stale
373         * in order to spread out refresh computations over a reasonable interval.
374         * Recomputing these values may prove quite expensive.
375         * <p>
376         * This value is selected so that most exhibits actual scores
377         * will not have changed significantly in the given interval;
378         * something of the order of tens of hours to tens of days is good.
379         */
380        public static final int MAX_AGE_BEFORE_STALE_MS = 64 * 3600 * 1000 + Rnd.fastRnd.nextInt(8 * 3600 * 1000);
381    
382        /**Maximum age (in ms) of instances built with all other than Scorer data before considered stale; strictly positive.
383         * Should be (much) shorter than MAX_AGE_BEFORE_STALE_MS,
384         * and much less than 24 hours to force recomputation in favour of the full value
385         * in a full daily cycle if sufficient energy becomes available.
386         * <p>
387         * This allows us to get some rating and ranking done in the absence of
388         * heavy-weight AI computations when low on power,
389         * with this interval acting as a cache of the partially-computed value.
390         */
391        public static final int MAX_AGE_SANS_SCORERS_BEFORE_STALE_MS = Math.min(12 * 3600 * 1000 + Rnd.fastRnd.nextInt(1 * 3600 * 1000),
392                MAX_AGE_BEFORE_STALE_MS/4);
393    
394    
395        /**The "goodness" score, -MAX_VALUE is maximally bad and MAX_VALUE maximally good, zero is neutral.
396         * This is a composite of many factors,
397         * including a small random element.
398         * <p>
399         * Has enough significant digits to allow a total ordering
400         * even over a very large number of exhibits.
401         * <p>
402         * This is nominally logically thought of in the range [-1.0, +1.0].
403         */
404        private final int goodness;
405    
406        /**Get the "goodness" score, -MAX_VALUE is maximally bad and MAX_VALUE maximally good, zero is neutral.
407         * This gets the value as a raw int value;
408         * useful for fast compares, sorts, etc.
409         */
410        public int getGoodness()
411            { return(goodness); }
412    
413        /**Get the "goodness" score as a normalised float in the range -1 (bad) to +1 (good), 0 is neutral.
414         * This is represented as a float which can lose precision/information,
415         * but may be easier to work with for display and some computations.
416         */
417        public float getGoodnessAsFloat()
418            {
419            return((float) (((double) goodness) / Integer.MAX_VALUE));
420            }
421    
422        /**Find out if this exhibit is rated "good"/popular or not.
423         * Returns TRUE if rated good, FALSE if bad,
424         * null if not significantly either (ie too close to neutral) or if unknown.
425         * <p>
426         * This is based on the computed "goodness" score.
427         */
428        public Boolean isGood()
429            {
430            if((goodness < GOODBAD_LIMIT_INT) && (goodness > -GOODBAD_LIMIT_INT)) { return(null); }
431            return((goodness >= 0) ? Boolean.TRUE : Boolean.FALSE);
432            }
433    
434        /**Maximum weighting for vox pop user votes.
435         * Should be large but possibly not overwhelming,
436         * eg because of pranks to attempt to "fix" the votes.
437         * <p>
438         * The vote element can count for or against an exhibit (ie be +ve or -ve),
439         * and an absence of any votes may tend to dilute goodness back towards neutral.
440         */
441        public static final float POP_VOTES = 0.7f;
442    
443        /**Maximum weighting from correlated voting on related exhibits.
444         * Possibly quite a crude measure,
445         * though will be more valuable with more data available,
446         * and so if all our required samples are in place,
447         * then we'll give it a rating that should be visible to users,
448         * and more significant than just newness, for example.
449         */
450        public static final float POP_VOTE_CORR = 0.1331f;
451    
452        /**Maximum weighting from Scorer judgement of exhibit content.
453         * This AI-based mechanism inspects the content of each exhibit,
454         * and is driven to get the best possible fit with voting data.
455         * <p>
456         * We cap this at a higher weighting than the metadata-driven statistical methods,
457         * but lower than the real votes.
458         */
459        public static final float POP_AI_SCORER = Math.min(2*POP_VOTE_CORR, POP_VOTES/2);
460    
461        /**Maximum weighting for exhibit recently viewed or downloaded.
462         * Possibly quite a crude measure,
463         * though will be more valuable with more data available,
464         * and so if all our required samples are in place,
465         * we'll give it a rating that should be visible to users,
466         * and more significant than just newness, for example.
467         * <p>
468         * We make this a little less important than vote correlations.
469         */
470        public static final float POP_RECENT_ACCESS = POP_VOTE_CORR * 0.73f;
471    
472        /**Weighting/correlation of "newness" component of popularity.
473         * This is only applied as a positive (proportional) factor
474         * for exhibits considered "new",
475         * since newer items are potentially more interesting.
476         * <p>
477         * This factor should be relatively small,
478         * not overwhelming manually-set weightings for example,
479         * and mainly intended as a tie-breaker.
480         */
481        public static final float POP_CORR_NEWNESS = 0.001f;
482    
483        /**Weighting/correlation of "random" component of popularity.
484         * Exists to help break ties and perturb the popularity ordering a little.
485         * Should be very small so as not to outweigh any genuine factor,
486         * but more-or-less force a total ordering.
487         * <p>
488         * This factor falls equally either side of zero, is a +/- limit.
489         */
490        public static final float POP_CORR_RANDOM = 0.00001f;
491    
492        /**Weighting/correlation of has-description component of popularity.
493         * Applied to items that have a description
494         * as slightly more interesting than those without.
495         * <p>
496         * Should generally be greater than the randomness weighting,
497         * and probably less than the newness rating.
498         */
499        public static final float POP_CORR_HASDESC = Math.min(POP_CORR_RANDOM * 10,
500                                                              POP_CORR_NEWNESS / 3);
501    
502        /**Weighting factor for an item with a specific description. */
503        private static final Factor FACTOR_HASDESC =
504                new Factor(1, POP_CORR_HASDESC);
505    
506        /**Weighting factor for an item with a generic description.
507         * Carries less weight than having a specific description.
508         * <p>
509         * May apply to having location or AKA or description text.
510         */
511        private static final Factor FACTOR_HASGENERICDESC =
512                new Factor(1, POP_CORR_HASDESC/2);
513    
514        /**Single (immutable) component of multi-factor value.
515         * Consists of:
516         * <ul>
517         * <li>a goodness value [-1,+1]
518         * <li>a correlation/confidence rating [-1,+1]
519         * </ul>
520         * <p>
521         * The value added into the final value is proportional to:
522         * <samp>(goodness * correlation) / |correlation|</samp>.
523         * <p>
524         * Uses float values since there is probably not even that much
525         * real precision available, and doing so probably saves space and time.
526         * Double values can be passed to the constructors,
527         * but the extra precision is discarded.
528         */
529        static public final class Factor
530            {
531            public Factor(final float goodness, final float correlation)
532                { this(goodness, correlation, null); }
533            public Factor(final float goodness, final float correlation, final String comment)
534                {
535                if((!(goodness >= -1) && (goodness <= +1)) ||
536                   (!(correlation >= -1) && (correlation <= +1)))
537                    { throw new IllegalArgumentException(); }
538                this.goodness = goodness;
539                this.correlation = correlation;
540                this.comment = comment;
541                }
542            /**Goodness contributed by this factor: range [-1,+1]. */
543            public final float goodness;
544            /**Correlation of this value with overall result: range [-1,+1]. */
545            public final float correlation;
546            /**Optional comment fields; can be null. */
547            public final String comment;
548    
549            /**We will accept double values, but discard the extra precision. */
550            public Factor(final double goodness, final double correlation)
551                { this((float) goodness, (float) correlation, null); }
552            /**We will accept double values, but discard the extra precision. */
553            public Factor(final double goodness, final double correlation, final String comment)
554                { this((float) goodness, (float) correlation, comment); }
555    
556            /**Convert to human-readable form. */
557            @Override
558            public String toString()
559                {
560                final StringBuilder sb = new StringBuilder();
561                sb.append("Factor");
562                sb.append(":goodness=").append(goodness);
563                sb.append(":correlation=").append(correlation);
564                if(comment != null)
565                    { sb.append(":comment=").append(comment); }
566                return(sb.toString());
567                }
568            }
569    
570        /**Zero factor; zero weight, zero goodness. */
571        public static final Factor FACTOR_ZERO = new Factor(0, 0);
572    
573        /**Compute random "goodness" factor symmetrical about zero; never null. */
574        private static Factor _computeRandomGoodnessFactor()
575            {
576            return new Factor((Rnd.fastRnd.nextBoolean() ?
577                                  Rnd.fastRnd.nextDouble() : -Rnd.fastRnd.nextDouble()),
578                              POP_CORR_RANDOM);
579            }
580    
581        /**Compute newness bonus (if any); never null.
582         * If not new enough to be considered "new" then a zero factor is returned.
583         * <p>
584         * This is not necessarily precisely aligned with the "isExhibitNew()" result,
585         * though except in race conditions they should be aligned.
586         *
587         * @param esa  exhibit data; never null
588         * @return factor, positive but low weight if exhibit "new",
589         *     else zero with zero weighting
590         */
591        private static Factor _computeNewnessBonusFactor(final ExhibitStaticAttr esa)
592            {
593            // Also for some wobble is server time vs new-exhibit timestamps.
594            final long age = Math.max(0, System.currentTimeMillis() - esa.timestamp);
595            if(age <= CoreConsts.MAX_EXHIBIT_AGE_NEW_MS)
596                {
597                // A very new exhibit has a goodness of nearly 1.0.
598                // An exhibit only just "new" has a goodness of just over 0.0.
599                final double goodness = (CoreConsts.MAX_EXHIBIT_AGE_NEW_MS - age) /
600                        (double) CoreConsts.MAX_EXHIBIT_AGE_NEW_MS;
601                return(new Factor(goodness, POP_CORR_NEWNESS));
602                }
603    
604            // Exhibit not "new", so zero goodness but the same weight.
605            return(_NOT_NEW);
606            }
607    
608        /**Contribution for goodness of non-new exhibits; zero goodness but the same "newness" weight. */
609        private static final Factor _NOT_NEW = new Factor(0, POP_CORR_NEWNESS);
610    
611        /**Compute full goodness factor, using full data source if available; never null.
612         *
613         * @param esa  exhibit data; never null
614         * @param initialComponents  initial set of (data-dependent) Factor elements;
615         *     possibly empty but never null
616         */
617        private static Factor _computeTotalGoodnessFactor(final ExhibitStaticAttr esa,
618                                                          final GenProps gp,
619                                                          final AllExhibitProperties aep,
620                                                          final List<Factor> initialComponents)
621            {
622            // Compute a list of components of goodness Factors.
623            // Leave enough space to avoid need for expansion in most (time-critical) cases.
624            final List<Factor> components = new ArrayList<Factor>(5 + initialComponents.size());
625            components.addAll(initialComponents);
626    
627            // Compute the fast components that do not require a
628            // (potentially slow) data source or any other non-intrinsic data.
629            components.add(_computeRandomGoodnessFactor());
630            components.add(_computeNewnessBonusFactor(esa));
631    
632            // Calculations that require a non-default GenProps to be effective.
633            // Factor in the author weight, if any.
634            final Name.ExhibitFull exhibitName = esa.getExhibitFullName();
635            final Byte aW = gp.getPopWeightForAuth(ExhibitName.getAuthorComponent(exhibitName));
636            if(aW != null)
637                { components.add(new Factor(1, aW.doubleValue() / GenProps.MAX_POPWT_VAL)); }
638            // Factor in the file-type weight, if any.
639            final Byte tW = gp.getPopWeightForType(ExhibitName.getExtensionComponent(exhibitName));
640            if(tW != null)
641                { components.add(new Factor(1, tW.doubleValue() / GenProps.MAX_POPWT_VAL)); }
642            // Penalise one-word exhibits (ignoring number-in-series) for not being descriptive enough.
643            final Set<String> noAttrWords = Collections.emptySet();
644            if(TextUtils.indexOf(exhibitName.getShortName().getMainWordsComponent(noAttrWords), ExhibitName.WORD_SEP) == -1)
645                { components.add(new Factor(-1, POP_CORR_HASDESC)); }
646            // Slight penalty to exhibits that *cannot* show thumbnails, since images are most accessible.
647            if(!(ExhibitMIME.getInputFileType(esa.getCharSequence())).canPossiblyCreateThumbnailOfSameMIMEType())
648                { components.add(new Factor(-1, POP_CORR_HASDESC/3)); }
649    
650            // Do the full (but potentially-expensive) goodness calcs
651            // iff we have an apparently-working data source.
652            if(aep != null)
653                {
654                // Factor in weight from attribute words.
655                final Enumeration<?> aWEn =
656                    ExhibitName.getAttributeWordsComponentEnumeration(exhibitName, ExhibitAttrUtils.getAttrWords().getAttrWordsSortedSet());
657                if(aWEn != null)
658                    {
659                    while(aWEn.hasMoreElements())
660                        {
661                        final String attrWord = (String) aWEn.nextElement();
662                        final Byte attrW = gp.getPopWeightForAttr(attrWord);
663                        if(attrW != null)
664                            { components.add(new Factor(1, attrW.doubleValue() / GenProps.MAX_POPWT_VAL)); }
665                        }
666                    }
667    
668                // Factor in weight from description, if any.
669                final ExhibitPropsLoadable epl = aep.getExhibitPropsLoadable(exhibitName);
670                if(epl.hasDescription())
671                    { components.add(FACTOR_HASDESC); }
672    
673                // Having a location is a generic description...
674                if(Location.NONE != aep.getLocation(exhibitName))
675                    { components.add(FACTOR_HASGENERICDESC); }
676                }
677    
678            // Now compute the weighted goodness value...
679            double g = 0;
680            double w = 0;
681            for(final Iterator<Factor> it = components.iterator(); it.hasNext(); )
682                {
683                final Factor f = it.next();
684                w += Math.abs(f.correlation);
685                g += f.goodness * f.correlation;
686                }
687    
688            // Adjust so that final weight is 1.0,
689            // and the cumulative value is divided by at least that.
690            final double wPrime = Math.max(1.0, w);
691            return(new Factor(g / wPrime, 1.0));
692            }
693    
694    
695        /**Minimum sample period to even out short cycles in Web access patterns, in ms; strictly positive.
696         * Must usually be a multiple (and minimum) of a week to allow for most common
697         * daily and weekly access patterns to be smoothed as well as possible.
698         */
699        private static final long _SAMPLE_CYCLE_PERIOD_MINOR_MS = 7 * 24 * 3600 * 1000L;
700    
701        /**The longest cycle time we will look for in historical data, in ms; strictly positive.
702         * This is usually a year to look for seasonal patterns,
703         * eg what is popular at Christmas or in the summer.
704         */
705        private static final long _SAMPLE_CYCLE_PERIOD_MAJOR_MS = 365242L * 24 * 3600; // 365.242 days in a year.
706    
707        /**Number of VLONG samples corresponding to annual cycles in data. */
708        private static final int _SEASONAL_VLONG_CYCLE_PERIODS = Math.max(1, (int)
709                    Math.round(_SAMPLE_CYCLE_PERIOD_MAJOR_MS / (double) EventPeriod.VLONG.getIntervalMs()));
710    
711        /**Minimum number of VLONG samples to smooth out daily/weekly cycles in data. */
712        private static final int _MIN_VLONG_SAMPLE_PERIODS = Math.max(1, (int)
713                    Math.round(_SAMPLE_CYCLE_PERIOD_MINOR_MS / (double) EventPeriod.VLONG.getIntervalMs()));
714    
715    
716        /**Compute Factor(s) that depend on access data.
717         * If this cannot fetch some of the data it requires due to an IOException
718         * (not the same as fetching data successfully that has no entries)
719         * then the exception is propagated to the caller.
720         * (Normally this results in the EPCM item for which this is called being marked stale
721         * so that we can try again when the data is available.)
722         * <p>
723         * The comment fields of the factors produced are
724         * the event names from which they were generated.
725         * <p>
726         * This is public to assist testing.
727         *
728         * @throws IOException  if this was unable to fetch some data required
729         */
730        public static Collection<Factor> calcAccessFactors(final Name.ExhibitFull exhibitName,
731                                                           final AllExhibitProperties aep,
732                                                           final BasicVarMgrInterface vars)
733            throws IOException
734            {
735            if((exhibitName == null) || (aep == null) || (vars == null))
736                { throw new IllegalArgumentException(); }
737    
738            // Get short name of exhibit as String,
739            // by which most stats are recorded.
740            final String shortNameAsString = exhibitName.getShortName().toString();
741    
742            // Factor in weight from access stats...
743            final int nExhibits = aep.aeid.size();
744    
745            // Threshold at which usage is considered significantly "good".
746            final int rankLimit = nExhibits / 3;
747    
748            // Set of slots/intervals in which we will sample access-pattern stats.
749            // We let the routine force some solid coverage at the near end.
750            final BitSet whichIntervals = _createSampleBitSet(0);
751    
752            // Count the full set of samples that we are looking for.
753            final int desiredSamples = whichIntervals.cardinality();
754    
755            final long prevInterval = EventPeriod.VLONG.getIntervalNumber(System.currentTimeMillis()) - 1;
756    
757    
758    
759            // Access stats event definitions which we want and that record count by short exhibit name.
760            final List<SimpleVariableDefinition> eventVarNames = new ArrayList<SimpleVariableDefinition>(3);
761            eventVarNames.add(SystemVariables.ACCESSPATTERN_CAT_PAGE_VIEW);
762            eventVarNames.add(SystemVariables.ACCESSPATTERN_CLICKTHROUGH);
763            // Only look for downloads if not automatically excluded from being counted.
764            // (Note that this definition may vary between data being collected and being used.)
765            final ExhibitStaticAttr esa = aep.aeid.getStaticAttr(exhibitName);
766            if((esa != null) && !ImageUtils.canBeOwnThumbnail(esa))
767                { eventVarNames.add(SystemVariables.ACCESSPATTERN_COMPLETED_DOWNLOAD); }
768    
769            // Create/size the results container...
770            final List<Factor> components = new ArrayList<Factor>(eventVarNames.size());
771    
772            // Retrieve data for each event...
773            // Currently we we give all intervals the same weighting.
774            // This is especially dodgy for the current interval, which may be very short and unrepresentative.
775            // We give total weighting in proportion to the square root of the number of samples obtained.
776            // Shuffle the order in which we fetch the data to improve concurrency a little.
777            Collections.shuffle(eventVarNames, Rnd.fastRnd);
778            for(final SimpleVariableDefinition def : eventVarNames)
779                {
780                assert(def.isEvent() && (def.getType() == SimpleVariableDefinition.TYPE_STRING));
781                final EventVariableValue evvs[] = vars.getEventValues(def,
782                                                                      EventPeriod.VLONG,
783                                                                      prevInterval,
784                                                                      whichIntervals);
785                assert(evvs != null); // May be empty, but never null.
786                // Count the number of results we actually retrieved.
787                int resultsFound = 0;
788                double totalScore = 0; // Cumulative score so far...
789                for(int i = whichIntervals.nextSetBit(0); i >= 0; i = whichIntervals.nextSetBit(i+1))
790                    {
791                    if(i >= evvs.length) { break; /* Run out of answers... */ }
792    
793                    final EventVariableValue evv = evvs[i];
794    
795                    // Ignore null values, and those with *no* events at all for any value.
796                    if(evv == null) { continue; }
797                    if(evv.getTotalEventCount() < 1) { continue; }
798    
799                    // OK, have a real result; we can see if this exhibit ranks well or not.
800                    ++resultsFound;
801    
802                    // If the event bucket may have "overflowed" (too many distinct event values)
803                    // then we should probably ignore count==1 entries as unreliable/incomplete.
804                    final boolean possibleOverflow =
805                            (evv.getTotalDistinctValues() >= evv.getDef().getMaxDiffEventCount());
806    
807                    final int rank = evv.getRank(shortNameAsString);
808                    if((rank < rankLimit) &&
809                       (!possibleOverflow || (evv.getCount(shortNameAsString) > 1)))
810                        { totalScore += (rankLimit - rank) / (double) rankLimit; }
811                    }
812    
813                // We should always get at least one response.
814                // Maybe we should abort with an error if not.
815    //            if(resultsFound < 1) { throw new IOException("not even current results found"); }
816    
817                // Compute goodness score.
818                final double g = totalScore / Math.max(1, resultsFound);
819                // Compute confidence weighting based on number of samples.
820                final double confidence = Math.sqrt(resultsFound / (double) desiredSamples);
821    //if(IsDebug.isDebug) { System.out.println("***EPCM._calcAccessFactors(): score/resultsFound/confidence/exhibit/event = " +g+", " +resultsFound+", " +confidence+", " +exhibitName+", " +def.getName()); }
822                components.add(new Factor(g, POP_RECENT_ACCESS * confidence, def.getName()));
823                }
824    
825            return(components);
826            }
827    
828        /**If true then include the "all" buckets in our calculations. */
829        private static final boolean USE_ALL_BUCKET = false;
830    
831        /**Cache of semi-fixed interval sample set for power-conserving (low-power) mode; never null.
832         * Cleared (set to null) whenever not in conservation mode.
833         * <p>
834         * Any BitSet stored here is never altered once assigned.
835         * <p>
836         * Used only when conserving and when the requested minNearMs is no larger than our forced level.
837         * <p>
838         * Fixed (small) upper bound on storage required.
839         * <p>
840         * Marked null for atomic lock-free access.
841         */
842        private static transient volatile BitSet cachedLPSampleSet;
843    
844        /**Compute exactly one Factor that depends on explicit user votes for specified exhibit; never null.
845         * Because we expect voting to be sparse,
846         * we total up votes across all sample intervals to reduce noise,
847         * rather than taking each sample separately,
848         * and do various other calculations a little differently to dense measures.
849         * <p>
850         * If this cannot fetch some of the data it requires due to an IOException
851         * (not the same as fetching data successfully that has no entries)
852         * then the exception is propagated to the caller.
853         * (Normally this results in the EPCM item for which this is called being
854         * marked stale so that we can try again when the data is available.)
855         * <p>
856         * This is public to assist testing and computation of correlations.
857         * <p>
858         * The returned factor's goodness can range from -1 to +1,
859         * and confidence from 0 to +1;
860         * any scaling required will have to be applied elsewhere.
861         *
862         * @param minNearMs minimum interval (ms) at near end
863         *     to include all available samples for
864         *
865         * @throws IOException  if this was unable to fetch some data required
866         */
867        public static Factor calcVoteFactor(final Name.ExhibitFull exhibitName,
868                                            final AllExhibitProperties aep,
869                                            final BasicVarMgrInterface vars,
870                                            final long minNearMs)
871            throws IOException
872            {
873            if((exhibitName == null) || (aep == null) || (vars == null))
874                { throw new IllegalArgumentException(); }
875    
876            if(!aep.aeid.isPresent(exhibitName))
877                { throw new IllegalArgumentException("no such exhibit"); }
878    
879            // Get short name of exhibit,
880            // by which most stats are recorded.
881            final String shortName = exhibitName.getShortName().toString();
882    
883            // Set of slots/intervals in which we will sample voting behaviour.
884            // Insist on over a month's worth of near-term votes if possible,
885            // significantly less if temporarily conserving power.
886            final boolean conservingPower = GenUtils.mustConservePower();
887            final long forcedMinNearMs = conservingPower ?
888                    (7 * 24 * 3600 * 1000L) : (33 * 24 * 3600 * 1000L);
889            final BitSet whichIntervals;
890            if(!conservingPower)
891                {
892                // Beefy new set each time when not conserving...
893                whichIntervals = _createSampleBitSet(Math.max(minNearMs, forcedMinNearMs));
894                // Clear any cached (low-power) set...
895                cachedLPSampleSet = null;
896                }
897            else
898                {
899                if(minNearMs > forcedMinNearMs)
900                    {
901                    // Cannot use cached set if requested minimum near set larger than forced near set...
902                    whichIntervals = _createSampleBitSet(minNearMs);
903                    }
904                else
905                    {
906                    BitSet cached;
907                    // Use cached set if extant...
908                    if(null == (cached = cachedLPSampleSet))
909                        {
910                        // ... else create new fixed minimum-size set and cache it.
911                        cachedLPSampleSet = cached = _createSampleBitSet(forcedMinNearMs);
912                        }
913                    assert(null != cached);
914                    whichIntervals = (BitSet) cached.clone(); // Defensive copy to protect the cache.
915                    }
916                }
917    
918            // Count the full set of samples that we are looking for
919            // optionally including the "all" set assumed always available.
920            final int desiredSamples = whichIntervals.cardinality() + (USE_ALL_BUCKET?1:0);
921    
922            // Last available complete interval/period.
923            final long prevInterval = EventPeriod.VLONG.getIntervalNumber(System.currentTimeMillis()) - 1;
924    
925            // Get votes "for" this exhibit.
926            final EventVariableValue vPro[] = vars.getEventValues(SystemVariables.VOTE_PRO,
927                                                                  EventPeriod.VLONG,
928                                                                  prevInterval,
929                                                                  whichIntervals);
930            // Get votes "against" this exhibit.
931            final EventVariableValue vCon[] = vars.getEventValues(SystemVariables.VOTE_CON,
932                                                                  EventPeriod.VLONG,
933                                                                  prevInterval,
934                                                                  whichIntervals);
935    
936            // Set of all event values seen in all sampled slots.
937            // We use a HashSet for speed.
938            final HashSet<Object> allVoteValues = new HashSet<Object>();
939            // Count of all votes in all slots.
940            int allVotesCount = 0;
941    
942            int resultsFound = 0; // Total results usable including "all" slot.
943            int totalProVotesThisEx = 0; // Total votes for this exhibit in the sample slots.
944            int totalConVotesThisEx = 0; // Total votes against this exhibit in the sample slots.
945    
946            if(USE_ALL_BUCKET)
947                {
948                // Get the "all" slot votes "for"...
949                final EventVariableValue[] aPro = vars.getEventValues(SystemVariables.VOTE_PRO,
950                                            EventPeriod.VLONG,
951                                            0,
952                                            null);
953    
954                // Get the "all" slot votes "against"...
955                final EventVariableValue[] aCon = vars.getEventValues(SystemVariables.VOTE_CON,
956                                            EventPeriod.VLONG,
957                                            0,
958                                            null);
959                // Regard the "all" slot as always present if we're using it
960                // (which they logically are, even if not necessarily programmatically).
961                ++resultsFound;
962                if((aPro.length > 0) && (aPro[0] != null))
963                    {
964                    allVoteValues.addAll(aPro[0].getDistinctValuesInRankOrder());
965                    allVotesCount += aPro[0].getTotalEventCount();
966                    totalProVotesThisEx += aPro[0].getCount(shortName);
967                    }
968                if((aCon.length > 0) && (aCon[0] != null))
969                    {
970                    allVoteValues.addAll(aCon[0].getDistinctValuesInRankOrder());
971                    allVotesCount += aCon[0].getTotalEventCount();
972                    totalConVotesThisEx += aCon[0].getCount(shortName);
973                    }
974                }
975    
976            for(int i = whichIntervals.nextSetBit(0); i >= 0; i = whichIntervals.nextSetBit(i+1))
977                {
978                final EventVariableValue pro = (i < vPro.length) ? vPro[i] : null;
979                final EventVariableValue con = (i < vCon.length) ? vCon[i] : null;
980    
981                // Ignore slot where no votes were collected either way for any exhibit at all.
982                final int proTEC = (pro == null) ? 0 : pro.getTotalEventCount();
983                final int conTEC = (con == null) ? 0 : con.getTotalEventCount();
984                final int tEC = proTEC + conTEC;
985                if(tEC < 1) { continue; }
986    
987                // OK, have a real result; we can see if this exhibit has attracted any votes.
988                ++resultsFound;
989    
990                // Capture all the distinct event values in this slot
991                // so that we know all the exhibits that have attracted votes
992                // in the slots that we have sampled.
993                if(pro != null) { allVoteValues.addAll(pro.getDistinctValuesInRankOrder()); }
994                if(con != null) { allVoteValues.addAll(con.getDistinctValuesInRankOrder()); }
995                allVotesCount += tEC;
996    
997                // Sum up the total votes either way for this exhibit.
998                if(pro != null) { totalProVotesThisEx += pro.getCount(shortName); }
999                if(con != null) { totalConVotesThisEx += con.getCount(shortName); }
1000                }
1001    
1002            // Take a simplistic view that the overall vote
1003            // is simply the mean of all those cast.
1004            final int totalScoreThisEx = totalProVotesThisEx - totalConVotesThisEx;
1005            final int totalVotesCastThisEx = totalProVotesThisEx + totalConVotesThisEx;
1006    
1007            // Compute goodness score.
1008            final double g = Math.min(1, Math.max(-1, totalScoreThisEx / (double) Math.max(1, totalVotesCastThisEx)));
1009    
1010            // Compute average number of votes per voted-for-exhibit
1011            // over all the sampled slots with at least one vote...
1012            // FIXME: should re-do this to allow for "lost" vote events,
1013            // else we end up overestimating the votes-per-voted-for-exhibit value
1014            // and this under-rating even the most-voted-for exhibit(s).
1015            final double meanVotesPerEx = allVotesCount / Math.max(1.0, allVoteValues.size());
1016    
1017    //System.out.println("***EPCM.calcVoteFactor(): allVotesCount = " + allVotesCount);
1018    //System.out.println("***EPCM.calcVoteFactor(): allVoteValues.size() = " + allVoteValues.size());
1019    //System.out.println("***EPCM.calcVoteFactor(): totalVotesCastThisEx = " + totalVotesCastThisEx);
1020    //System.out.println("***EPCM.calcVoteFactor(): totalScoreThisEx = " + totalScoreThisEx);
1021    //System.out.println("***EPCM.calcVoteFactor(): meanVotesPerEx = " + meanVotesPerEx);
1022    //System.out.println("***EPCM.calcVoteFactor(): desiredSamples = " + desiredSamples + " : " + whichIntervals);
1023    
1024            // Confidence based on number of votes received for this exhibit
1025            // compared to the mean number of votes for each voted-for-exhibit,
1026            // using normal variance-reduction (sqrt) policy.
1027            // This value may exceed 1.0 to boost overall result confidence
1028            // where this exhibit has received an unusually high number of votes.
1029            final boolean enoughVotesCast = totalVotesCastThisEx >= meanVotesPerEx;
1030            final double confidenceThisExhibit = enoughVotesCast ? 1 :
1031                Math.sqrt(totalVotesCastThisEx / meanVotesPerEx);
1032            // Compute confidence weighting based on number of usable samples
1033            // and the total number of votes we got compared to the mean vote count per exhibit in sampled slots.
1034            final double confidence = Math.min(1.0, Math.sqrt(resultsFound / (double) desiredSamples) * confidenceThisExhibit);
1035    //if(IsDebug.isDebug) { System.out.println("***EPCM.calcVoteFactor(): score/resultsFound/confidence/exhibit = " +g+", " +resultsFound+", " +confidence+", " +exhibitName); }
1036            // For low-confidence (low-vote) exhibits, tone down the goodness too.
1037            return(new Factor(enoughVotesCast ? g : (g * confidenceThisExhibit), confidence, "Votes: " + shortName));
1038            }
1039    
1040    
1041        /**If true, use ALL available votes rather than a sampling in _computeSampleBitSet().
1042         * If this is false then a caller of _computeSampleBitSet() can get a similar effect
1043         * with a large minNearMs value.
1044         */
1045        private static final boolean USE_ALL_VOTES = false;
1046    
1047        /**Create BitSet of samples of event history to take; never null nor empty.
1048         * Creates a BitSet of length up to SystemVariables.EVENT_SAMPLES_RETAINED slots,
1049         * intended to be used with the EventPeriod.VLONG interval size,
1050         * with index 0 indicating the <em>previous</em> slot (ie no use of the current period).
1051         * <p>
1052         * This includes:
1053         * <ul>
1054         * <li>near-end samples designed to smooth out short (daily/weekly) cycles,
1055         * <li>annual/seasonal look-ahead to predict what may be of interest,
1056         * <li>a random sampling of points throughout the event history for robustness.
1057         * </ul>
1058         *
1059         * @param minNearMs  if positive, the minimum number of recent ms that
1060         *     the sampling should cover (else an internal minimum will be imposed)
1061         *
1062         * @return non-null, non-empty new private BitSet
1063         */
1064        private static BitSet _createSampleBitSet(final long minNearMs)
1065            {
1066            if(USE_ALL_VOTES)
1067                {
1068                // Sample every available vote history point for maximum accuracy
1069                // at a possible performance cost.
1070                final BitSet whichIntervals = new BitSet(SystemVariables.EVENT_SAMPLES_RETAINED);
1071                whichIntervals.set(0, SystemVariables.EVENT_SAMPLES_RETAINED);
1072                return(whichIntervals);
1073                }
1074    
1075            // Number of our longest access periods needed to cover
1076            // "the minimum cycle" to smooth out short-term access patterns.
1077            // Typically at least 7 days to smooth out daily and weekly cycles.
1078            // We assume that our short cycle *does* fit in our data window.
1079            assert(_MIN_VLONG_SAMPLE_PERIODS < SystemVariables.EVENT_SAMPLES_RETAINED);
1080    
1081            // Number of intervals in our long cycle,
1082            // typically looking for seasonal patterns.
1083            // Typically 365 days to pick up yearly festivals, seasonal patterns, etc.
1084            // We assume that our long cycle *does* fit in our data window.
1085            assert(_SEASONAL_VLONG_CYCLE_PERIODS < SystemVariables.EVENT_SAMPLES_RETAINED);
1086    
1087            // We cut down the number of samples when (temporarily) conserving energy.
1088            // This makes our result more noisy, but not biased.
1089            final boolean reduceSampling = GenUtils.mustConservePower();
1090    
1091            // Now calculate a set of sample periods to use; index 0 is the previous interval.
1092            // We factor in:
1093            //   * a window at the short end to catch recent activity.
1094            //   * a window around our long cycle to catch seasonal patterns.
1095            //   * a large random sampling of the full window to be robust.
1096            // We take a little more than the minimum window at the short end to help predictive ability.
1097            // We DO NOT include the current interval,
1098            // since it is of variable quality and may be expensive to fetch.
1099            final BitSet whichIntervals = new BitSet(SystemVariables.EVENT_SAMPLES_RETAINED);
1100            // Near-end samples, NOT including the current period.
1101            // The caller can force this to be extended to cover the period they are interested in.
1102            final int blockSampleSize = reduceSampling ? _MIN_VLONG_SAMPLE_PERIODS : (4*_MIN_VLONG_SAMPLE_PERIODS);
1103            whichIntervals.set(0, Math.min(SystemVariables.EVENT_SAMPLES_RETAINED-1,
1104                    Math.max(blockSampleSize, (int)(minNearMs/SystemVariables.EVENT_INTERVAL_VLONG_TERM_MS)) +
1105                    1+(MAX_AGE_BEFORE_STALE_MS/SystemVariables.EVENT_INTERVAL_VLONG_TERM_MS)));
1106            // Predictive (we hope) seasonal samples; skew this forward for predictive value.
1107            final int seasonOffset = _SEASONAL_VLONG_CYCLE_PERIODS - _MIN_VLONG_SAMPLE_PERIODS;
1108            whichIntervals.set(seasonOffset, seasonOffset + (reduceSampling ? (2*_MIN_VLONG_SAMPLE_PERIODS) : (3*_MIN_VLONG_SAMPLE_PERIODS)));
1109            // One or more random samples;
1110            // take an extensive sampling of points otherwise not covered.
1111            for(int i = 1+Math.max(blockSampleSize, SystemVariables.EVENT_SAMPLES_RETAINED >> (reduceSampling ? 5 : 2)); --i >= 0; )
1112                { _chooseRandomSlot(Rnd.fastRnd, whichIntervals, true); }
1113            // Chose a new one using a good generator with some real entropy in,
1114            // thus hard to guess (but expensive),
1115            // but only if not conserving power (since we're not going to lose money on this!).
1116            if(!reduceSampling) { _chooseRandomSlot(Rnd.goodRnd, whichIntervals, true); }
1117    
1118            return(whichIntervals);
1119            }
1120    
1121        /**Set a random bit in the BitSet to take a random sample.
1122         * Sets a bit at random.
1123         * <p>
1124         * It is possible (though may be expensive) to insist that a new (previously false)
1125         * bit is set, but only if (much) less than SystemVariables.EVENT_SAMPLES_RETAINED bits
1126         * are already true.
1127         *
1128         * @param rnd  random number source; never null
1129         * @param whichIntervals  the BitSet to set a random bit in; never null
1130         * @param forceNew  if true, ensure that we set a new bit ie one that was false
1131         *     unless at least half of all possible bits have already been set
1132         */
1133        private static void _chooseRandomSlot(final Random rnd,
1134                                              final BitSet whichIntervals,
1135                                              final boolean forceNew)
1136            {
1137            do
1138                {
1139                final int bitIndex = rnd.nextInt(SystemVariables.EVENT_SAMPLES_RETAINED);
1140                if(forceNew && whichIntervals.get(bitIndex) &&
1141                        (whichIntervals.cardinality() < (SystemVariables.EVENT_SAMPLES_RETAINED/2)))
1142                    { continue; /* Selected bit already set; try again... */ }
1143    
1144                // OK, actually set the bit and stop!
1145                whichIntervals.set(bitIndex);
1146                break;
1147    
1148                } while(forceNew);
1149            }
1150    
1151        /**Our serial version... */
1152        private static final long serialVersionUID = 8518817446315221597L;
1153    
1154    //    /**Deserialise. */
1155    //    private void readObject(ObjectInputStream in)
1156    //        throws IOException, ClassNotFoundException
1157    //        {
1158    //        in.defaultReadObject();
1159    //        validateObject(); // Validate state immediately.
1160    //        }
1161    
1162        /**Deserialise: use constructor for validation, defensive copying, etc.
1163         * Replace all trivially-stale "empty" values with a single shared value.
1164         * <p>
1165         * <strong>NOTE: this may not always preserve all values as expected in future.</strong>
1166         */
1167        protected Object readResolve()
1168            // throws ObjectStreamException
1169            {
1170            // Eliminate redundant deserialised copies of TRIVIAL_NEUTRAL.
1171            if(isTriviallyStale() && (goodness == 0)) { return(TRIVIAL_NEUTRAL); }
1172    
1173            // Construct new instance of object in normal defensive way.
1174            return(new ExhibitPropsComputableMutable(staleAfter, goodness));
1175            }
1176    
1177        /**Serialise: replace all trivially-stale values being serialised with TRIVIAL_NEUTRAL.
1178         * We save all other (non-trivially-stale) values as-is, even if stale,
1179         * because they may have taken significant resources to compute
1180         * and may still be much more accurate than any fast approximation we could compute.
1181         * <p>
1182         * <strong>NOTE: this is effectively a LOSSY compression mechanism.</strong>
1183         */
1184        protected Object writeReplace()
1185            // throws ObjectStreamException
1186            {
1187            // Don't bother saving trivially-stale values to the stream
1188            // and replace them all with a single value in the output.
1189            if(isTriviallyStale()) { return(TRIVIAL_NEUTRAL); }
1190    
1191            // Serialise/persist this non-trivial value as-is.
1192            return(this);
1193            }
1194    
1195        /**Validate fields/state.
1196         * Called in the constructor and possibly after de-serialising.
1197         * <p>
1198         * Barf if something bad is found.
1199         * (Maybe allow some extra info in debug version.)
1200         */
1201        public void validateObject()
1202            throws InvalidObjectException
1203            {
1204            if(staleAfter < 0)
1205                { throw new InvalidObjectException("bad object: invalid 'staleAfter' time"); }
1206            if(goodness < -Integer.MAX_VALUE)
1207                { throw new InvalidObjectException("bad object: invalid 'goodness' value"); }
1208            }
1209        }