001 /*
002 Copyright (c) 1996-2011, Damon Hart-Davis
003 All rights reserved.
004
005 Redistribution and use in source and binary forms, with or without
006 modification, are permitted provided that the following conditions are
007 met:
008
009 * Redistributions of source code must retain the above copyright
010 notice, this list of conditions and the following disclaimer.
011
012 * Redistributions in binary form must reproduce the above copyright
013 notice, this list of conditions and the following disclaimer in the
014 documentation and/or other materials provided with the
015 distribution.
016
017 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
018 IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
019 TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
020 PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
021 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
022 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
023 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
024 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
025 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
026 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
027 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
028 */
029 package org.hd.d.pg2k.svrCore;
030
031 import java.io.IOException;
032 import java.io.InvalidObjectException;
033 import java.io.ObjectInputValidation;
034 import java.io.Serializable;
035 import java.util.ArrayList;
036 import java.util.BitSet;
037 import java.util.Collection;
038 import java.util.Collections;
039 import java.util.Enumeration;
040 import java.util.HashSet;
041 import java.util.Iterator;
042 import java.util.List;
043 import java.util.Random;
044 import java.util.Set;
045
046 import org.hd.d.pg2k.ai.scorer.ScoreAndConf;
047 import org.hd.d.pg2k.ai.scorer.ScorerCacheIF;
048 import org.hd.d.pg2k.svrCore.MIME.ExhibitMIME;
049 import org.hd.d.pg2k.svrCore.location.Location;
050 import org.hd.d.pg2k.svrCore.props.GenProps;
051 import org.hd.d.pg2k.svrCore.vars.BasicVarMgrInterface;
052 import org.hd.d.pg2k.svrCore.vars.EventPeriod;
053 import org.hd.d.pg2k.svrCore.vars.EventVariableValue;
054 import org.hd.d.pg2k.svrCore.vars.SimpleVariableDefinition;
055 import org.hd.d.pg2k.svrCore.vars.SystemVariables;
056
057 import ORG.hd.d.IsDebug;
058
059
060 /**Immutable (and serialisable) store of all ephemeral computable auxiliary properties of a single exhibit.
061 * These are properties that are computed from exhibit and non-exhibit
062 * data and that need to be periodically recomputed,
063 * such as popularity score.
064 * <p>
065 * This is is called "mutable" because although any one instance is immutable,
066 * successive values retrieved for the same exhibit may be different.
067 * <p>
068 * This data has limited lifetime,
069 * but may be somewhat useful even when stale,
070 * so can be persisted if desired.
071 * <p>
072 * This is designed to be efficient and robust on the wire and in memory,
073 * since these details will be held for each and every exhibit.
074 */
075 public final class ExhibitPropsComputableMutable implements Serializable,
076 ObjectInputValidation
077 {
078 /**Limit of goodness*correlation to consider something (eg category) significantly good or bad. */
079 public static final float GOODBAD_LIMIT = 0.4f;
080
081 /**Limit of goodness*correlation to consider something (eg category) significantly good or bad, is int value. */
082 public static final int GOODBAD_LIMIT_INT = Math.round(GOODBAD_LIMIT * Integer.MAX_VALUE);
083
084 /**Compute a "quick"/approximate value for a specified exhibit; never null.
085 * This uses only values from the exhibit static data
086 * plus the system properties object
087 * to compute a fast approximation to a full value.
088 * <p>
089 * The value is immediately marked as stale
090 * (unless it is possible to compute the exact value fast),
091 * but is useful to generate something quickly if the system is busy,
092 * or at start-up for example.
093 *
094 * @param esa the exhibit to compute properties for; never null
095 * @param gp the system properties; never null
096 *
097 * @return non-null quick approximation
098 */
099 public static ExhibitPropsComputableMutable generateFastApproximation(
100 final ExhibitStaticAttr esa,
101 final GenProps gp)
102 {
103 return(compute(esa, gp, null, null, null, null));
104 }
105
106 /**Compute (accurate) value for a specified exhibit; never null.
107 * This uses values from the exhibit static data
108 * plus the system properties object
109 * plus the data source (and thus might take a long time and be expensive)
110 * to compute an accurate/full value.
111 * <p>
112 * Providing the supplied data source is non-null and functional,
113 * and the GenProps does not appear to be empty (zero timestamp)
114 * then the value will generally not become stale for at least MAX_AGE_BEFORE_STALE_MS/2,
115 * and will become stale in no more than MAX_AGE_BEFORE_STALE_MS ms.
116 * If in low-power mode and/or the AI scorers are not available
117 * then this may ignore them and set a much shorter time to stale,
118 * short enough to force recomputation well within 24 hours
119 * if sufficient power becomes available in that time.
120 * <p>
121 * If the data source is not fully available/functional then
122 * an approximation will be returned instead,
123 * and marked stale to allow recalculation later.
124 *
125 * @param esa the exhibit to compute properties for; never null
126 * @param gp the system properties; never null
127 * @param aep the exhibit properties; can be null
128 * @param vars source of event data; can be null
129 * @param voteCache vote cache; can be null
130 *
131 * @return non-null full value or approximation
132 * depending on the available data sources
133 */
134 public static ExhibitPropsComputableMutable compute(final ExhibitStaticAttr esa,
135 final GenProps gp,
136 final AllExhibitProperties aep,
137 final BasicVarMgrInterface vars,
138 final ExhibitPropsComputableMutableVoteCacheIF voteCache,
139 final ScorerCacheIF scorers)
140 {
141 return(new ExhibitPropsComputableMutable(esa, gp, aep, vars, voteCache, scorers));
142 }
143
144 /**Create an instance with the specified goodness and stale 'best before' time. */
145 private ExhibitPropsComputableMutable(final long staleAfter, final int goodness)
146 {
147 this.staleAfter = staleAfter;
148 this.goodness = goodness;
149
150 // Verify object state.
151 try { validateObject(); }
152 catch(final InvalidObjectException e)
153 { throw new IllegalArgumentException(e.getMessage()); }
154 }
155
156 /**Trivially-stale entirely-neutral instance; not null. */
157 public static final ExhibitPropsComputableMutable TRIVIAL_NEUTRAL = new ExhibitPropsComputableMutable(0, 0);
158
159 /**Create a fully populated ExhibitPropsComputableMutable object.
160 * This is given the data sources from which it can fetch
161 * the exhibit data one or more times to do its computations.
162 * <p>
163 * The data source references are not stored in the object.
164 *
165 * @param esa the exhibit to compute properties for; never null
166 * @param gp the system properties; never null
167 * @param aep the exhibit properties; can be null
168 * @param vars source of event data; can be null
169 * @param scorers source of content-based scoring; can be null
170 * @param voteCache vote cache; can be null
171 */
172 private ExhibitPropsComputableMutable(final ExhibitStaticAttr esa,
173 final GenProps gp,
174 final AllExhibitProperties aep,
175 final BasicVarMgrInterface vars,
176 final ExhibitPropsComputableMutableVoteCacheIF voteCache,
177 final ScorerCacheIF scorers)
178 {
179 if((esa == null) || (gp == null))
180 { throw new IllegalArgumentException(); }
181
182 // Initial set of data-dependent Factor items.
183 final List<Factor> components = new ArrayList<Factor>();
184
185
186 // Calculate factors that depend on access to event histories and other 'live' data.
187 // If an I/O problem if encountered then generate an approximate (and stale) value
188 // with all results gathered up until then.
189 boolean dataAccessTrouble = true; // Note if 'stale'/partial because data not complete...
190 final boolean noScorers = (null == scorers) || GenUtils.mustConserveCPU(); // Avoid Scorer data in low-power mode...
191 if((aep != null) && (vars != null) && (voteCache != null))
192 {
193 try
194 {
195 // Compute synthetic factors based on access stats.
196 components.addAll(calcAccessFactors(esa.getExhibitFullName(), aep, vars));
197
198 // First, compute factor based on explicit voting for this exhibit.
199 final Factor vote = voteCache.calcVoteFactor(esa.getExhibitFullName(), aep, vars);
200 // We use the minimum of the actual and maximum/limit confidence
201 // to realistically boost the vote contribution as much as possible.
202 components.add(new Factor(vote.goodness, Math.min(POP_VOTES, vote.correlation), vote.comment));
203
204 // Compute factors based on vote correlations, etc.
205 components.addAll(calcCorrelationFactors(esa, aep, vars, voteCache));
206
207 if(!noScorers)
208 {
209 // Compute factor, if any, from AI Scorers, weighting by confidence.
210 // We allow possibly-slightly-stale Scorer values (assumed still to be useful)
211 // so as not to be pole-axed whenever the date/period rolls.
212 // At start-up we may simply not have all Scorers loaded and assessed.
213 final ScoreAndConf compositeScoreAndConf = scorers.computeCompositeScoreAndConfidence(esa.getExhibitFullName(), true);
214 // Only bother even computing the Factor if we have a non-zero confidence...
215 if(compositeScoreAndConf.confidence != 0)
216 {
217 // We use the minimum of the actual and maximum/limit confidence
218 // to realistically boost the Scorer contribution as much as possible.
219 final Factor scorerFactor = new Factor(compositeScoreAndConf.score / (float) ScoreAndConf.MAX,
220 Math.min(POP_AI_SCORER, compositeScoreAndConf.confidence / (float) ScoreAndConf.MAX));
221 components.add(scorerFactor);
222 if(IsDebug.isDebug) { System.out.println("[EPCM: scorerFactor=" + scorerFactor + " for " +esa.getCharSequence() + ".]"); }
223 }
224 }
225
226 // Managed to gather all the data required.
227 dataAccessTrouble = false;
228 }
229 catch(final IOException e) { /* Absorb error. */ }
230 }
231
232
233 // Compute the full goodness value
234 // and scale it to the form we hold it internally.
235 final Factor gf = _computeTotalGoodnessFactor(esa, gp, aep, components);
236 goodness = Math.max(-Integer.MAX_VALUE, Math.min(Integer.MAX_VALUE,
237 Math.round(Integer.MAX_VALUE * gf.goodness)));
238
239 // Data is marked stale if any data source is null/empty/default
240 // or if we had trouble fetching some data dynamically,
241 // else data is non-stale and we pick a random expiry time between 3/4 and maximum.
242 // Expiry may be further postponed if the system is conserving resources,
243 // eg if it is in low-power mode.
244
245 // Trivially state if critical data missing or suspect.
246 if(dataAccessTrouble || (aep == null) || (aep.longHash == 0) || (gp.timestamp == 0))
247 { staleAfter = 0; }
248 // Much-reduced lifetime if no Scorer data.
249 else if(noScorers)
250 { staleAfter = System.currentTimeMillis() + MAX_AGE_SANS_SCORERS_BEFORE_STALE_MS; }
251 // Everything seems to be in place so set a full lifetime.
252 // Vary the lifetime a little to minimise recomputation collisions.
253 else
254 {
255 staleAfter = System.currentTimeMillis() + ((3*MAX_AGE_BEFORE_STALE_MS)/4)
256 + Rnd.fastRnd.nextInt(1 + MAX_AGE_BEFORE_STALE_MS/4);
257 }
258
259 // Verify object state.
260 try { validateObject(); }
261 catch(final InvalidObjectException e)
262 { throw new IllegalArgumentException(e.getMessage()); }
263 }
264
265 /**If true then force correlation data up to date.
266 * If false then accept somewhat stale or missing data,
267 * since this only changes slowly and a complete absence is neutral.
268 */
269 private static final boolean FORCE_CORR_RECOMP = true;
270
271 /**Compute factors based on correlations.
272 * These have a confidence pre-scaled in the range 0 to POP_VOTE_CORR.
273 */
274 private static List<Factor> calcCorrelationFactors(final ExhibitStaticAttr esa,
275 final AllExhibitProperties aep,
276 final BasicVarMgrInterface vars,
277 final ExhibitPropsComputableMutableVoteCacheIF voteCache)
278 throws IOException
279 {
280 // Get the raw factors; will need scaling.
281 // Force retrieved values to be (reasonably) up-to-date.
282 final Factor[] rawFactors = voteCache.getCorrelates(esa, aep, vars, FORCE_CORR_RECOMP);
283
284 // Full set of scaled values.
285 // We scale the certainty down in relation to other factors.
286 final List<Factor> result = new ArrayList<Factor>(rawFactors.length);
287 for(final Factor f : rawFactors)
288 { result.add(new Factor(f.goodness, f.correlation * POP_VOTE_CORR, f.comment)); }
289
290 return(result);
291 }
292
293 /**Time until which this data is considered valid; beyond this time is stale.
294 * If zero, or before the current time of day, the data is stale.
295 * <p>
296 * If the stale date is too far in the future for us to believe,
297 * ie much more than would be allowed with this version of the object,
298 * then we treat the data as stale in case it is from a version of this
299 * object with very different staleness limits or there was a persistence error.
300 */
301 private final long staleAfter;
302
303 /**If true, the data in this object is stale.
304 * The data may still be correct,
305 * but should be treated as an approximation,
306 * and recomputed as soon as possible.
307 * <p>
308 * It is possible for a item previously non-stale that then became 'stale'
309 * to become non-stale again for a limited time
310 * while the system is conserving energy in low-power mode,
311 * thus avoiding the need for some expensive recomputation.
312 * <p>
313 * Trivially-stale instances,
314 * ie that were never fully computed,
315 * never become non-stale again in this way.
316 */
317 public boolean isStale()
318 {
319 // Object was immediately (trivially) stale.
320 // It cannot become non-stale.
321 if(isTriviallyStale()) { return(true); }
322
323 final long now = System.currentTimeMillis();
324
325 // Expired/stale.
326 // It may become/stay non-stale for much longer
327 // iff the system is in low-power (temporary energy saving) mode
328 // and even longer for 'bad' exhibits to save wasting too much effort/energy on them.
329 if(staleAfter < now) /* Normally stale. */
330 {
331 final long extraTime = (goodness >= 0) ? CoreConsts.MAX_EXPECTED_LOW_POWER_RUN_MS :
332 (2 * CoreConsts.MAX_EXPECTED_LOW_POWER_RUN_MS); // Keep a bad 'un down for longer...
333 if((staleAfter + extraTime < now) || !GenUtils.mustConservePower())
334 { return(true); }
335 }
336
337 // Suspicious stale date much too far in the future, so treat data as stale.
338 if(staleAfter >= now + 4*MAX_AGE_BEFORE_STALE_MS)
339 { return(true); }
340
341 // Looks OK!
342 return(false);
343 }
344
345 /**If true, the data in this object is not only stale; it is only even a fast approximation.
346 * The data is probably not worth persisting, for example,
347 * since it was not fully computed and could probably be very quickly recomputed.
348 */
349 public boolean isTriviallyStale()
350 {
351 // Object was immediately (trivially) stale.
352 // It cannot become non-stale.
353 return(staleAfter == 0);
354 }
355
356 /**Get the basic 'best-before' time.
357 * Once this has passed the data is potentially stale
358 * and isStale() will be true unless the system is in power-conserving mode,
359 * at which point isStale() can be deferred for some considerable time.
360 * <p>
361 * This value is useful to help schedule recomputations most-stale-first.
362 * <p>
363 * For trivially-stale instances this is always zero.
364 */
365 public final long bestBefore() { return(staleAfter); }
366
367 /**Maximum age (in ms) of instances before considered them stale; strictly positive.
368 * <em>Slightly</em> (much less than a factor of two) different limit value
369 * between class/JVM/mirror instances to spread out recomputations.
370 * <p>
371 * Each instance of this object should pick a random time
372 * between about half of this value and the full value before going stale
373 * in order to spread out refresh computations over a reasonable interval.
374 * Recomputing these values may prove quite expensive.
375 * <p>
376 * This value is selected so that most exhibits actual scores
377 * will not have changed significantly in the given interval;
378 * something of the order of tens of hours to tens of days is good.
379 */
380 public static final int MAX_AGE_BEFORE_STALE_MS = 64 * 3600 * 1000 + Rnd.fastRnd.nextInt(8 * 3600 * 1000);
381
382 /**Maximum age (in ms) of instances built with all other than Scorer data before considered stale; strictly positive.
383 * Should be (much) shorter than MAX_AGE_BEFORE_STALE_MS,
384 * and much less than 24 hours to force recomputation in favour of the full value
385 * in a full daily cycle if sufficient energy becomes available.
386 * <p>
387 * This allows us to get some rating and ranking done in the absence of
388 * heavy-weight AI computations when low on power,
389 * with this interval acting as a cache of the partially-computed value.
390 */
391 public static final int MAX_AGE_SANS_SCORERS_BEFORE_STALE_MS = Math.min(12 * 3600 * 1000 + Rnd.fastRnd.nextInt(1 * 3600 * 1000),
392 MAX_AGE_BEFORE_STALE_MS/4);
393
394
395 /**The "goodness" score, -MAX_VALUE is maximally bad and MAX_VALUE maximally good, zero is neutral.
396 * This is a composite of many factors,
397 * including a small random element.
398 * <p>
399 * Has enough significant digits to allow a total ordering
400 * even over a very large number of exhibits.
401 * <p>
402 * This is nominally logically thought of in the range [-1.0, +1.0].
403 */
404 private final int goodness;
405
406 /**Get the "goodness" score, -MAX_VALUE is maximally bad and MAX_VALUE maximally good, zero is neutral.
407 * This gets the value as a raw int value;
408 * useful for fast compares, sorts, etc.
409 */
410 public int getGoodness()
411 { return(goodness); }
412
413 /**Get the "goodness" score as a normalised float in the range -1 (bad) to +1 (good), 0 is neutral.
414 * This is represented as a float which can lose precision/information,
415 * but may be easier to work with for display and some computations.
416 */
417 public float getGoodnessAsFloat()
418 {
419 return((float) (((double) goodness) / Integer.MAX_VALUE));
420 }
421
422 /**Find out if this exhibit is rated "good"/popular or not.
423 * Returns TRUE if rated good, FALSE if bad,
424 * null if not significantly either (ie too close to neutral) or if unknown.
425 * <p>
426 * This is based on the computed "goodness" score.
427 */
428 public Boolean isGood()
429 {
430 if((goodness < GOODBAD_LIMIT_INT) && (goodness > -GOODBAD_LIMIT_INT)) { return(null); }
431 return((goodness >= 0) ? Boolean.TRUE : Boolean.FALSE);
432 }
433
434 /**Maximum weighting for vox pop user votes.
435 * Should be large but possibly not overwhelming,
436 * eg because of pranks to attempt to "fix" the votes.
437 * <p>
438 * The vote element can count for or against an exhibit (ie be +ve or -ve),
439 * and an absence of any votes may tend to dilute goodness back towards neutral.
440 */
441 public static final float POP_VOTES = 0.7f;
442
443 /**Maximum weighting from correlated voting on related exhibits.
444 * Possibly quite a crude measure,
445 * though will be more valuable with more data available,
446 * and so if all our required samples are in place,
447 * then we'll give it a rating that should be visible to users,
448 * and more significant than just newness, for example.
449 */
450 public static final float POP_VOTE_CORR = 0.1331f;
451
452 /**Maximum weighting from Scorer judgement of exhibit content.
453 * This AI-based mechanism inspects the content of each exhibit,
454 * and is driven to get the best possible fit with voting data.
455 * <p>
456 * We cap this at a higher weighting than the metadata-driven statistical methods,
457 * but lower than the real votes.
458 */
459 public static final float POP_AI_SCORER = Math.min(2*POP_VOTE_CORR, POP_VOTES/2);
460
461 /**Maximum weighting for exhibit recently viewed or downloaded.
462 * Possibly quite a crude measure,
463 * though will be more valuable with more data available,
464 * and so if all our required samples are in place,
465 * we'll give it a rating that should be visible to users,
466 * and more significant than just newness, for example.
467 * <p>
468 * We make this a little less important than vote correlations.
469 */
470 public static final float POP_RECENT_ACCESS = POP_VOTE_CORR * 0.73f;
471
472 /**Weighting/correlation of "newness" component of popularity.
473 * This is only applied as a positive (proportional) factor
474 * for exhibits considered "new",
475 * since newer items are potentially more interesting.
476 * <p>
477 * This factor should be relatively small,
478 * not overwhelming manually-set weightings for example,
479 * and mainly intended as a tie-breaker.
480 */
481 public static final float POP_CORR_NEWNESS = 0.001f;
482
483 /**Weighting/correlation of "random" component of popularity.
484 * Exists to help break ties and perturb the popularity ordering a little.
485 * Should be very small so as not to outweigh any genuine factor,
486 * but more-or-less force a total ordering.
487 * <p>
488 * This factor falls equally either side of zero, is a +/- limit.
489 */
490 public static final float POP_CORR_RANDOM = 0.00001f;
491
492 /**Weighting/correlation of has-description component of popularity.
493 * Applied to items that have a description
494 * as slightly more interesting than those without.
495 * <p>
496 * Should generally be greater than the randomness weighting,
497 * and probably less than the newness rating.
498 */
499 public static final float POP_CORR_HASDESC = Math.min(POP_CORR_RANDOM * 10,
500 POP_CORR_NEWNESS / 3);
501
502 /**Weighting factor for an item with a specific description. */
503 private static final Factor FACTOR_HASDESC =
504 new Factor(1, POP_CORR_HASDESC);
505
506 /**Weighting factor for an item with a generic description.
507 * Carries less weight than having a specific description.
508 * <p>
509 * May apply to having location or AKA or description text.
510 */
511 private static final Factor FACTOR_HASGENERICDESC =
512 new Factor(1, POP_CORR_HASDESC/2);
513
514 /**Single (immutable) component of multi-factor value.
515 * Consists of:
516 * <ul>
517 * <li>a goodness value [-1,+1]
518 * <li>a correlation/confidence rating [-1,+1]
519 * </ul>
520 * <p>
521 * The value added into the final value is proportional to:
522 * <samp>(goodness * correlation) / |correlation|</samp>.
523 * <p>
524 * Uses float values since there is probably not even that much
525 * real precision available, and doing so probably saves space and time.
526 * Double values can be passed to the constructors,
527 * but the extra precision is discarded.
528 */
529 static public final class Factor
530 {
531 public Factor(final float goodness, final float correlation)
532 { this(goodness, correlation, null); }
533 public Factor(final float goodness, final float correlation, final String comment)
534 {
535 if((!(goodness >= -1) && (goodness <= +1)) ||
536 (!(correlation >= -1) && (correlation <= +1)))
537 { throw new IllegalArgumentException(); }
538 this.goodness = goodness;
539 this.correlation = correlation;
540 this.comment = comment;
541 }
542 /**Goodness contributed by this factor: range [-1,+1]. */
543 public final float goodness;
544 /**Correlation of this value with overall result: range [-1,+1]. */
545 public final float correlation;
546 /**Optional comment fields; can be null. */
547 public final String comment;
548
549 /**We will accept double values, but discard the extra precision. */
550 public Factor(final double goodness, final double correlation)
551 { this((float) goodness, (float) correlation, null); }
552 /**We will accept double values, but discard the extra precision. */
553 public Factor(final double goodness, final double correlation, final String comment)
554 { this((float) goodness, (float) correlation, comment); }
555
556 /**Convert to human-readable form. */
557 @Override
558 public String toString()
559 {
560 final StringBuilder sb = new StringBuilder();
561 sb.append("Factor");
562 sb.append(":goodness=").append(goodness);
563 sb.append(":correlation=").append(correlation);
564 if(comment != null)
565 { sb.append(":comment=").append(comment); }
566 return(sb.toString());
567 }
568 }
569
570 /**Zero factor; zero weight, zero goodness. */
571 public static final Factor FACTOR_ZERO = new Factor(0, 0);
572
573 /**Compute random "goodness" factor symmetrical about zero; never null. */
574 private static Factor _computeRandomGoodnessFactor()
575 {
576 return new Factor((Rnd.fastRnd.nextBoolean() ?
577 Rnd.fastRnd.nextDouble() : -Rnd.fastRnd.nextDouble()),
578 POP_CORR_RANDOM);
579 }
580
581 /**Compute newness bonus (if any); never null.
582 * If not new enough to be considered "new" then a zero factor is returned.
583 * <p>
584 * This is not necessarily precisely aligned with the "isExhibitNew()" result,
585 * though except in race conditions they should be aligned.
586 *
587 * @param esa exhibit data; never null
588 * @return factor, positive but low weight if exhibit "new",
589 * else zero with zero weighting
590 */
591 private static Factor _computeNewnessBonusFactor(final ExhibitStaticAttr esa)
592 {
593 // Also for some wobble is server time vs new-exhibit timestamps.
594 final long age = Math.max(0, System.currentTimeMillis() - esa.timestamp);
595 if(age <= CoreConsts.MAX_EXHIBIT_AGE_NEW_MS)
596 {
597 // A very new exhibit has a goodness of nearly 1.0.
598 // An exhibit only just "new" has a goodness of just over 0.0.
599 final double goodness = (CoreConsts.MAX_EXHIBIT_AGE_NEW_MS - age) /
600 (double) CoreConsts.MAX_EXHIBIT_AGE_NEW_MS;
601 return(new Factor(goodness, POP_CORR_NEWNESS));
602 }
603
604 // Exhibit not "new", so zero goodness but the same weight.
605 return(_NOT_NEW);
606 }
607
608 /**Contribution for goodness of non-new exhibits; zero goodness but the same "newness" weight. */
609 private static final Factor _NOT_NEW = new Factor(0, POP_CORR_NEWNESS);
610
611 /**Compute full goodness factor, using full data source if available; never null.
612 *
613 * @param esa exhibit data; never null
614 * @param initialComponents initial set of (data-dependent) Factor elements;
615 * possibly empty but never null
616 */
617 private static Factor _computeTotalGoodnessFactor(final ExhibitStaticAttr esa,
618 final GenProps gp,
619 final AllExhibitProperties aep,
620 final List<Factor> initialComponents)
621 {
622 // Compute a list of components of goodness Factors.
623 // Leave enough space to avoid need for expansion in most (time-critical) cases.
624 final List<Factor> components = new ArrayList<Factor>(5 + initialComponents.size());
625 components.addAll(initialComponents);
626
627 // Compute the fast components that do not require a
628 // (potentially slow) data source or any other non-intrinsic data.
629 components.add(_computeRandomGoodnessFactor());
630 components.add(_computeNewnessBonusFactor(esa));
631
632 // Calculations that require a non-default GenProps to be effective.
633 // Factor in the author weight, if any.
634 final Name.ExhibitFull exhibitName = esa.getExhibitFullName();
635 final Byte aW = gp.getPopWeightForAuth(ExhibitName.getAuthorComponent(exhibitName));
636 if(aW != null)
637 { components.add(new Factor(1, aW.doubleValue() / GenProps.MAX_POPWT_VAL)); }
638 // Factor in the file-type weight, if any.
639 final Byte tW = gp.getPopWeightForType(ExhibitName.getExtensionComponent(exhibitName));
640 if(tW != null)
641 { components.add(new Factor(1, tW.doubleValue() / GenProps.MAX_POPWT_VAL)); }
642 // Penalise one-word exhibits (ignoring number-in-series) for not being descriptive enough.
643 final Set<String> noAttrWords = Collections.emptySet();
644 if(TextUtils.indexOf(exhibitName.getShortName().getMainWordsComponent(noAttrWords), ExhibitName.WORD_SEP) == -1)
645 { components.add(new Factor(-1, POP_CORR_HASDESC)); }
646 // Slight penalty to exhibits that *cannot* show thumbnails, since images are most accessible.
647 if(!(ExhibitMIME.getInputFileType(esa.getCharSequence())).canPossiblyCreateThumbnailOfSameMIMEType())
648 { components.add(new Factor(-1, POP_CORR_HASDESC/3)); }
649
650 // Do the full (but potentially-expensive) goodness calcs
651 // iff we have an apparently-working data source.
652 if(aep != null)
653 {
654 // Factor in weight from attribute words.
655 final Enumeration<?> aWEn =
656 ExhibitName.getAttributeWordsComponentEnumeration(exhibitName, ExhibitAttrUtils.getAttrWords().getAttrWordsSortedSet());
657 if(aWEn != null)
658 {
659 while(aWEn.hasMoreElements())
660 {
661 final String attrWord = (String) aWEn.nextElement();
662 final Byte attrW = gp.getPopWeightForAttr(attrWord);
663 if(attrW != null)
664 { components.add(new Factor(1, attrW.doubleValue() / GenProps.MAX_POPWT_VAL)); }
665 }
666 }
667
668 // Factor in weight from description, if any.
669 final ExhibitPropsLoadable epl = aep.getExhibitPropsLoadable(exhibitName);
670 if(epl.hasDescription())
671 { components.add(FACTOR_HASDESC); }
672
673 // Having a location is a generic description...
674 if(Location.NONE != aep.getLocation(exhibitName))
675 { components.add(FACTOR_HASGENERICDESC); }
676 }
677
678 // Now compute the weighted goodness value...
679 double g = 0;
680 double w = 0;
681 for(final Iterator<Factor> it = components.iterator(); it.hasNext(); )
682 {
683 final Factor f = it.next();
684 w += Math.abs(f.correlation);
685 g += f.goodness * f.correlation;
686 }
687
688 // Adjust so that final weight is 1.0,
689 // and the cumulative value is divided by at least that.
690 final double wPrime = Math.max(1.0, w);
691 return(new Factor(g / wPrime, 1.0));
692 }
693
694
695 /**Minimum sample period to even out short cycles in Web access patterns, in ms; strictly positive.
696 * Must usually be a multiple (and minimum) of a week to allow for most common
697 * daily and weekly access patterns to be smoothed as well as possible.
698 */
699 private static final long _SAMPLE_CYCLE_PERIOD_MINOR_MS = 7 * 24 * 3600 * 1000L;
700
701 /**The longest cycle time we will look for in historical data, in ms; strictly positive.
702 * This is usually a year to look for seasonal patterns,
703 * eg what is popular at Christmas or in the summer.
704 */
705 private static final long _SAMPLE_CYCLE_PERIOD_MAJOR_MS = 365242L * 24 * 3600; // 365.242 days in a year.
706
707 /**Number of VLONG samples corresponding to annual cycles in data. */
708 private static final int _SEASONAL_VLONG_CYCLE_PERIODS = Math.max(1, (int)
709 Math.round(_SAMPLE_CYCLE_PERIOD_MAJOR_MS / (double) EventPeriod.VLONG.getIntervalMs()));
710
711 /**Minimum number of VLONG samples to smooth out daily/weekly cycles in data. */
712 private static final int _MIN_VLONG_SAMPLE_PERIODS = Math.max(1, (int)
713 Math.round(_SAMPLE_CYCLE_PERIOD_MINOR_MS / (double) EventPeriod.VLONG.getIntervalMs()));
714
715
716 /**Compute Factor(s) that depend on access data.
717 * If this cannot fetch some of the data it requires due to an IOException
718 * (not the same as fetching data successfully that has no entries)
719 * then the exception is propagated to the caller.
720 * (Normally this results in the EPCM item for which this is called being marked stale
721 * so that we can try again when the data is available.)
722 * <p>
723 * The comment fields of the factors produced are
724 * the event names from which they were generated.
725 * <p>
726 * This is public to assist testing.
727 *
728 * @throws IOException if this was unable to fetch some data required
729 */
730 public static Collection<Factor> calcAccessFactors(final Name.ExhibitFull exhibitName,
731 final AllExhibitProperties aep,
732 final BasicVarMgrInterface vars)
733 throws IOException
734 {
735 if((exhibitName == null) || (aep == null) || (vars == null))
736 { throw new IllegalArgumentException(); }
737
738 // Get short name of exhibit as String,
739 // by which most stats are recorded.
740 final String shortNameAsString = exhibitName.getShortName().toString();
741
742 // Factor in weight from access stats...
743 final int nExhibits = aep.aeid.size();
744
745 // Threshold at which usage is considered significantly "good".
746 final int rankLimit = nExhibits / 3;
747
748 // Set of slots/intervals in which we will sample access-pattern stats.
749 // We let the routine force some solid coverage at the near end.
750 final BitSet whichIntervals = _createSampleBitSet(0);
751
752 // Count the full set of samples that we are looking for.
753 final int desiredSamples = whichIntervals.cardinality();
754
755 final long prevInterval = EventPeriod.VLONG.getIntervalNumber(System.currentTimeMillis()) - 1;
756
757
758
759 // Access stats event definitions which we want and that record count by short exhibit name.
760 final List<SimpleVariableDefinition> eventVarNames = new ArrayList<SimpleVariableDefinition>(3);
761 eventVarNames.add(SystemVariables.ACCESSPATTERN_CAT_PAGE_VIEW);
762 eventVarNames.add(SystemVariables.ACCESSPATTERN_CLICKTHROUGH);
763 // Only look for downloads if not automatically excluded from being counted.
764 // (Note that this definition may vary between data being collected and being used.)
765 final ExhibitStaticAttr esa = aep.aeid.getStaticAttr(exhibitName);
766 if((esa != null) && !ImageUtils.canBeOwnThumbnail(esa))
767 { eventVarNames.add(SystemVariables.ACCESSPATTERN_COMPLETED_DOWNLOAD); }
768
769 // Create/size the results container...
770 final List<Factor> components = new ArrayList<Factor>(eventVarNames.size());
771
772 // Retrieve data for each event...
773 // Currently we we give all intervals the same weighting.
774 // This is especially dodgy for the current interval, which may be very short and unrepresentative.
775 // We give total weighting in proportion to the square root of the number of samples obtained.
776 // Shuffle the order in which we fetch the data to improve concurrency a little.
777 Collections.shuffle(eventVarNames, Rnd.fastRnd);
778 for(final SimpleVariableDefinition def : eventVarNames)
779 {
780 assert(def.isEvent() && (def.getType() == SimpleVariableDefinition.TYPE_STRING));
781 final EventVariableValue evvs[] = vars.getEventValues(def,
782 EventPeriod.VLONG,
783 prevInterval,
784 whichIntervals);
785 assert(evvs != null); // May be empty, but never null.
786 // Count the number of results we actually retrieved.
787 int resultsFound = 0;
788 double totalScore = 0; // Cumulative score so far...
789 for(int i = whichIntervals.nextSetBit(0); i >= 0; i = whichIntervals.nextSetBit(i+1))
790 {
791 if(i >= evvs.length) { break; /* Run out of answers... */ }
792
793 final EventVariableValue evv = evvs[i];
794
795 // Ignore null values, and those with *no* events at all for any value.
796 if(evv == null) { continue; }
797 if(evv.getTotalEventCount() < 1) { continue; }
798
799 // OK, have a real result; we can see if this exhibit ranks well or not.
800 ++resultsFound;
801
802 // If the event bucket may have "overflowed" (too many distinct event values)
803 // then we should probably ignore count==1 entries as unreliable/incomplete.
804 final boolean possibleOverflow =
805 (evv.getTotalDistinctValues() >= evv.getDef().getMaxDiffEventCount());
806
807 final int rank = evv.getRank(shortNameAsString);
808 if((rank < rankLimit) &&
809 (!possibleOverflow || (evv.getCount(shortNameAsString) > 1)))
810 { totalScore += (rankLimit - rank) / (double) rankLimit; }
811 }
812
813 // We should always get at least one response.
814 // Maybe we should abort with an error if not.
815 // if(resultsFound < 1) { throw new IOException("not even current results found"); }
816
817 // Compute goodness score.
818 final double g = totalScore / Math.max(1, resultsFound);
819 // Compute confidence weighting based on number of samples.
820 final double confidence = Math.sqrt(resultsFound / (double) desiredSamples);
821 //if(IsDebug.isDebug) { System.out.println("***EPCM._calcAccessFactors(): score/resultsFound/confidence/exhibit/event = " +g+", " +resultsFound+", " +confidence+", " +exhibitName+", " +def.getName()); }
822 components.add(new Factor(g, POP_RECENT_ACCESS * confidence, def.getName()));
823 }
824
825 return(components);
826 }
827
828 /**If true then include the "all" buckets in our calculations. */
829 private static final boolean USE_ALL_BUCKET = false;
830
831 /**Cache of semi-fixed interval sample set for power-conserving (low-power) mode; never null.
832 * Cleared (set to null) whenever not in conservation mode.
833 * <p>
834 * Any BitSet stored here is never altered once assigned.
835 * <p>
836 * Used only when conserving and when the requested minNearMs is no larger than our forced level.
837 * <p>
838 * Fixed (small) upper bound on storage required.
839 * <p>
840 * Marked null for atomic lock-free access.
841 */
842 private static transient volatile BitSet cachedLPSampleSet;
843
844 /**Compute exactly one Factor that depends on explicit user votes for specified exhibit; never null.
845 * Because we expect voting to be sparse,
846 * we total up votes across all sample intervals to reduce noise,
847 * rather than taking each sample separately,
848 * and do various other calculations a little differently to dense measures.
849 * <p>
850 * If this cannot fetch some of the data it requires due to an IOException
851 * (not the same as fetching data successfully that has no entries)
852 * then the exception is propagated to the caller.
853 * (Normally this results in the EPCM item for which this is called being
854 * marked stale so that we can try again when the data is available.)
855 * <p>
856 * This is public to assist testing and computation of correlations.
857 * <p>
858 * The returned factor's goodness can range from -1 to +1,
859 * and confidence from 0 to +1;
860 * any scaling required will have to be applied elsewhere.
861 *
862 * @param minNearMs minimum interval (ms) at near end
863 * to include all available samples for
864 *
865 * @throws IOException if this was unable to fetch some data required
866 */
867 public static Factor calcVoteFactor(final Name.ExhibitFull exhibitName,
868 final AllExhibitProperties aep,
869 final BasicVarMgrInterface vars,
870 final long minNearMs)
871 throws IOException
872 {
873 if((exhibitName == null) || (aep == null) || (vars == null))
874 { throw new IllegalArgumentException(); }
875
876 if(!aep.aeid.isPresent(exhibitName))
877 { throw new IllegalArgumentException("no such exhibit"); }
878
879 // Get short name of exhibit,
880 // by which most stats are recorded.
881 final String shortName = exhibitName.getShortName().toString();
882
883 // Set of slots/intervals in which we will sample voting behaviour.
884 // Insist on over a month's worth of near-term votes if possible,
885 // significantly less if temporarily conserving power.
886 final boolean conservingPower = GenUtils.mustConservePower();
887 final long forcedMinNearMs = conservingPower ?
888 (7 * 24 * 3600 * 1000L) : (33 * 24 * 3600 * 1000L);
889 final BitSet whichIntervals;
890 if(!conservingPower)
891 {
892 // Beefy new set each time when not conserving...
893 whichIntervals = _createSampleBitSet(Math.max(minNearMs, forcedMinNearMs));
894 // Clear any cached (low-power) set...
895 cachedLPSampleSet = null;
896 }
897 else
898 {
899 if(minNearMs > forcedMinNearMs)
900 {
901 // Cannot use cached set if requested minimum near set larger than forced near set...
902 whichIntervals = _createSampleBitSet(minNearMs);
903 }
904 else
905 {
906 BitSet cached;
907 // Use cached set if extant...
908 if(null == (cached = cachedLPSampleSet))
909 {
910 // ... else create new fixed minimum-size set and cache it.
911 cachedLPSampleSet = cached = _createSampleBitSet(forcedMinNearMs);
912 }
913 assert(null != cached);
914 whichIntervals = (BitSet) cached.clone(); // Defensive copy to protect the cache.
915 }
916 }
917
918 // Count the full set of samples that we are looking for
919 // optionally including the "all" set assumed always available.
920 final int desiredSamples = whichIntervals.cardinality() + (USE_ALL_BUCKET?1:0);
921
922 // Last available complete interval/period.
923 final long prevInterval = EventPeriod.VLONG.getIntervalNumber(System.currentTimeMillis()) - 1;
924
925 // Get votes "for" this exhibit.
926 final EventVariableValue vPro[] = vars.getEventValues(SystemVariables.VOTE_PRO,
927 EventPeriod.VLONG,
928 prevInterval,
929 whichIntervals);
930 // Get votes "against" this exhibit.
931 final EventVariableValue vCon[] = vars.getEventValues(SystemVariables.VOTE_CON,
932 EventPeriod.VLONG,
933 prevInterval,
934 whichIntervals);
935
936 // Set of all event values seen in all sampled slots.
937 // We use a HashSet for speed.
938 final HashSet<Object> allVoteValues = new HashSet<Object>();
939 // Count of all votes in all slots.
940 int allVotesCount = 0;
941
942 int resultsFound = 0; // Total results usable including "all" slot.
943 int totalProVotesThisEx = 0; // Total votes for this exhibit in the sample slots.
944 int totalConVotesThisEx = 0; // Total votes against this exhibit in the sample slots.
945
946 if(USE_ALL_BUCKET)
947 {
948 // Get the "all" slot votes "for"...
949 final EventVariableValue[] aPro = vars.getEventValues(SystemVariables.VOTE_PRO,
950 EventPeriod.VLONG,
951 0,
952 null);
953
954 // Get the "all" slot votes "against"...
955 final EventVariableValue[] aCon = vars.getEventValues(SystemVariables.VOTE_CON,
956 EventPeriod.VLONG,
957 0,
958 null);
959 // Regard the "all" slot as always present if we're using it
960 // (which they logically are, even if not necessarily programmatically).
961 ++resultsFound;
962 if((aPro.length > 0) && (aPro[0] != null))
963 {
964 allVoteValues.addAll(aPro[0].getDistinctValuesInRankOrder());
965 allVotesCount += aPro[0].getTotalEventCount();
966 totalProVotesThisEx += aPro[0].getCount(shortName);
967 }
968 if((aCon.length > 0) && (aCon[0] != null))
969 {
970 allVoteValues.addAll(aCon[0].getDistinctValuesInRankOrder());
971 allVotesCount += aCon[0].getTotalEventCount();
972 totalConVotesThisEx += aCon[0].getCount(shortName);
973 }
974 }
975
976 for(int i = whichIntervals.nextSetBit(0); i >= 0; i = whichIntervals.nextSetBit(i+1))
977 {
978 final EventVariableValue pro = (i < vPro.length) ? vPro[i] : null;
979 final EventVariableValue con = (i < vCon.length) ? vCon[i] : null;
980
981 // Ignore slot where no votes were collected either way for any exhibit at all.
982 final int proTEC = (pro == null) ? 0 : pro.getTotalEventCount();
983 final int conTEC = (con == null) ? 0 : con.getTotalEventCount();
984 final int tEC = proTEC + conTEC;
985 if(tEC < 1) { continue; }
986
987 // OK, have a real result; we can see if this exhibit has attracted any votes.
988 ++resultsFound;
989
990 // Capture all the distinct event values in this slot
991 // so that we know all the exhibits that have attracted votes
992 // in the slots that we have sampled.
993 if(pro != null) { allVoteValues.addAll(pro.getDistinctValuesInRankOrder()); }
994 if(con != null) { allVoteValues.addAll(con.getDistinctValuesInRankOrder()); }
995 allVotesCount += tEC;
996
997 // Sum up the total votes either way for this exhibit.
998 if(pro != null) { totalProVotesThisEx += pro.getCount(shortName); }
999 if(con != null) { totalConVotesThisEx += con.getCount(shortName); }
1000 }
1001
1002 // Take a simplistic view that the overall vote
1003 // is simply the mean of all those cast.
1004 final int totalScoreThisEx = totalProVotesThisEx - totalConVotesThisEx;
1005 final int totalVotesCastThisEx = totalProVotesThisEx + totalConVotesThisEx;
1006
1007 // Compute goodness score.
1008 final double g = Math.min(1, Math.max(-1, totalScoreThisEx / (double) Math.max(1, totalVotesCastThisEx)));
1009
1010 // Compute average number of votes per voted-for-exhibit
1011 // over all the sampled slots with at least one vote...
1012 // FIXME: should re-do this to allow for "lost" vote events,
1013 // else we end up overestimating the votes-per-voted-for-exhibit value
1014 // and this under-rating even the most-voted-for exhibit(s).
1015 final double meanVotesPerEx = allVotesCount / Math.max(1.0, allVoteValues.size());
1016
1017 //System.out.println("***EPCM.calcVoteFactor(): allVotesCount = " + allVotesCount);
1018 //System.out.println("***EPCM.calcVoteFactor(): allVoteValues.size() = " + allVoteValues.size());
1019 //System.out.println("***EPCM.calcVoteFactor(): totalVotesCastThisEx = " + totalVotesCastThisEx);
1020 //System.out.println("***EPCM.calcVoteFactor(): totalScoreThisEx = " + totalScoreThisEx);
1021 //System.out.println("***EPCM.calcVoteFactor(): meanVotesPerEx = " + meanVotesPerEx);
1022 //System.out.println("***EPCM.calcVoteFactor(): desiredSamples = " + desiredSamples + " : " + whichIntervals);
1023
1024 // Confidence based on number of votes received for this exhibit
1025 // compared to the mean number of votes for each voted-for-exhibit,
1026 // using normal variance-reduction (sqrt) policy.
1027 // This value may exceed 1.0 to boost overall result confidence
1028 // where this exhibit has received an unusually high number of votes.
1029 final boolean enoughVotesCast = totalVotesCastThisEx >= meanVotesPerEx;
1030 final double confidenceThisExhibit = enoughVotesCast ? 1 :
1031 Math.sqrt(totalVotesCastThisEx / meanVotesPerEx);
1032 // Compute confidence weighting based on number of usable samples
1033 // and the total number of votes we got compared to the mean vote count per exhibit in sampled slots.
1034 final double confidence = Math.min(1.0, Math.sqrt(resultsFound / (double) desiredSamples) * confidenceThisExhibit);
1035 //if(IsDebug.isDebug) { System.out.println("***EPCM.calcVoteFactor(): score/resultsFound/confidence/exhibit = " +g+", " +resultsFound+", " +confidence+", " +exhibitName); }
1036 // For low-confidence (low-vote) exhibits, tone down the goodness too.
1037 return(new Factor(enoughVotesCast ? g : (g * confidenceThisExhibit), confidence, "Votes: " + shortName));
1038 }
1039
1040
1041 /**If true, use ALL available votes rather than a sampling in _computeSampleBitSet().
1042 * If this is false then a caller of _computeSampleBitSet() can get a similar effect
1043 * with a large minNearMs value.
1044 */
1045 private static final boolean USE_ALL_VOTES = false;
1046
1047 /**Create BitSet of samples of event history to take; never null nor empty.
1048 * Creates a BitSet of length up to SystemVariables.EVENT_SAMPLES_RETAINED slots,
1049 * intended to be used with the EventPeriod.VLONG interval size,
1050 * with index 0 indicating the <em>previous</em> slot (ie no use of the current period).
1051 * <p>
1052 * This includes:
1053 * <ul>
1054 * <li>near-end samples designed to smooth out short (daily/weekly) cycles,
1055 * <li>annual/seasonal look-ahead to predict what may be of interest,
1056 * <li>a random sampling of points throughout the event history for robustness.
1057 * </ul>
1058 *
1059 * @param minNearMs if positive, the minimum number of recent ms that
1060 * the sampling should cover (else an internal minimum will be imposed)
1061 *
1062 * @return non-null, non-empty new private BitSet
1063 */
1064 private static BitSet _createSampleBitSet(final long minNearMs)
1065 {
1066 if(USE_ALL_VOTES)
1067 {
1068 // Sample every available vote history point for maximum accuracy
1069 // at a possible performance cost.
1070 final BitSet whichIntervals = new BitSet(SystemVariables.EVENT_SAMPLES_RETAINED);
1071 whichIntervals.set(0, SystemVariables.EVENT_SAMPLES_RETAINED);
1072 return(whichIntervals);
1073 }
1074
1075 // Number of our longest access periods needed to cover
1076 // "the minimum cycle" to smooth out short-term access patterns.
1077 // Typically at least 7 days to smooth out daily and weekly cycles.
1078 // We assume that our short cycle *does* fit in our data window.
1079 assert(_MIN_VLONG_SAMPLE_PERIODS < SystemVariables.EVENT_SAMPLES_RETAINED);
1080
1081 // Number of intervals in our long cycle,
1082 // typically looking for seasonal patterns.
1083 // Typically 365 days to pick up yearly festivals, seasonal patterns, etc.
1084 // We assume that our long cycle *does* fit in our data window.
1085 assert(_SEASONAL_VLONG_CYCLE_PERIODS < SystemVariables.EVENT_SAMPLES_RETAINED);
1086
1087 // We cut down the number of samples when (temporarily) conserving energy.
1088 // This makes our result more noisy, but not biased.
1089 final boolean reduceSampling = GenUtils.mustConservePower();
1090
1091 // Now calculate a set of sample periods to use; index 0 is the previous interval.
1092 // We factor in:
1093 // * a window at the short end to catch recent activity.
1094 // * a window around our long cycle to catch seasonal patterns.
1095 // * a large random sampling of the full window to be robust.
1096 // We take a little more than the minimum window at the short end to help predictive ability.
1097 // We DO NOT include the current interval,
1098 // since it is of variable quality and may be expensive to fetch.
1099 final BitSet whichIntervals = new BitSet(SystemVariables.EVENT_SAMPLES_RETAINED);
1100 // Near-end samples, NOT including the current period.
1101 // The caller can force this to be extended to cover the period they are interested in.
1102 final int blockSampleSize = reduceSampling ? _MIN_VLONG_SAMPLE_PERIODS : (4*_MIN_VLONG_SAMPLE_PERIODS);
1103 whichIntervals.set(0, Math.min(SystemVariables.EVENT_SAMPLES_RETAINED-1,
1104 Math.max(blockSampleSize, (int)(minNearMs/SystemVariables.EVENT_INTERVAL_VLONG_TERM_MS)) +
1105 1+(MAX_AGE_BEFORE_STALE_MS/SystemVariables.EVENT_INTERVAL_VLONG_TERM_MS)));
1106 // Predictive (we hope) seasonal samples; skew this forward for predictive value.
1107 final int seasonOffset = _SEASONAL_VLONG_CYCLE_PERIODS - _MIN_VLONG_SAMPLE_PERIODS;
1108 whichIntervals.set(seasonOffset, seasonOffset + (reduceSampling ? (2*_MIN_VLONG_SAMPLE_PERIODS) : (3*_MIN_VLONG_SAMPLE_PERIODS)));
1109 // One or more random samples;
1110 // take an extensive sampling of points otherwise not covered.
1111 for(int i = 1+Math.max(blockSampleSize, SystemVariables.EVENT_SAMPLES_RETAINED >> (reduceSampling ? 5 : 2)); --i >= 0; )
1112 { _chooseRandomSlot(Rnd.fastRnd, whichIntervals, true); }
1113 // Chose a new one using a good generator with some real entropy in,
1114 // thus hard to guess (but expensive),
1115 // but only if not conserving power (since we're not going to lose money on this!).
1116 if(!reduceSampling) { _chooseRandomSlot(Rnd.goodRnd, whichIntervals, true); }
1117
1118 return(whichIntervals);
1119 }
1120
1121 /**Set a random bit in the BitSet to take a random sample.
1122 * Sets a bit at random.
1123 * <p>
1124 * It is possible (though may be expensive) to insist that a new (previously false)
1125 * bit is set, but only if (much) less than SystemVariables.EVENT_SAMPLES_RETAINED bits
1126 * are already true.
1127 *
1128 * @param rnd random number source; never null
1129 * @param whichIntervals the BitSet to set a random bit in; never null
1130 * @param forceNew if true, ensure that we set a new bit ie one that was false
1131 * unless at least half of all possible bits have already been set
1132 */
1133 private static void _chooseRandomSlot(final Random rnd,
1134 final BitSet whichIntervals,
1135 final boolean forceNew)
1136 {
1137 do
1138 {
1139 final int bitIndex = rnd.nextInt(SystemVariables.EVENT_SAMPLES_RETAINED);
1140 if(forceNew && whichIntervals.get(bitIndex) &&
1141 (whichIntervals.cardinality() < (SystemVariables.EVENT_SAMPLES_RETAINED/2)))
1142 { continue; /* Selected bit already set; try again... */ }
1143
1144 // OK, actually set the bit and stop!
1145 whichIntervals.set(bitIndex);
1146 break;
1147
1148 } while(forceNew);
1149 }
1150
1151 /**Our serial version... */
1152 private static final long serialVersionUID = 8518817446315221597L;
1153
1154 // /**Deserialise. */
1155 // private void readObject(ObjectInputStream in)
1156 // throws IOException, ClassNotFoundException
1157 // {
1158 // in.defaultReadObject();
1159 // validateObject(); // Validate state immediately.
1160 // }
1161
1162 /**Deserialise: use constructor for validation, defensive copying, etc.
1163 * Replace all trivially-stale "empty" values with a single shared value.
1164 * <p>
1165 * <strong>NOTE: this may not always preserve all values as expected in future.</strong>
1166 */
1167 protected Object readResolve()
1168 // throws ObjectStreamException
1169 {
1170 // Eliminate redundant deserialised copies of TRIVIAL_NEUTRAL.
1171 if(isTriviallyStale() && (goodness == 0)) { return(TRIVIAL_NEUTRAL); }
1172
1173 // Construct new instance of object in normal defensive way.
1174 return(new ExhibitPropsComputableMutable(staleAfter, goodness));
1175 }
1176
1177 /**Serialise: replace all trivially-stale values being serialised with TRIVIAL_NEUTRAL.
1178 * We save all other (non-trivially-stale) values as-is, even if stale,
1179 * because they may have taken significant resources to compute
1180 * and may still be much more accurate than any fast approximation we could compute.
1181 * <p>
1182 * <strong>NOTE: this is effectively a LOSSY compression mechanism.</strong>
1183 */
1184 protected Object writeReplace()
1185 // throws ObjectStreamException
1186 {
1187 // Don't bother saving trivially-stale values to the stream
1188 // and replace them all with a single value in the output.
1189 if(isTriviallyStale()) { return(TRIVIAL_NEUTRAL); }
1190
1191 // Serialise/persist this non-trivial value as-is.
1192 return(this);
1193 }
1194
1195 /**Validate fields/state.
1196 * Called in the constructor and possibly after de-serialising.
1197 * <p>
1198 * Barf if something bad is found.
1199 * (Maybe allow some extra info in debug version.)
1200 */
1201 public void validateObject()
1202 throws InvalidObjectException
1203 {
1204 if(staleAfter < 0)
1205 { throw new InvalidObjectException("bad object: invalid 'staleAfter' time"); }
1206 if(goodness < -Integer.MAX_VALUE)
1207 { throw new InvalidObjectException("bad object: invalid 'goodness' value"); }
1208 }
1209 }