001 /*
002 Copyright (c) 1996-2012, Damon Hart-Davis
003 All rights reserved.
004
005 Redistribution and use in source and binary forms, with or without
006 modification, are permitted provided that the following conditions are
007 met:
008
009 * Redistributions of source code must retain the above copyright
010 notice, this list of conditions and the following disclaimer.
011
012 * Redistributions in binary form must reproduce the above copyright
013 notice, this list of conditions and the following disclaimer in the
014 documentation and/or other materials provided with the
015 distribution.
016
017 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
018 IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
019 TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
020 PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
021 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
022 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
023 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
024 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
025 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
026 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
027 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
028 */
029 package org.hd.d.pg2k.svrCore;
030
031 import java.io.IOException;
032 import java.io.InvalidObjectException;
033 import java.io.ObjectInputValidation;
034 import java.io.Serializable;
035 import java.util.ArrayList;
036 import java.util.BitSet;
037 import java.util.Collection;
038 import java.util.Collections;
039 import java.util.Enumeration;
040 import java.util.HashSet;
041 import java.util.Iterator;
042 import java.util.List;
043 import java.util.Random;
044 import java.util.Set;
045
046 import org.hd.d.pg2k.ai.scorer.ScoreAndConf;
047 import org.hd.d.pg2k.ai.scorer.ScorerCacheIF;
048 import org.hd.d.pg2k.svrCore.Name.ExhibitFull;
049 import org.hd.d.pg2k.svrCore.MIME.ExhibitMIME;
050 import org.hd.d.pg2k.svrCore.location.Location;
051 import org.hd.d.pg2k.svrCore.props.GenProps;
052 import org.hd.d.pg2k.svrCore.vars.BasicVarMgrInterface;
053 import org.hd.d.pg2k.svrCore.vars.EventPeriod;
054 import org.hd.d.pg2k.svrCore.vars.EventVariableValue;
055 import org.hd.d.pg2k.svrCore.vars.SimpleVariableDefinition;
056 import org.hd.d.pg2k.svrCore.vars.SystemVariables;
057
058 import ORG.hd.d.IsDebug;
059
060
061 /**Immutable (and serialisable) store of all ephemeral computable auxiliary properties of a single exhibit.
062 * These are properties that are computed from exhibit and non-exhibit
063 * data and that need to be periodically recomputed,
064 * such as popularity score.
065 * <p>
066 * This is is called "mutable" because although any one instance is immutable,
067 * successive values retrieved for the same exhibit may be different.
068 * <p>
069 * This data has limited lifetime,
070 * but may be somewhat useful even when stale,
071 * so can be persisted if desired.
072 * <p>
073 * This is designed to be efficient and robust on the wire and in memory,
074 * since these details will be held for each and every exhibit.
075 */
076 public final class ExhibitPropsComputableMutable implements Serializable,
077 ObjectInputValidation
078 {
079 /**Limit of goodness*correlation to consider something (eg category) significantly good or bad. */
080 public static final float GOODBAD_LIMIT = 0.4f;
081
082 /**Limit of goodness*correlation to consider something (eg category) significantly good or bad, is int value. */
083 public static final int GOODBAD_LIMIT_INT = Math.round(GOODBAD_LIMIT * Integer.MAX_VALUE);
084
085 /**Compute a "quick"/approximate value for a specified exhibit; never null.
086 * This uses only values from the exhibit static data
087 * plus the system properties object
088 * to compute a fast approximation to a full value.
089 * <p>
090 * The value is immediately marked as stale
091 * (unless it is possible to compute the exact value fast),
092 * but is useful to generate something quickly if the system is busy,
093 * or at start-up for example.
094 *
095 * @param esa the exhibit to compute properties for; never null
096 * @param gp the system properties; never null
097 *
098 * @return non-null quick approximation
099 */
100 public static ExhibitPropsComputableMutable generateFastApproximation(
101 final ExhibitStaticAttr esa,
102 final GenProps gp)
103 {
104 return(compute(esa, gp, null, null, null, null));
105 }
106
107 /**Compute (accurate) value for a specified exhibit; never null.
108 * This uses values from the exhibit static data
109 * plus the system properties object
110 * plus the data source (and thus might take a long time and be expensive)
111 * to compute an accurate/full value.
112 * <p>
113 * Providing the supplied data source is non-null and functional,
114 * and the GenProps does not appear to be empty (zero timestamp)
115 * then the value will generally not become stale for at least MAX_AGE_BEFORE_STALE_MS/2,
116 * and will become stale in no more than MAX_AGE_BEFORE_STALE_MS ms.
117 * If in low-power mode and/or the AI scorers are not available
118 * then this may ignore them and set a much shorter time to stale,
119 * short enough to force recomputation well within 24 hours
120 * if sufficient power becomes available in that time.
121 * <p>
122 * If the data source is not fully available/functional then
123 * an approximation will be returned instead,
124 * and marked stale to allow recalculation later.
125 *
126 * @param esa the exhibit to compute properties for; never null
127 * @param gp the system properties; never null
128 * @param aep the exhibit properties; can be null
129 * @param vars source of event data; can be null
130 * @param voteCache vote cache; can be null
131 *
132 * @return non-null full value or approximation
133 * depending on the available data sources
134 */
135 public static ExhibitPropsComputableMutable compute(final ExhibitStaticAttr esa,
136 final GenProps gp,
137 final AllExhibitProperties aep,
138 final BasicVarMgrInterface vars,
139 final ExhibitPropsComputableMutableVoteCacheIF voteCache,
140 final ScorerCacheIF scorers)
141 {
142 return(new ExhibitPropsComputableMutable(esa, gp, aep, vars, voteCache, scorers));
143 }
144
145 /**Create an instance with the specified goodness and stale 'best before' time. */
146 private ExhibitPropsComputableMutable(final long staleAfter, final int goodness)
147 {
148 this.staleAfter = staleAfter;
149 this.goodness = goodness;
150
151 // Verify object state.
152 try { validateObject(); }
153 catch(final InvalidObjectException e)
154 { throw new IllegalArgumentException(e.getMessage()); }
155 }
156
157 /**Trivially-stale entirely-neutral instance; not null. */
158 public static final ExhibitPropsComputableMutable TRIVIAL_NEUTRAL = new ExhibitPropsComputableMutable(0, 0);
159
160 /**Create a fully populated ExhibitPropsComputableMutable object.
161 * This is given the data sources from which it can fetch
162 * the exhibit data one or more times to do its computations.
163 * <p>
164 * The data source references are not stored in the object.
165 *
166 * @param esa the exhibit to compute properties for; never null
167 * @param gp the system properties; never null
168 * @param aep the exhibit properties; can be null
169 * @param vars source of event data; can be null
170 * @param scorers source of content-based scoring; can be null
171 * @param voteCache vote cache; can be null
172 */
173 private ExhibitPropsComputableMutable(final ExhibitStaticAttr esa,
174 final GenProps gp,
175 final AllExhibitProperties aep,
176 final BasicVarMgrInterface vars,
177 final ExhibitPropsComputableMutableVoteCacheIF voteCache,
178 final ScorerCacheIF scorers)
179 {
180 if((esa == null) || (gp == null))
181 { throw new IllegalArgumentException(); }
182
183 // Initial set of data-dependent Factor items.
184 final List<Factor> components = new ArrayList<Factor>();
185
186
187 // Calculate factors that depend on access to event histories and other 'live' data.
188 // If an I/O problem is encountered then generate an approximate (and stale) value
189 // with all results gathered up until then.
190 boolean dataAccessTrouble = true; // Note if 'stale'/partial because data not complete...
191 // Avoid Scorer data in persistent low-power mode...
192 final boolean noResourcesForScorers = GenUtils.mustConserveCPU() || GenUtils.mustConservePowerLongTerm();
193 final boolean noScorers = noResourcesForScorers || (null == scorers);
194 if((aep != null) && (vars != null) && (voteCache != null))
195 {
196 try
197 {
198 // Compute synthetic factors based on access stats.
199 final ExhibitFull exhibitFullName = esa.getExhibitFullName();
200 components.addAll(calcAccessFactors(exhibitFullName, aep, vars));
201
202 // First, compute factor based on explicit voting for this exhibit.
203 final Factor vote = voteCache.calcVoteFactor(exhibitFullName, aep, vars);
204 // We use the minimum of the actual and maximum/limit confidence
205 // to realistically boost the vote contribution as much as possible.
206 components.add(new Factor(vote.goodness, Math.min(POP_VOTES, vote.correlation)));
207
208 // Compute factors based on vote correlations, etc.
209 components.addAll(calcCorrelationFactors(esa, aep, vars, voteCache));
210
211 if(!noScorers)
212 {
213 // Compute factor, if any, from AI Scorers, weighting by confidence.
214 // We allow possibly-slightly-stale Scorer values (assumed still to be useful)
215 // so as not to be pole-axed whenever the date/period rolls.
216 // At start-up we may simply not have all Scorers loaded and assessed.
217 final ScoreAndConf compositeScoreAndConf = scorers.computeCompositeScoreAndConfidence(exhibitFullName, true);
218 // Only bother even computing the Factor if we have a non-zero confidence...
219 if(compositeScoreAndConf.confidence != 0)
220 {
221 // We use the minimum of the actual and maximum/limit confidence
222 // to realistically boost the Scorer contribution as much as possible.
223 final Factor scorerFactor = new Factor(compositeScoreAndConf.score / (float) ScoreAndConf.MAX,
224 Math.min(POP_AI_SCORER, compositeScoreAndConf.confidence / (float) ScoreAndConf.MAX));
225 components.add(scorerFactor);
226 if(IsDebug.isDebug) { System.out.println("[EPCM: scorerFactor=" + scorerFactor + " for " +esa.getCharSequence() + ".]"); }
227 }
228 }
229
230 // Managed to gather all the data required.
231 dataAccessTrouble = false;
232 }
233 catch(final IOException e) { /* Absorb error. */ }
234 }
235
236
237 // Compute the full goodness value
238 // and scale it to the form we hold it internally.
239 final Factor gf = _computeTotalGoodnessFactor(esa, gp, aep, components);
240 goodness = Math.max(-Integer.MAX_VALUE, Math.min(Integer.MAX_VALUE,
241 Math.round(Integer.MAX_VALUE * gf.goodness)));
242
243 // Data is marked stale if any data source is null/empty/default
244 // or if we had trouble fetching some data dynamically,
245 // else data is non-stale and we pick a random expiry time between 3/4 and maximum.
246 // Expiry may be further postponed if the system is conserving resources,
247 // eg if it is in low-power mode.
248
249 // Trivially state if critical data missing or suspect.
250 if(dataAccessTrouble || (aep == null) || (aep.longHash == 0) || (gp.timestamp == 0))
251 { staleAfter = 0; }
252 // Much-reduced lifetime if no Scorer data
253 // unless this system doesn't have the resources to support scorers at all.
254 else if(noScorers && !noResourcesForScorers)
255 { staleAfter = System.currentTimeMillis() + MAX_AGE_SANS_SCORERS_BEFORE_STALE_MS; }
256 // Everything seems to be in place so set a full lifetime.
257 // Vary the lifetime a little to minimise recomputation collisions.
258 else
259 {
260 staleAfter = System.currentTimeMillis() + ((3*MAX_AGE_BEFORE_STALE_MS)/4)
261 + Rnd.fastRnd.nextInt(1 + MAX_AGE_BEFORE_STALE_MS/4);
262 }
263
264 // Verify object state.
265 try { validateObject(); }
266 catch(final InvalidObjectException e)
267 { throw new IllegalArgumentException(e.getMessage()); }
268 }
269
270 /**If true then force correlation data up to date.
271 * If false then accept somewhat stale or missing data,
272 * since this only changes slowly and a complete absence is neutral.
273 */
274 private static final boolean FORCE_CORR_RECOMP = true;
275
276 /**Compute factors based on correlations.
277 * These have a confidence pre-scaled in the range 0 to POP_VOTE_CORR.
278 */
279 private static List<Factor> calcCorrelationFactors(final ExhibitStaticAttr esa,
280 final AllExhibitProperties aep,
281 final BasicVarMgrInterface vars,
282 final ExhibitPropsComputableMutableVoteCacheIF voteCache)
283 throws IOException
284 {
285 // Get the raw factors; will need scaling.
286 // Force retrieved values to be (reasonably) up-to-date.
287 final Factor[] rawFactors = voteCache.getCorrelates(esa, aep, vars, FORCE_CORR_RECOMP);
288
289 // Full set of scaled values.
290 // We scale the certainty down in relation to other factors.
291 final List<Factor> result = new ArrayList<Factor>(rawFactors.length);
292 for(final Factor f : rawFactors)
293 { result.add(new Factor(f.goodness, f.correlation * POP_VOTE_CORR)); }
294
295 return(result);
296 }
297
298 /**Time until which this data is considered valid; beyond this time is stale.
299 * If zero, or before the current time of day, the data is stale.
300 * <p>
301 * If the stale date is too far in the future for us to believe,
302 * ie much more than would be allowed with this version of the object,
303 * then we treat the data as stale in case it is from a version of this
304 * object with very different staleness limits or there was a persistence error.
305 */
306 private final long staleAfter;
307
308 /**If true, the data in this object is stale.
309 * The data may still be correct,
310 * but should be treated as an approximation,
311 * and recomputed as soon as possible.
312 * <p>
313 * It is possible for a item previously non-stale that then became 'stale'
314 * to become non-stale again for a limited time
315 * while the system is conserving energy in low-power mode,
316 * thus avoiding the need for some expensive recomputation.
317 * <p>
318 * Trivially-stale instances,
319 * ie that were never fully computed,
320 * never become non-stale again in this way.
321 */
322 public boolean isStale()
323 {
324 // Object was immediately (trivially) stale.
325 // It cannot become non-stale.
326 if(isTriviallyStale()) { return(true); }
327
328 final long now = System.currentTimeMillis();
329
330 // Expired/stale.
331 // It may become/stay non-stale for much longer
332 // iff the system is in low-power (temporary energy saving) mode right now
333 // and even longer for 'bad' exhibits to save wasting too much effort/energy on them.
334 if(staleAfter < now) /* Normally stale. */
335 {
336 final long extraTime = (goodness >= 0) ? CoreConsts.MAX_EXPECTED_LOW_POWER_RUN_MS :
337 (2 * CoreConsts.MAX_EXPECTED_LOW_POWER_RUN_MS); // Keep a bad 'un down for longer...
338 if((staleAfter + extraTime < now) || !GenUtils.mustConservePower())
339 { return(true); }
340 }
341
342 // Suspicious stale date much too far in the future, so treat data as stale.
343 if(staleAfter >= now + 4*MAX_AGE_BEFORE_STALE_MS)
344 { return(true); }
345
346 // Looks OK!
347 return(false);
348 }
349
350 /**If true, the data in this object is not only stale; it is only even a fast approximation.
351 * The data is probably not worth persisting, for example,
352 * since it was not fully computed and could probably be very quickly recomputed.
353 */
354 public boolean isTriviallyStale()
355 {
356 // Object was immediately (trivially) stale.
357 // It cannot become non-stale.
358 return(staleAfter == 0);
359 }
360
361 /**Get the basic 'best-before' time.
362 * Once this has passed the data is potentially stale
363 * and isStale() will be true unless the system is in power-conserving mode,
364 * at which point isStale() can be deferred for some considerable time.
365 * <p>
366 * This value is useful to help schedule recomputations most-stale-first.
367 * <p>
368 * For trivially-stale instances this is always zero.
369 */
370 public final long bestBefore() { return(staleAfter); }
371
372 /**Maximum age (in ms) of instances before considered them stale; strictly positive.
373 * <em>Slightly</em> (much less than a factor of two) different limit value
374 * between class/JVM/mirror instances to spread out recomputations.
375 * <p>
376 * Each instance of this object should pick a random time
377 * between about half of this value and the full value before going stale
378 * in order to spread out refresh computations over a reasonable interval.
379 * Recomputing these values may prove quite expensive.
380 * <p>
381 * This value is selected so that most exhibits actual scores
382 * will not have changed significantly in the given interval;
383 * something of the order of tens of hours to tens of days is good.
384 */
385 public static final int MAX_AGE_BEFORE_STALE_MS = 64 * 3600 * 1000 + Rnd.fastRnd.nextInt(8 * 3600 * 1000);
386
387 /**Maximum age (in ms) of instances built with all other than Scorer data before considered stale; strictly positive.
388 * Should be (much) shorter than MAX_AGE_BEFORE_STALE_MS,
389 * and much less than 24 hours to force recomputation in favour of the full value
390 * in a full daily cycle if sufficient energy becomes available.
391 * <p>
392 * This allows us to get some rating and ranking done in the absence of
393 * heavy-weight AI computations when low on power,
394 * with this interval acting as a cache of the partially-computed value.
395 */
396 public static final int MAX_AGE_SANS_SCORERS_BEFORE_STALE_MS = Math.min(12 * 3600 * 1000 + Rnd.fastRnd.nextInt(1 * 3600 * 1000),
397 MAX_AGE_BEFORE_STALE_MS/4);
398
399
400 /**The "goodness" score, -MAX_VALUE is maximally bad and MAX_VALUE maximally good, zero is neutral.
401 * This is a composite of many factors,
402 * including a small random element.
403 * <p>
404 * Has enough significant digits to allow a total ordering
405 * even over a very large number of exhibits.
406 * <p>
407 * This is nominally logically thought of in the range [-1.0, +1.0].
408 */
409 private final int goodness;
410
411 /**Get the "goodness" score, -MAX_VALUE is maximally bad and MAX_VALUE maximally good, zero is neutral.
412 * This gets the value as a raw int value;
413 * useful for fast compares, sorts, etc.
414 */
415 public int getGoodness()
416 { return(goodness); }
417
418 /**Get the "goodness" score as a normalised float in the range -1 (bad) to +1 (good), 0 is neutral.
419 * This is represented as a float which can lose precision/information,
420 * but may be easier to work with for display and some computations.
421 */
422 public float getGoodnessAsFloat()
423 {
424 return((float) (((double) goodness) / Integer.MAX_VALUE));
425 }
426
427 /**Find out if this exhibit is rated "good"/popular or not.
428 * Returns TRUE if rated good, FALSE if bad,
429 * null if not significantly either (ie too close to neutral) or if unknown.
430 * <p>
431 * This is based on the computed "goodness" score.
432 */
433 public Boolean isGood()
434 {
435 if((goodness < GOODBAD_LIMIT_INT) && (goodness > -GOODBAD_LIMIT_INT)) { return(null); }
436 return((goodness >= 0) ? Boolean.TRUE : Boolean.FALSE);
437 }
438
439 /**Maximum weighting for vox pop user votes.
440 * Should be large but possibly not overwhelming,
441 * eg because of pranks to attempt to "fix" the votes.
442 * <p>
443 * The vote element can count for or against an exhibit (ie be +ve or -ve),
444 * and an absence of any votes may tend to dilute goodness back towards neutral.
445 */
446 public static final float POP_VOTES = 0.7f;
447
448 /**Maximum weighting from correlated voting on related exhibits.
449 * Possibly quite a crude measure,
450 * though will be more valuable with more data available,
451 * and so if all our required samples are in place,
452 * then we'll give it a rating that should be visible to users,
453 * and more significant than just newness, for example.
454 */
455 public static final float POP_VOTE_CORR = 0.1331f;
456
457 /**Maximum weighting from Scorer judgement of exhibit content.
458 * This AI-based mechanism inspects the content of each exhibit,
459 * and is driven to get the best possible fit with voting data.
460 * <p>
461 * We cap this at a higher weighting than the metadata-driven statistical methods,
462 * but lower than the real votes.
463 */
464 public static final float POP_AI_SCORER = Math.min(2*POP_VOTE_CORR, POP_VOTES/2);
465
466 /**Maximum weighting for exhibit recently viewed or downloaded.
467 * Possibly quite a crude measure,
468 * though will be more valuable with more data available,
469 * and so if all our required samples are in place,
470 * we'll give it a rating that should be visible to users,
471 * and more significant than just newness, for example.
472 * <p>
473 * We make this a little less important than vote correlations.
474 */
475 public static final float POP_RECENT_ACCESS = POP_VOTE_CORR * 0.73f;
476
477 /**Weighting/correlation of "newness" component of popularity.
478 * This is only applied as a positive (proportional) factor
479 * for exhibits considered "new",
480 * since newer items are potentially more interesting.
481 * <p>
482 * This factor should be relatively small,
483 * not overwhelming manually-set weightings for example,
484 * and mainly intended as a tie-breaker.
485 */
486 public static final float POP_CORR_NEWNESS = 0.001f;
487
488 /**Weighting/correlation of "random" component of popularity.
489 * Exists to help break ties and perturb the popularity ordering a little.
490 * Should be very small so as not to outweigh any genuine factor,
491 * but more-or-less force a total ordering.
492 * <p>
493 * This factor falls equally either side of zero, is a +/- limit.
494 */
495 public static final float POP_CORR_RANDOM = 0.00001f;
496
497 /**Weighting/correlation of has-description component of popularity.
498 * Applied to items that have a description
499 * as slightly more interesting than those without.
500 * <p>
501 * Should generally be greater than the randomness weighting,
502 * and probably less than the newness rating.
503 */
504 public static final float POP_CORR_HASDESC = Math.min(POP_CORR_RANDOM * 10,
505 POP_CORR_NEWNESS / 3);
506
507 /**Weighting factor for an item with a specific description. */
508 private static final Factor FACTOR_HASDESC =
509 new Factor(1, POP_CORR_HASDESC);
510
511 /**Weighting factor for an item with a generic description.
512 * Carries less weight than having a specific description.
513 * <p>
514 * May apply to having location or AKA or description text.
515 */
516 private static final Factor FACTOR_HASGENERICDESC =
517 new Factor(1, POP_CORR_HASDESC/2);
518
519 /**Single (immutable) component of multi-factor value.
520 * Consists of:
521 * <ul>
522 * <li>a goodness value [-1,+1]
523 * <li>a correlation/confidence rating [-1,+1]
524 * </ul>
525 * <p>
526 * The value added into the final value is proportional to:
527 * <samp>(goodness * correlation) / |correlation|</samp>.
528 * <p>
529 * Uses float values since there is probably not even that much
530 * real precision available, and doing so probably saves space and time.
531 * Double values can be passed to the constructors,
532 * but the extra precision is discarded.
533 */
534 static public final class Factor
535 {
536 public Factor(final float goodness, final float correlation)
537 {
538 if((!(goodness >= -1) && (goodness <= +1)) ||
539 (!(correlation >= -1) && (correlation <= +1)))
540 { throw new IllegalArgumentException(); }
541 this.goodness = goodness;
542 this.correlation = correlation;
543 }
544 /**Goodness contributed by this factor: range [-1,+1]. */
545 public final float goodness;
546 /**Correlation of this value with overall result: range [-1,+1]. */
547 public final float correlation;
548
549 /**We will accept double values, but discard the extra precision. */
550 public Factor(final double goodness, final double correlation)
551 { this((float) goodness, (float) correlation); }
552
553 /**Convert to human-readable form. */
554 @Override
555 public String toString()
556 {
557 final StringBuilder sb = new StringBuilder();
558 sb.append("Factor");
559 sb.append(":goodness=").append(goodness);
560 sb.append(":correlation=").append(correlation);
561 return(sb.toString());
562 }
563 }
564
565 /**Zero factor; zero weight, zero goodness. */
566 public static final Factor FACTOR_ZERO = new Factor(0, 0);
567
568 /**Compute random "goodness" factor symmetrical about zero; never null. */
569 private static Factor _computeRandomGoodnessFactor()
570 {
571 return new Factor((Rnd.fastRnd.nextBoolean() ?
572 Rnd.fastRnd.nextDouble() : -Rnd.fastRnd.nextDouble()),
573 POP_CORR_RANDOM);
574 }
575
576 /**Compute newness bonus (if any); never null.
577 * If not new enough to be considered "new" then a zero factor is returned.
578 * <p>
579 * This is not necessarily precisely aligned with the "isExhibitNew()" result,
580 * though except in race conditions they should be aligned.
581 *
582 * @param esa exhibit data; never null
583 * @return factor, positive but low weight if exhibit "new",
584 * else zero with zero weighting
585 */
586 private static Factor _computeNewnessBonusFactor(final ExhibitStaticAttr esa)
587 {
588 // Also for some wobble is server time vs new-exhibit timestamps.
589 final long age = Math.max(0, System.currentTimeMillis() - esa.timestamp);
590 if(age <= CoreConsts.MAX_EXHIBIT_AGE_NEW_MS)
591 {
592 // A very new exhibit has a goodness of nearly 1.0.
593 // An exhibit only just "new" has a goodness of just over 0.0.
594 final double goodness = (CoreConsts.MAX_EXHIBIT_AGE_NEW_MS - age) /
595 (double) CoreConsts.MAX_EXHIBIT_AGE_NEW_MS;
596 return(new Factor(goodness, POP_CORR_NEWNESS));
597 }
598
599 // Exhibit not "new", so zero goodness but the same weight.
600 return(_NOT_NEW);
601 }
602
603 /**Contribution for goodness of non-new exhibits; zero goodness but the same "newness" weight. */
604 private static final Factor _NOT_NEW = new Factor(0, POP_CORR_NEWNESS);
605
606 /**Compute full goodness factor, using full data source if available; never null.
607 *
608 * @param esa exhibit data; never null
609 * @param initialComponents initial set of (data-dependent) Factor elements;
610 * possibly empty but never null
611 */
612 private static Factor _computeTotalGoodnessFactor(final ExhibitStaticAttr esa,
613 final GenProps gp,
614 final AllExhibitProperties aep,
615 final List<Factor> initialComponents)
616 {
617 // Compute a list of components of goodness Factors.
618 // Leave enough space to avoid need for expansion in most (time-critical) cases.
619 final List<Factor> components = new ArrayList<Factor>(5 + initialComponents.size());
620 components.addAll(initialComponents);
621
622 // Compute the fast components that do not require a
623 // (potentially slow) data source or any other non-intrinsic data.
624 components.add(_computeRandomGoodnessFactor());
625 components.add(_computeNewnessBonusFactor(esa));
626
627 // Calculations that require a non-default GenProps to be effective.
628 // Factor in the author weight, if any.
629 final Name.ExhibitFull exhibitName = esa.getExhibitFullName();
630 final Byte aW = gp.getPopWeightForAuth(ExhibitName.getAuthorComponent(exhibitName));
631 if(aW != null)
632 { components.add(new Factor(1, aW.doubleValue() / GenProps.MAX_POPWT_VAL)); }
633 // Factor in the file-type weight, if any.
634 final Byte tW = gp.getPopWeightForType(ExhibitName.getExtensionComponent(exhibitName));
635 if(tW != null)
636 { components.add(new Factor(1, tW.doubleValue() / GenProps.MAX_POPWT_VAL)); }
637 // Penalise one-word exhibits (ignoring number-in-series) for not being descriptive enough.
638 final Set<String> noAttrWords = Collections.emptySet();
639 if(TextUtils.indexOf(exhibitName.getShortName().getMainWordsComponent(noAttrWords), ExhibitName.WORD_SEP) == -1)
640 { components.add(new Factor(-1, POP_CORR_HASDESC)); }
641 // Slight penalty to exhibits that *cannot* show thumbnails, since images are most accessible.
642 if(!(ExhibitMIME.getInputFileType(esa.getCharSequence())).canPossiblyCreateThumbnailOfSameMIMEType())
643 { components.add(new Factor(-1, POP_CORR_HASDESC/3)); }
644
645 // Do the full (but potentially-expensive) goodness calcs
646 // iff we have an apparently-working data source.
647 if(aep != null)
648 {
649 // Factor in weight from attribute words.
650 final Enumeration<?> aWEn =
651 ExhibitName.getAttributeWordsComponentEnumeration(exhibitName, ExhibitAttrUtils.getAttrWords().getAttrWordsSortedSet());
652 if(aWEn != null)
653 {
654 while(aWEn.hasMoreElements())
655 {
656 final String attrWord = (String) aWEn.nextElement();
657 final Byte attrW = gp.getPopWeightForAttr(attrWord);
658 if(attrW != null)
659 { components.add(new Factor(1, attrW.doubleValue() / GenProps.MAX_POPWT_VAL)); }
660 }
661 }
662
663 // Factor in weight from description, if any.
664 final ExhibitPropsLoadable epl = aep.getExhibitPropsLoadable(exhibitName);
665 if(epl.hasDescription())
666 { components.add(FACTOR_HASDESC); }
667
668 // Having a location is a generic description...
669 if(Location.NONE != aep.getLocation(exhibitName))
670 { components.add(FACTOR_HASGENERICDESC); }
671 }
672
673 // Now compute the weighted goodness value...
674 double g = 0;
675 double w = 0;
676 for(final Iterator<Factor> it = components.iterator(); it.hasNext(); )
677 {
678 final Factor f = it.next();
679 w += Math.abs(f.correlation);
680 g += f.goodness * f.correlation;
681 }
682
683 // Adjust so that final weight is 1.0,
684 // and the cumulative value is divided by at least that.
685 final double wPrime = Math.max(1.0, w);
686 return(new Factor(g / wPrime, 1.0));
687 }
688
689
690 /**Minimum sample period to even out short cycles in Web access patterns, in ms; strictly positive.
691 * Must usually be a multiple (and minimum) of a week to allow for most common
692 * daily and weekly access patterns to be smoothed as well as possible.
693 */
694 private static final long _SAMPLE_CYCLE_PERIOD_MINOR_MS = 7 * 24 * 3600 * 1000L;
695
696 /**The longest cycle time we will look for in historical data, in ms; strictly positive.
697 * This is usually a year to look for seasonal patterns,
698 * eg what is popular at Christmas or in the summer.
699 */
700 private static final long _SAMPLE_CYCLE_PERIOD_MAJOR_MS = 365242L * 24 * 3600; // 365.242 days in a year.
701
702 /**Number of VLONG samples corresponding to annual cycles in data. */
703 private static final int _SEASONAL_VLONG_CYCLE_PERIODS = Math.max(1, (int)
704 Math.round(_SAMPLE_CYCLE_PERIOD_MAJOR_MS / (double) EventPeriod.VLONG.getIntervalMs()));
705
706 /**Minimum number of VLONG samples to smooth out daily/weekly cycles in data. */
707 private static final int _MIN_VLONG_SAMPLE_PERIODS = Math.max(1, (int)
708 Math.round(_SAMPLE_CYCLE_PERIOD_MINOR_MS / (double) EventPeriod.VLONG.getIntervalMs()));
709
710
711 /**Compute Factor(s) that depend on access data.
712 * If this cannot fetch some of the data it requires due to an IOException
713 * (not the same as fetching data successfully that has no entries)
714 * then the exception is propagated to the caller.
715 * (Normally this results in the EPCM item for which this is called being marked stale
716 * so that we can try again when the data is available.)
717 * <p>
718 * The comment fields of the factors produced are
719 * the event names from which they were generated.
720 * <p>
721 * This is public to assist testing.
722 *
723 * @throws IOException if this was unable to fetch some data required
724 */
725 public static Collection<Factor> calcAccessFactors(final Name.ExhibitFull exhibitName,
726 final AllExhibitProperties aep,
727 final BasicVarMgrInterface vars)
728 throws IOException
729 {
730 if((exhibitName == null) || (aep == null) || (vars == null))
731 { throw new IllegalArgumentException(); }
732
733 // Get short name of exhibit as String,
734 // by which most stats are recorded.
735 final String shortNameAsString = exhibitName.getShortName().toString();
736
737 // Factor in weight from access stats...
738 final int nExhibits = aep.aeid.size();
739
740 // Threshold at which usage is considered significantly "good".
741 final int rankLimit = nExhibits / 3;
742
743 // Set of slots/intervals in which we will sample access-pattern stats.
744 // We let the routine force some solid coverage at the near end.
745 final BitSet whichIntervals = _createSampleBitSet(0);
746
747 // Count the full set of samples that we are looking for.
748 final int desiredSamples = whichIntervals.cardinality();
749
750 final long prevInterval = EventPeriod.VLONG.getIntervalNumber(System.currentTimeMillis()) - 1;
751
752
753
754 // Access stats event definitions which we want and that record count by short exhibit name.
755 final List<SimpleVariableDefinition> eventVarNames = new ArrayList<SimpleVariableDefinition>(3);
756 eventVarNames.add(SystemVariables.ACCESSPATTERN_CAT_PAGE_VIEW);
757 eventVarNames.add(SystemVariables.ACCESSPATTERN_CLICKTHROUGH);
758 // Only look for downloads if not automatically excluded from being counted.
759 // (Note that this definition may vary between data being collected and being used.)
760 final ExhibitStaticAttr esa = aep.aeid.getStaticAttr(exhibitName);
761 if((esa != null) && !ImageUtils.canBeOwnThumbnail(esa))
762 { eventVarNames.add(SystemVariables.ACCESSPATTERN_COMPLETED_DOWNLOAD); }
763
764 // Create/size the results container...
765 final List<Factor> components = new ArrayList<Factor>(eventVarNames.size());
766
767 // Retrieve data for each event...
768 // Currently we we give all intervals the same weighting.
769 // This is especially dodgy for the current interval, which may be very short and unrepresentative.
770 // We give total weighting in proportion to the square root of the number of samples obtained.
771 // Shuffle the order in which we fetch the data to improve concurrency a little.
772 Collections.shuffle(eventVarNames, Rnd.fastRnd);
773 for(final SimpleVariableDefinition def : eventVarNames)
774 {
775 assert(def.isEvent() && (def.getType() == SimpleVariableDefinition.TYPE_STRING));
776 final EventVariableValue evvs[] = vars.getEventValues(def,
777 EventPeriod.VLONG,
778 prevInterval,
779 whichIntervals);
780 assert(evvs != null); // May be empty, but never null.
781 // Count the number of results we actually retrieved.
782 int resultsFound = 0;
783 double totalScore = 0; // Cumulative score so far...
784 for(int i = whichIntervals.nextSetBit(0); i >= 0; i = whichIntervals.nextSetBit(i+1))
785 {
786 if(i >= evvs.length) { break; /* Run out of answers... */ }
787
788 final EventVariableValue evv = evvs[i];
789
790 // Ignore null values, and those with *no* events at all for any value.
791 if(evv == null) { continue; }
792 if(evv.getTotalEventCount() < 1) { continue; }
793
794 // OK, have a real result; we can see if this exhibit ranks well or not.
795 ++resultsFound;
796
797 // If the event bucket may have "overflowed" (too many distinct event values)
798 // then we should probably ignore count==1 entries as unreliable/incomplete.
799 final boolean possibleOverflow =
800 (evv.getTotalDistinctValues() >= evv.getDef().getMaxDiffEventCount());
801
802 final int rank = evv.getRank(shortNameAsString);
803 if((rank < rankLimit) &&
804 (!possibleOverflow || (evv.getCount(shortNameAsString) > 1)))
805 { totalScore += (rankLimit - rank) / (double) rankLimit; }
806 }
807
808 // We should always get at least one response.
809 // Maybe we should abort with an error if not.
810 // if(resultsFound < 1) { throw new IOException("not even current results found"); }
811
812 // Compute goodness score.
813 final double g = totalScore / Math.max(1, resultsFound);
814 // Compute confidence weighting based on number of samples.
815 final double confidence = Math.sqrt(resultsFound / (double) desiredSamples);
816 //if(IsDebug.isDebug) { System.out.println("***EPCM._calcAccessFactors(): score/resultsFound/confidence/exhibit/event = " +g+", " +resultsFound+", " +confidence+", " +exhibitName+", " +def.getName()); }
817 components.add(new Factor(g, POP_RECENT_ACCESS * confidence));
818 }
819
820 return(components);
821 }
822
823 /**If true then include the "all" buckets in our calculations. */
824 private static final boolean USE_ALL_BUCKET = false;
825
826 /**Cache of semi-fixed interval sample set for power-conserving (low-power) mode; never null.
827 * Cleared (set to null) whenever not in conservation mode.
828 * <p>
829 * Any BitSet stored here is never altered once assigned.
830 * <p>
831 * Used only when conserving and when the requested minNearMs is no larger than our forced level.
832 * <p>
833 * Fixed (small) upper bound on storage required.
834 * <p>
835 * Marked null for atomic lock-free access.
836 */
837 private static transient volatile BitSet cachedLPSampleSet;
838
839 /**Compute exactly one Factor that depends on explicit user votes for specified exhibit; never null.
840 * Because we expect voting to be sparse,
841 * we total up votes across all sample intervals to reduce noise,
842 * rather than taking each sample separately,
843 * and do various other calculations a little differently to dense measures.
844 * <p>
845 * If this cannot fetch some of the data it requires due to an IOException
846 * (not the same as fetching data successfully that has no entries)
847 * then the exception is propagated to the caller.
848 * (Normally this results in the EPCM item for which this is called being
849 * marked stale so that we can try again when the data is available.)
850 * <p>
851 * This is public to assist testing and computation of correlations.
852 * <p>
853 * The returned factor's goodness can range from -1 to +1,
854 * and confidence from 0 to +1;
855 * any scaling required will have to be applied elsewhere.
856 *
857 * @param minNearMs minimum interval (ms) at near end
858 * to include all available samples for
859 *
860 * @throws IOException if this was unable to fetch some data required
861 */
862 public static Factor calcVoteFactor(final Name.ExhibitFull exhibitName,
863 final AllExhibitProperties aep,
864 final BasicVarMgrInterface vars,
865 final long minNearMs)
866 throws IOException
867 {
868 if((exhibitName == null) || (aep == null) || (vars == null))
869 { throw new IllegalArgumentException(); }
870
871 if(!aep.aeid.isPresent(exhibitName))
872 { throw new IllegalArgumentException("no such exhibit"); }
873
874 // Get short name of exhibit,
875 // by which most stats are recorded.
876 final String shortName = exhibitName.getShortName().toString();
877
878 // Set of slots/intervals in which we will sample voting behaviour.
879 // Insist on over a month's worth of near-term votes if possible,
880 // significantly less if temporarily conserving power long term.
881 // (Long term since many system may happen to be in low power mode
882 // at the time of day that this would usually be recalculated,
883 // even if generally fine on energy availability.)
884 final boolean conservingPower = GenUtils.mustConservePowerLongTerm();
885 final long forcedMinNearMs = conservingPower ?
886 (7 * 24 * 3600 * 1000L) : (33 * 24 * 3600 * 1000L);
887 final BitSet whichIntervals;
888 if(!conservingPower)
889 {
890 // Beefy new set each time when not conserving...
891 whichIntervals = _createSampleBitSet(Math.max(minNearMs, forcedMinNearMs));
892 // Clear any cached (low-power) set...
893 cachedLPSampleSet = null;
894 }
895 else
896 {
897 if(minNearMs > forcedMinNearMs)
898 {
899 // Cannot use cached set if requested minimum near set larger than forced near set...
900 whichIntervals = _createSampleBitSet(minNearMs);
901 }
902 else
903 {
904 BitSet cached;
905 // Use cached set if extant...
906 if(null == (cached = cachedLPSampleSet))
907 {
908 // ... else create new fixed minimum-size set and cache it.
909 cachedLPSampleSet = cached = _createSampleBitSet(forcedMinNearMs);
910 }
911 assert(null != cached);
912 whichIntervals = (BitSet) cached.clone(); // Defensive copy to protect the cache.
913 }
914 }
915
916 // Count the full set of samples that we are looking for
917 // optionally including the "all" set assumed always available.
918 final int desiredSamples = whichIntervals.cardinality() + (USE_ALL_BUCKET?1:0);
919
920 // Last available complete interval/period.
921 final long prevInterval = EventPeriod.VLONG.getIntervalNumber(System.currentTimeMillis()) - 1;
922
923 // Get votes "for" this exhibit.
924 final EventVariableValue vPro[] = vars.getEventValues(SystemVariables.VOTE_PRO,
925 EventPeriod.VLONG,
926 prevInterval,
927 whichIntervals);
928 // Get votes "against" this exhibit.
929 final EventVariableValue vCon[] = vars.getEventValues(SystemVariables.VOTE_CON,
930 EventPeriod.VLONG,
931 prevInterval,
932 whichIntervals);
933
934 // Set of all event values seen in all sampled slots.
935 // We use a HashSet for speed.
936 final HashSet<Object> allVoteValues = new HashSet<Object>();
937 // Count of all votes in all slots.
938 int allVotesCount = 0;
939
940 int resultsFound = 0; // Total results usable including "all" slot.
941 int totalProVotesThisEx = 0; // Total votes for this exhibit in the sample slots.
942 int totalConVotesThisEx = 0; // Total votes against this exhibit in the sample slots.
943
944 if(USE_ALL_BUCKET)
945 {
946 // Get the "all" slot votes "for"...
947 final EventVariableValue[] aPro = vars.getEventValues(SystemVariables.VOTE_PRO,
948 EventPeriod.VLONG,
949 0,
950 null);
951
952 // Get the "all" slot votes "against"...
953 final EventVariableValue[] aCon = vars.getEventValues(SystemVariables.VOTE_CON,
954 EventPeriod.VLONG,
955 0,
956 null);
957 // Regard the "all" slot as always present if we're using it
958 // (which they logically are, even if not necessarily programmatically).
959 ++resultsFound;
960 if((aPro.length > 0) && (aPro[0] != null))
961 {
962 allVoteValues.addAll(aPro[0].getDistinctValuesInRankOrder());
963 allVotesCount += aPro[0].getTotalEventCount();
964 totalProVotesThisEx += aPro[0].getCount(shortName);
965 }
966 if((aCon.length > 0) && (aCon[0] != null))
967 {
968 allVoteValues.addAll(aCon[0].getDistinctValuesInRankOrder());
969 allVotesCount += aCon[0].getTotalEventCount();
970 totalConVotesThisEx += aCon[0].getCount(shortName);
971 }
972 }
973
974 for(int i = whichIntervals.nextSetBit(0); i >= 0; i = whichIntervals.nextSetBit(i+1))
975 {
976 final EventVariableValue pro = (i < vPro.length) ? vPro[i] : null;
977 final EventVariableValue con = (i < vCon.length) ? vCon[i] : null;
978
979 // Ignore slot where no votes were collected either way for any exhibit at all.
980 final int proTEC = (pro == null) ? 0 : pro.getTotalEventCount();
981 final int conTEC = (con == null) ? 0 : con.getTotalEventCount();
982 final int tEC = proTEC + conTEC;
983 if(tEC < 1) { continue; }
984
985 // OK, have a real result; we can see if this exhibit has attracted any votes.
986 ++resultsFound;
987
988 // Capture all the distinct event values in this slot
989 // so that we know all the exhibits that have attracted votes
990 // in the slots that we have sampled.
991 if(pro != null) { allVoteValues.addAll(pro.getDistinctValuesInRankOrder()); }
992 if(con != null) { allVoteValues.addAll(con.getDistinctValuesInRankOrder()); }
993 allVotesCount += tEC;
994
995 // Sum up the total votes either way for this exhibit.
996 if(pro != null) { totalProVotesThisEx += pro.getCount(shortName); }
997 if(con != null) { totalConVotesThisEx += con.getCount(shortName); }
998 }
999
1000 // Take a simplistic view that the overall vote
1001 // is simply the mean of all those cast.
1002 final int totalScoreThisEx = totalProVotesThisEx - totalConVotesThisEx;
1003 final int totalVotesCastThisEx = totalProVotesThisEx + totalConVotesThisEx;
1004
1005 // Compute goodness score.
1006 final double g = Math.min(1, Math.max(-1, totalScoreThisEx / (double) Math.max(1, totalVotesCastThisEx)));
1007
1008 // Compute average number of votes per voted-for-exhibit
1009 // over all the sampled slots with at least one vote...
1010 // FIXME: should re-do this to allow for "lost" vote events,
1011 // else we end up overestimating the votes-per-voted-for-exhibit value
1012 // and this under-rating even the most-voted-for exhibit(s).
1013 final double meanVotesPerEx = allVotesCount / Math.max(1.0, allVoteValues.size());
1014
1015 //System.out.println("***EPCM.calcVoteFactor(): allVotesCount = " + allVotesCount);
1016 //System.out.println("***EPCM.calcVoteFactor(): allVoteValues.size() = " + allVoteValues.size());
1017 //System.out.println("***EPCM.calcVoteFactor(): totalVotesCastThisEx = " + totalVotesCastThisEx);
1018 //System.out.println("***EPCM.calcVoteFactor(): totalScoreThisEx = " + totalScoreThisEx);
1019 //System.out.println("***EPCM.calcVoteFactor(): meanVotesPerEx = " + meanVotesPerEx);
1020 //System.out.println("***EPCM.calcVoteFactor(): desiredSamples = " + desiredSamples + " : " + whichIntervals);
1021
1022 // Confidence based on number of votes received for this exhibit
1023 // compared to the mean number of votes for each voted-for-exhibit,
1024 // using normal variance-reduction (sqrt) policy.
1025 // This value may exceed 1.0 to boost overall result confidence
1026 // where this exhibit has received an unusually high number of votes.
1027 final boolean enoughVotesCast = totalVotesCastThisEx >= meanVotesPerEx;
1028 final double confidenceThisExhibit = enoughVotesCast ? 1 :
1029 Math.sqrt(totalVotesCastThisEx / meanVotesPerEx);
1030 // Compute confidence weighting based on number of usable samples
1031 // and the total number of votes we got compared to the mean vote count per exhibit in sampled slots.
1032 final double confidence = Math.min(1.0, Math.sqrt(resultsFound / (double) desiredSamples) * confidenceThisExhibit);
1033 //if(IsDebug.isDebug) { System.out.println("***EPCM.calcVoteFactor(): score/resultsFound/confidence/exhibit = " +g+", " +resultsFound+", " +confidence+", " +exhibitName); }
1034 // For low-confidence (low-vote) exhibits, tone down the goodness too.
1035 return(new Factor(enoughVotesCast ? g : (g * confidenceThisExhibit), confidence));
1036 }
1037
1038
1039 /**If true, use ALL available votes rather than a sampling in _computeSampleBitSet().
1040 * If this is false then a caller of _computeSampleBitSet() can get a similar effect
1041 * with a large minNearMs value.
1042 */
1043 private static final boolean USE_ALL_VOTES = false;
1044
1045 /**Create BitSet of samples of event history to take; never null nor empty.
1046 * Creates a BitSet of length up to SystemVariables.EVENT_SAMPLES_RETAINED slots,
1047 * intended to be used with the EventPeriod.VLONG interval size,
1048 * with index 0 indicating the <em>previous</em> slot (ie no use of the current period).
1049 * <p>
1050 * This includes:
1051 * <ul>
1052 * <li>near-end samples designed to smooth out short (daily/weekly) cycles,
1053 * <li>annual/seasonal look-ahead to predict what may be of interest,
1054 * <li>a random sampling of points throughout the event history for robustness.
1055 * </ul>
1056 *
1057 * @param minNearMs if positive, the minimum number of recent ms that
1058 * the sampling should cover (else an internal minimum will be imposed)
1059 *
1060 * @return non-null, non-empty new private BitSet
1061 */
1062 private static BitSet _createSampleBitSet(final long minNearMs)
1063 {
1064 if(USE_ALL_VOTES)
1065 {
1066 // Sample every available vote history point for maximum accuracy
1067 // at a possible performance cost.
1068 final BitSet whichIntervals = new BitSet(SystemVariables.EVENT_SAMPLES_RETAINED);
1069 whichIntervals.set(0, SystemVariables.EVENT_SAMPLES_RETAINED);
1070 return(whichIntervals);
1071 }
1072
1073 // Number of our longest access periods needed to cover
1074 // "the minimum cycle" to smooth out short-term access patterns.
1075 // Typically at least 7 days to smooth out daily and weekly cycles.
1076 // We assume that our short cycle *does* fit in our data window.
1077 assert(_MIN_VLONG_SAMPLE_PERIODS < SystemVariables.EVENT_SAMPLES_RETAINED);
1078
1079 // Number of intervals in our long cycle,
1080 // typically looking for seasonal patterns.
1081 // Typically 365 days to pick up yearly festivals, seasonal patterns, etc.
1082 // We assume that our long cycle *does* fit in our data window.
1083 assert(_SEASONAL_VLONG_CYCLE_PERIODS < SystemVariables.EVENT_SAMPLES_RETAINED);
1084
1085 // We cut down the number of samples when conserving energy long term.
1086 // This makes our result more noisy, but not biased.
1087 final boolean reduceSampling = GenUtils.mustConservePowerLongTerm();
1088
1089 // Now calculate a set of sample periods to use; index 0 is the previous interval.
1090 // We factor in:
1091 // * a window at the short end to catch recent activity.
1092 // * a window around our long cycle to catch seasonal patterns.
1093 // * a large random sampling of the full window to be robust.
1094 // We take a little more than the minimum window at the short end to help predictive ability.
1095 // We DO NOT include the current interval,
1096 // since it is of variable quality and may be expensive to fetch.
1097 final BitSet whichIntervals = new BitSet(SystemVariables.EVENT_SAMPLES_RETAINED);
1098 // Near-end samples, NOT including the current period.
1099 // The caller can force this to be extended to cover the period they are interested in.
1100 final int blockSampleSize = reduceSampling ? _MIN_VLONG_SAMPLE_PERIODS : (4*_MIN_VLONG_SAMPLE_PERIODS);
1101 whichIntervals.set(0, Math.min(SystemVariables.EVENT_SAMPLES_RETAINED-1,
1102 Math.max(blockSampleSize, (int)(minNearMs/SystemVariables.EVENT_INTERVAL_VLONG_TERM_MS)) +
1103 1+(MAX_AGE_BEFORE_STALE_MS/SystemVariables.EVENT_INTERVAL_VLONG_TERM_MS)));
1104 // Predictive (we hope) seasonal samples; skew this forward for predictive value.
1105 final int seasonOffset = _SEASONAL_VLONG_CYCLE_PERIODS - _MIN_VLONG_SAMPLE_PERIODS;
1106 whichIntervals.set(seasonOffset, seasonOffset + (reduceSampling ? (2*_MIN_VLONG_SAMPLE_PERIODS) : (3*_MIN_VLONG_SAMPLE_PERIODS)));
1107 // One or more random samples;
1108 // take an extensive sampling of points otherwise not covered.
1109 for(int i = 1+Math.max(blockSampleSize, SystemVariables.EVENT_SAMPLES_RETAINED >> (reduceSampling ? 5 : 2)); --i >= 0; )
1110 { _chooseRandomSlot(Rnd.fastRnd, whichIntervals, true); }
1111 // Chose a new one using a good generator with some real entropy in,
1112 // thus hard to guess (but expensive),
1113 // but only if not conserving power (since we're not going to lose money on this!).
1114 if(!reduceSampling && !GenUtils.mustConservePowerExtreme()) { _chooseRandomSlot(Rnd.goodRnd, whichIntervals, true); }
1115
1116 return(whichIntervals);
1117 }
1118
1119 /**Set a random bit in the BitSet to take a random sample.
1120 * Sets a bit at random.
1121 * <p>
1122 * It is possible (though may be expensive) to insist that a new (previously false)
1123 * bit is set, but only if (much) less than SystemVariables.EVENT_SAMPLES_RETAINED bits
1124 * are already true.
1125 *
1126 * @param rnd random number source; never null
1127 * @param whichIntervals the BitSet to set a random bit in; never null
1128 * @param forceNew if true, ensure that we set a new bit ie one that was false
1129 * unless at least half of all possible bits have already been set
1130 */
1131 private static void _chooseRandomSlot(final Random rnd,
1132 final BitSet whichIntervals,
1133 final boolean forceNew)
1134 {
1135 do
1136 {
1137 final int bitIndex = rnd.nextInt(SystemVariables.EVENT_SAMPLES_RETAINED);
1138 if(forceNew && whichIntervals.get(bitIndex) &&
1139 (whichIntervals.cardinality() < (SystemVariables.EVENT_SAMPLES_RETAINED/2)))
1140 { continue; /* Selected bit already set; try again... */ }
1141
1142 // OK, actually set the bit and stop!
1143 whichIntervals.set(bitIndex);
1144 break;
1145
1146 } while(forceNew);
1147 }
1148
1149 /**Our serial version... */
1150 private static final long serialVersionUID = 8518817446315221597L;
1151
1152 // /**Deserialise. */
1153 // private void readObject(ObjectInputStream in)
1154 // throws IOException, ClassNotFoundException
1155 // {
1156 // in.defaultReadObject();
1157 // validateObject(); // Validate state immediately.
1158 // }
1159
1160 /**Deserialise: use constructor for validation, defensive copying, etc.
1161 * Replace all trivially-stale "empty" values with a single shared value.
1162 * <p>
1163 * <strong>NOTE: this may not always preserve all values as expected in future.</strong>
1164 */
1165 protected Object readResolve()
1166 // throws ObjectStreamException
1167 {
1168 // Eliminate redundant deserialised copies of TRIVIAL_NEUTRAL.
1169 if(isTriviallyStale() && (goodness == 0)) { return(TRIVIAL_NEUTRAL); }
1170
1171 // Construct new instance of object in normal defensive way.
1172 return(new ExhibitPropsComputableMutable(staleAfter, goodness));
1173 }
1174
1175 /**Serialise: replace all trivially-stale values being serialised with TRIVIAL_NEUTRAL.
1176 * We save all other (non-trivially-stale) values as-is, even if stale,
1177 * because they may have taken significant resources to compute
1178 * and may still be much more accurate than any fast approximation we could compute.
1179 * <p>
1180 * <strong>NOTE: this is effectively a LOSSY compression mechanism.</strong>
1181 */
1182 protected Object writeReplace()
1183 // throws ObjectStreamException
1184 {
1185 // Don't bother saving trivially-stale values to the stream
1186 // and replace them all with a single value in the output.
1187 if(isTriviallyStale()) { return(TRIVIAL_NEUTRAL); }
1188
1189 // Serialise/persist this non-trivial value as-is.
1190 return(this);
1191 }
1192
1193 /**Validate fields/state.
1194 * Called in the constructor and possibly after de-serialising.
1195 * <p>
1196 * Barf if something bad is found.
1197 * (Maybe allow some extra info in debug version.)
1198 */
1199 public void validateObject()
1200 throws InvalidObjectException
1201 {
1202 if(staleAfter < 0)
1203 { throw new InvalidObjectException("bad object: invalid 'staleAfter' time"); }
1204 if(goodness < -Integer.MAX_VALUE)
1205 { throw new InvalidObjectException("bad object: invalid 'goodness' value"); }
1206 }
1207 }