001 /*
002 Copyright (c) 1996-2011, Damon Hart-Davis
003 All rights reserved.
004
005 Redistribution and use in source and binary forms, with or without
006 modification, are permitted provided that the following conditions are
007 met:
008
009 * Redistributions of source code must retain the above copyright
010 notice, this list of conditions and the following disclaimer.
011
012 * Redistributions in binary form must reproduce the above copyright
013 notice, this list of conditions and the following disclaimer in the
014 documentation and/or other materials provided with the
015 distribution.
016
017 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
018 IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
019 TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
020 PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
021 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
022 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
023 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
024 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
025 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
026 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
027 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
028 */
029
030 package org.hd.d.pg2k.ai.scorer;
031
032 import java.io.IOException;
033 import java.util.Collections;
034 import java.util.Map;
035 import java.util.Set;
036
037 import org.hd.d.pg2k.svrCore.Name;
038 import org.hd.d.pg2k.svrCore.Name.ExhibitFull;
039 import org.hd.d.pg2k.svrCore.Name.ExhibitShort;
040
041
042 /**Base interface to compute (and cache) the score and confidence for exhibits.
043 * Note: since the result of this computation may be used in computing
044 * (EPCM) the ExhibitPropsComputableMutable value for an exhibit,
045 * then any implementation of this must avoid forcing recalculation
046 * of any EPCM value to avoid danger of infinite recursion
047 * (other than the static calcVoteFactor() method).
048 * Ideally the value computed will not depend on any EPCM value.
049 * <p>
050 * Methods that take an allowStale parameter
051 * will generally try to reduce compute and I/O effort and increase robustness
052 * at a small-ish cost in currency.
053 * Typically such methods will return data up to about one day old if present in cache,
054 * else the newest available from cache however old it is if there is a problem
055 * computing an up-to-date value (eg because of connectivity issues).
056 */
057 public interface ScorerCacheIF
058 {
059 /**Computes a weighted composite score [-1,+1] and confidence [0,+1] for the specified exhibit with the best available scorers/parameters; never null but may be (0,0).
060 * This is the highest-level available entry to the cache.
061 *
062 * @param exhibitName valid full exhibit name
063 * @param allowStale if true then allow a stale value from cache,
064 * else throw an exception if nothing is currently available
065 *
066 * @return (0,0) if named scorer is not available,
067 * cannot be used with the specified parameters,
068 * or cannot be applied to the indicated exhibit
069 * (eg because of the exhibit type or the exhibit does not exist);
070 * else returns a non-null ScoreAndConf value
071 */
072 ScoreAndConf computeCompositeScoreAndConfidence(final Name.ExhibitFull exhibitName,
073 final boolean allowStale)
074 throws IOException;
075
076 /**Like computeCompositeScoreAndConfidence() but never forces a new computation and returns only from cache; null if nothing suitable immediately available.
077 * This allows us to take advantage of on any work already done
078 * but without the risk of starting any expensive work.
079 *
080 * @param exhibitName valid full exhibit name
081 * @param allowStale if true then allow a stale value from cache,
082 * else throw an exception if nothing is currently available
083 *
084 * @return null if nothing already available in cache,
085 * else (0,0) if named scorer is not available,
086 * cannot be used with the specified parameters,
087 * or cannot be applied to the indicated exhibit
088 * (eg because of the exhibit type or the exhibit does not exist);
089 * else returns a non-null ScoreAndConf value
090 */
091 ScoreAndConf getCachedCompositeScoreAndConfidence(final Name.ExhibitFull exhibitName,
092 final boolean allowStale)
093 throws IOException;
094
095 /**Computes the raw score [-1,+1] and confidence [0,+1] for the specified exhibit with the specified scorer and parameters; never null but may be (0,0).
096 * This is <em>NOT</em> moderated/weighted in the light of how well the scorer's predictions
097 * match reality; that requires an extra step.
098 *
099 * @param exhibitName valid full exhibit name
100 * @param scorer instance of the Scorer; never null
101 * @param allowStale if true then allow a stale value from cache,
102 * else throw an exception if nothing is currently available
103 *
104 * @return (0,0) if named scorer is not available,
105 * cannot be used with the specified parameters,
106 * or cannot be applied to the indicated exhibit
107 * (eg because of the exhibit type or the exhibit does not exist);
108 * else returns a non-null ScoreAndConf value
109 */
110 ScoreAndConf computeUnweightedScoreAndConfidence(final ExhibitFull exhibitName,
111 final ScorerIF scorer,
112 final boolean allowStale)
113 throws IOException;
114
115 /**ScoreAndConfidence for the given Scorer over all exhibit types; never null but may be (0,0) where the scorer is unknown or untested.
116 * Essentially the result of this should be multiplied by the result for each exhibit
117 * (for the same scorer and parameters)
118 * to normalise the predicted score and confidence for the exhibit.
119 * <p>
120 * Some results will be cached (typically those from the base/current Scorer lists)
121 * while other may have to be computed each time, which may be slow.
122 * <p>
123 * The source parameter can be used to track which mechanisms are most effective
124 * at generating improvements in the population,
125 * and could, for example, to spend more time on those that are most effective right now.
126 *
127 * @param scorerNameAndParameters the name and parameters of the scorer; never null
128 * @param allowStale if true then allow a stale or low-confidence value from cache,
129 * else throw an exception if nothing is currently available
130 * and we cannot quickly compute enough points to increase our confidence
131 * @param source the name of the mechanism used to generate this Scorer value,
132 * or null if none
133 *
134 * @return the score represents the correlation with the underlying votes
135 * (and whatever the scoring is measured against)
136 * with MAX meaning perfect correlation, 0 meaning no correlation,
137 * and -MAX meaning perfectly wrong answers all the time,
138 * and the confidence 0 if we have no (or very/too few) data points
139 * and approaching MAX as we have a large (enough) number of data points
140 */
141 ScoreAndConf computeScorerWeighting(final String scorerNameAndParameters,
142 final boolean allowStale,
143 final String source)
144 throws IOException;
145
146 /**ScoreAndConfidence for the given Scorer over all exhibit types; never null but may be (0,0) where the scorer is unknown or untested.
147 * Essentially the result of this should be multiplied by the result for each exhibit
148 * (for the same scorer and parameters)
149 * to normalise the predicted score and confidence for the exhibit.
150 * <p>
151 * Some results will be cached (typically those from the base/current Scorer lists)
152 * while other may have to be computed each time, which may be slow.
153 * <p>
154 * The source parameter can be used to track which mechanisms are most effective
155 * at generating improvements in the population,
156 * and could, for example, to spend more time on those that are most effective right now.
157 * <p>
158 * The main value of this variant over the String 'name-and-parameters' method
159 * is that the Scorer instance supplied is used,
160 * which avoids constructing a new instance and may enable state to be shared better.
161 *
162 * @param scorer instance of the Scorer; never null
163 * @param allowStale if true then allow a stale or low-confidence value from cache,
164 * else throw an exception if nothing is currently available
165 * and we cannot quickly compute enough points to increase our confidence
166 * @param source the name of the mechanism used to generate this Scorer value,
167 * or null if none
168 *
169 * @return the score represents the correlation with the underlying votes
170 * (and whatever the scoring is measured against)
171 * with MAX meaning perfect correlation, 0 meaning no correlation,
172 * and -MAX meaning perfectly wrong answers all the time,
173 * and the confidence 0 if we have no (or very/too few) data points
174 * and approaching MAX as we have a large (enough) number of data points
175 */
176 ScoreAndConf computeScorerWeighting(final ScorerIF scorer,
177 final boolean allowStale,
178 final String source)
179 throws IOException;
180
181 /**Base set of available Scorers' names (no parameters); never null but may be empty.
182 * The values returned are of the form <i>ScorerName</i>.
183 */
184 Set<String> getBaseScorersWithoutParameters();
185
186 /**Get base non-parameterised Scorer by name; null if no such base Scorer supported.
187 * @param baseName base (no parameters) name of Scorer; must not be null
188 */
189 ScorerIF getBaseScorerByName(String baseName);
190
191 /**Current set of available Scorers name and parameters (where applicable); never null but may be empty.
192 * The values returned are of the form <i>ScorerName{:name=value}*</i>.
193 * <p>
194 * The scorers returned by this will generally be the best available,
195 * usually the best one or two per base-Scorer type,
196 * allowing in particular that different Scorer types may have different domains.
197 * <p>
198 * This call should not be desperately expensive,
199 * eg will not generally be doing any evolution/scoring,
200 * but may not be very quick/cheap either as it may require some search and sort.
201 */
202 Set<String> getCurrentScorersWithParameters(final boolean allowStale);
203
204 /**Compute exemplar exhibit sub-set to calibrate Scorers with given base name against; never null but may be empty.
205 * These may be exhibits for which we get particularly good or bad predictions,
206 * or a random sub-set.
207 * <p>
208 * An implementation may return an empty result (not null) if it cannot compute this value.
209 * <p>
210 * It may be possible to tune or pre-test new Scorers against the results of this
211 * as a fast filter.
212 * <p>
213 * If a base name is specified that is invalid, it is treated as if null.
214 * @param baseName base name of Scorer to extract calibration set for,
215 * or null for a generic all-Scorers calibration set
216 * @param maxSamples the maximum number of samples to return; strictly positive
217 * @param difficult if TRUE the return the difficult cases that we do not predict well,
218 * if FALSE then return the easy cases that we predict well,
219 * else return a mixure of good, bad, and other random cases
220 * @param allowStale if true then allow slightly older data for speed and robustness
221 *
222 * @return map of zero-or-more exhibits (short names) to calibration-data values; non-null
223 */
224 Map<ExhibitShort, ScoreAndConf> extractCalibrationSet(String baseName, int maxSamples, Boolean difficult, boolean allowStale)
225 throws IOException;
226
227 /**Get the current population size; non-negative. */
228 int size();
229
230 /**Get Scorer instance given the Scorer{:value=name}* format; null if no such Scorer available.
231 * Any instance returned may be a shared/cached instance rather than a new instance.
232 */
233 ScorerIF getScorerInstance(final String nameAndParameters);
234
235 /**Non-blocking attempt to queue an externally-supplied Scorer value; returns true if accepted.
236 * The input values must be fully validated and canonicalised before be used,
237 * but it is useful if some minimal screening is done before submitting values to this routine
238 * to prevent (for example) pointless excessive resource consumption.
239 * <p>
240 * A typical implementation that accepts inbound Scorer values would bound this with
241 * a bounded-size non-blocking queue.
242 * <p>
243 * A given implementation may always return false (ie never accept inbound Scorer values).
244 */
245 boolean offerExternalScorer(String externalScorerNameAndParameters);
246
247 /**Returns true if this cache can definitely accept (many) more externally-supplied Scorer values.
248 * Even if this returns false we may in practice be able to accept
249 * one or more new values: this is indicative.
250 * <p>
251 * An implementation that cannot accept any external Scorer values must always return false.
252 * <p>
253 * Typically this is true if any internal bounded-size queue has a lot of space left,
254 * eg is half empty, but this is not a guarantee that another value will actually be accepted.
255 */
256 boolean canAcceptMoreExternalScorers();
257
258 /**Returns true if at least once external Scorer is queued waiting to be processed. */
259 boolean hasQueuedExternalScorer();
260
261 /**Called/polled periodically (of the order of 1Hz) to do donkey-work and background tasks.
262 * In particular, this call drives the search for improved Scorers,
263 * as well as performing housekeeping and maintaining caches to speed foreground tasks.
264 * <p>
265 * This launches its work in a low-priority daemon thread,
266 * and limits the number of such concurrent work threads globally
267 * by silently discarding any excess,
268 * ie this call always returns quickly.
269 * <p>
270 * This routine should not be called (often) if the host system is under heavy load.
271 */
272 void poll() throws IOException;
273
274 /**Save work-in-progress if possible, and free up resources, ASAP.
275 * This may enable us to reduce work lost during a graceful system shutdown,
276 * but many shutdowns may not be graceful and so we should incrementally save/checkpoint too.
277 * <p>
278 * By default does nothing.
279 */
280 void destroy();
281
282
283 /**Trivial implementation that always returns "no comment"/empty/null values; non-null. */
284 public static final ScorerCacheIF TRIVIAL = new ScorerCacheIF() {
285 public ScoreAndConf computeCompositeScoreAndConfidence(final Name.ExhibitFull exhibitName, final boolean allowStale) { return(ScoreAndConf.NO_OPINION); }
286 public ScoreAndConf getCachedCompositeScoreAndConfidence(final Name.ExhibitFull exhibitName, final boolean allowStale) { return(null); }
287 public ScoreAndConf computeScorerWeighting(final String scorerNameAndParameters, final boolean allowStale, final String source) { return(ScoreAndConf.NO_OPINION); }
288 public ScoreAndConf computeScorerWeighting(final ScorerIF scorer, final boolean allowStale, final String source) { return(ScoreAndConf.NO_OPINION); }
289 public ScoreAndConf computeUnweightedScoreAndConfidence(final ExhibitFull exhibitName, final ScorerIF scorer, final boolean allowStale) { return(ScoreAndConf.NO_OPINION); }
290 public Set<String> getBaseScorersWithoutParameters() { return(Collections.emptySet()); }
291 public ScorerIF getBaseScorerByName(final String baseName) { return(null); }
292 public Set<String> getCurrentScorersWithParameters(final boolean allowStale) { return(Collections.emptySet()); }
293 public Map<ExhibitShort, ScoreAndConf> extractCalibrationSet(final String baseName, final int maxSamples, final Boolean difficult, final boolean allowStale) { return(Collections.emptyMap()); }
294 public int size() { return(0); }
295 public ScorerIF getScorerInstance(final String nameAndParameters) { return(null); }
296 public boolean offerExternalScorer(final String externalScorerNameAndParameters) { return(false); }
297 public boolean canAcceptMoreExternalScorers() { return(false); }
298 public boolean hasQueuedExternalScorer() { return(false); }
299 public void poll() { }
300 public void destroy() { }
301 };
302 }