001    /*
002    Copyright (c) 1996-2011, Damon Hart-Davis
003    All rights reserved.
004    
005    Redistribution and use in source and binary forms, with or without
006    modification, are permitted provided that the following conditions are
007    met:
008    
009      * Redistributions of source code must retain the above copyright
010        notice, this list of conditions and the following disclaimer.
011    
012      * Redistributions in binary form must reproduce the above copyright
013        notice, this list of conditions and the following disclaimer in the
014        documentation and/or other materials provided with the
015        distribution.
016    
017    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
018    IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
019    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
020    PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
021    OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
022    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
023    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
024    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
025    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
026    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
027    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
028    */
029    
030    package org.hd.d.pg2k.ai.scorer;
031    
032    import java.io.IOException;
033    import java.util.Collections;
034    import java.util.Map;
035    import java.util.Set;
036    
037    import org.hd.d.pg2k.svrCore.Name;
038    import org.hd.d.pg2k.svrCore.Name.ExhibitFull;
039    import org.hd.d.pg2k.svrCore.Name.ExhibitShort;
040    
041    
042    /**Base interface to compute (and cache) the score and confidence for exhibits.
043     * Note: since the result of this computation may be used in computing
044     * (EPCM) the ExhibitPropsComputableMutable value for an exhibit,
045     * then any implementation of this must avoid forcing recalculation
046     * of any EPCM value to avoid danger of infinite recursion
047     * (other than the static calcVoteFactor() method).
048     * Ideally the value computed will not depend on any EPCM value.
049     * <p>
050     * Methods that take an allowStale parameter
051     * will generally try to reduce compute and I/O effort and increase robustness
052     * at a small-ish cost in currency.
053     * Typically such methods will return data up to about one day old if present in cache,
054     * else the newest available from cache however old it is if there is a problem
055     * computing an up-to-date value (eg because of connectivity issues).
056     */
057    public interface ScorerCacheIF
058        {
059        /**Computes a weighted composite score [-1,+1] and confidence [0,+1] for the specified exhibit with the best available scorers/parameters; never null but may be (0,0).
060         * This is the highest-level available entry to the cache.
061         *
062         * @param exhibitName  valid full exhibit name
063         * @param allowStale  if true then allow a stale value from cache,
064         *     else throw an exception if nothing is currently available
065         *
066         * @return (0,0) if named scorer is not available,
067         *         cannot be used with the specified parameters,
068         *         or cannot be applied to the indicated exhibit
069         *         (eg because of the exhibit type or the exhibit does not exist);
070         *         else returns a non-null ScoreAndConf value
071         */
072        ScoreAndConf computeCompositeScoreAndConfidence(final Name.ExhibitFull exhibitName,
073                                                        final boolean allowStale)
074            throws IOException;
075    
076        /**Like computeCompositeScoreAndConfidence() but never forces a new computation and returns only from cache; null if nothing suitable immediately available.
077         * This allows us to take advantage of on any work already done
078         * but without the risk of starting any expensive work.
079         *
080         * @param exhibitName  valid full exhibit name
081         * @param allowStale  if true then allow a stale value from cache,
082         *     else throw an exception if nothing is currently available
083         *
084         * @return null if nothing already available in cache,
085         *         else (0,0) if named scorer is not available,
086         *         cannot be used with the specified parameters,
087         *         or cannot be applied to the indicated exhibit
088         *         (eg because of the exhibit type or the exhibit does not exist);
089         *         else returns a non-null ScoreAndConf value
090         */
091        ScoreAndConf getCachedCompositeScoreAndConfidence(final Name.ExhibitFull exhibitName,
092                                                          final boolean allowStale)
093            throws IOException;
094    
095        /**Computes the raw score [-1,+1] and confidence [0,+1] for the specified exhibit with the specified scorer and parameters; never null but may be (0,0).
096         * This is <em>NOT</em> moderated/weighted in the light of how well the scorer's predictions
097         * match reality; that requires an extra step.
098         *
099         * @param exhibitName  valid full exhibit name
100         * @param scorer  instance of the Scorer; never null
101         * @param allowStale  if true then allow a stale value from cache,
102         *     else throw an exception if nothing is currently available
103         *
104         * @return (0,0) if named scorer is not available,
105         *         cannot be used with the specified parameters,
106         *         or cannot be applied to the indicated exhibit
107         *         (eg because of the exhibit type or the exhibit does not exist);
108         *         else returns a non-null ScoreAndConf value
109         */
110        ScoreAndConf computeUnweightedScoreAndConfidence(final ExhibitFull exhibitName,
111                                                         final ScorerIF scorer,
112                                                         final boolean allowStale)
113            throws IOException;
114    
115        /**ScoreAndConfidence for the given Scorer over all exhibit types; never null but may be (0,0) where the scorer is unknown or untested.
116         * Essentially the result of this should be multiplied by the result for each exhibit
117         * (for the same scorer and parameters)
118         * to normalise the predicted score and confidence for the exhibit.
119         * <p>
120         * Some results will be cached (typically those from the base/current Scorer lists)
121         * while other may have to be computed each time, which may be slow.
122         * <p>
123         * The source parameter can be used to track which mechanisms are most effective
124         * at generating improvements in the population,
125         * and could, for example, to spend more time on those that are most effective right now.
126         *
127         * @param scorerNameAndParameters  the name and parameters of the scorer; never null
128         * @param allowStale  if true then allow a stale or low-confidence value from cache,
129         *     else throw an exception if nothing is currently available
130         *     and we cannot quickly compute enough points to increase our confidence
131         * @param source  the name of the mechanism used to generate this Scorer value,
132         *     or null if none
133         *
134         * @return  the score represents the correlation with the underlying votes
135         *     (and whatever the scoring is measured against)
136         *     with MAX meaning perfect correlation, 0 meaning no correlation,
137         *     and -MAX meaning perfectly wrong answers all the time,
138         *     and the confidence 0 if we have no (or very/too few) data points
139         *     and approaching MAX as we have a large (enough) number of data points
140         */
141        ScoreAndConf computeScorerWeighting(final String scorerNameAndParameters,
142                                            final boolean allowStale,
143                                            final String source)
144            throws IOException;
145    
146        /**ScoreAndConfidence for the given Scorer over all exhibit types; never null but may be (0,0) where the scorer is unknown or untested.
147         * Essentially the result of this should be multiplied by the result for each exhibit
148         * (for the same scorer and parameters)
149         * to normalise the predicted score and confidence for the exhibit.
150         * <p>
151         * Some results will be cached (typically those from the base/current Scorer lists)
152         * while other may have to be computed each time, which may be slow.
153         * <p>
154         * The source parameter can be used to track which mechanisms are most effective
155         * at generating improvements in the population,
156         * and could, for example, to spend more time on those that are most effective right now.
157         * <p>
158         * The main value of this variant over the String 'name-and-parameters' method
159         * is that the Scorer instance supplied is used,
160         * which avoids constructing a new instance and may enable state to be shared better.
161         *
162         * @param scorer  instance of the Scorer; never null
163         * @param allowStale  if true then allow a stale or low-confidence value from cache,
164         *     else throw an exception if nothing is currently available
165         *     and we cannot quickly compute enough points to increase our confidence
166         * @param source  the name of the mechanism used to generate this Scorer value,
167         *     or null if none
168         *
169         * @return  the score represents the correlation with the underlying votes
170         *     (and whatever the scoring is measured against)
171         *     with MAX meaning perfect correlation, 0 meaning no correlation,
172         *     and -MAX meaning perfectly wrong answers all the time,
173         *     and the confidence 0 if we have no (or very/too few) data points
174         *     and approaching MAX as we have a large (enough) number of data points
175         */
176        ScoreAndConf computeScorerWeighting(final ScorerIF scorer,
177                                            final boolean allowStale,
178                                            final String source)
179            throws IOException;
180    
181        /**Base set of available Scorers' names (no parameters); never null but may be empty.
182         * The values returned are of the form <i>ScorerName</i>.
183         */
184        Set<String> getBaseScorersWithoutParameters();
185    
186        /**Get base non-parameterised Scorer by name; null if no such base Scorer supported.
187         * @param  baseName  base (no parameters) name of Scorer; must not be null
188         */
189        ScorerIF getBaseScorerByName(String baseName);
190    
191        /**Current set of available Scorers name and parameters (where applicable); never null but may be empty.
192         * The values returned are of the form <i>ScorerName{:name=value}*</i>.
193         * <p>
194         * The scorers returned by this will generally be the best available,
195         * usually the best one or two per base-Scorer type,
196         * allowing in particular that different Scorer types may have different domains.
197         * <p>
198         * This call should not be desperately expensive,
199         * eg will not generally be doing any evolution/scoring,
200         * but may not be very quick/cheap either as it may require some search and sort.
201         */
202        Set<String> getCurrentScorersWithParameters(final boolean allowStale);
203    
204        /**Compute exemplar exhibit sub-set to calibrate Scorers with given base name against; never null but may be empty.
205         * These may be exhibits for which we get particularly good or bad predictions,
206         * or a random sub-set.
207         * <p>
208         * An implementation may return an empty result (not null) if it cannot compute this value.
209         * <p>
210         * It may be possible to tune or pre-test new Scorers against the results of this
211         * as a fast filter.
212         * <p>
213         * If a base name is specified that is invalid, it is treated as if null.
214         * @param baseName  base name of Scorer to extract calibration set for,
215         *     or null for a generic all-Scorers calibration set
216         * @param maxSamples  the maximum number of samples to return; strictly positive
217         * @param difficult  if TRUE the return the difficult cases that we do not predict well,
218         *     if FALSE then return the easy cases that we predict well,
219         *     else return a mixure of good, bad, and other random cases
220         * @param allowStale  if true then allow slightly older data for speed and robustness
221         *
222         * @return map of zero-or-more exhibits (short names) to calibration-data values; non-null
223         */
224        Map<ExhibitShort, ScoreAndConf> extractCalibrationSet(String baseName, int maxSamples, Boolean difficult, boolean allowStale)
225            throws IOException;
226    
227        /**Get the current population size; non-negative. */
228        int size();
229    
230        /**Get Scorer instance given the Scorer{:value=name}* format; null if no such Scorer available.
231         * Any instance returned may be a shared/cached instance rather than a new instance.
232         */
233        ScorerIF getScorerInstance(final String nameAndParameters);
234    
235        /**Non-blocking attempt to queue an externally-supplied Scorer value; returns true if accepted.
236         * The input values must be fully validated and canonicalised before be used,
237         * but it is useful if some minimal screening is done before submitting values to this routine
238         * to prevent (for example) pointless excessive resource consumption.
239         * <p>
240         * A typical implementation that accepts inbound Scorer values would bound this with
241         * a bounded-size non-blocking queue.
242         * <p>
243         * A given implementation may always return false (ie never accept inbound Scorer values).
244         */
245        boolean offerExternalScorer(String externalScorerNameAndParameters);
246    
247        /**Returns true if this cache can definitely accept (many) more externally-supplied Scorer values.
248         * Even if this returns false we may in practice be able to accept
249         * one or more new values: this is indicative.
250         * <p>
251         * An implementation that cannot accept any external Scorer values must always return false.
252         * <p>
253         * Typically this is true if any internal bounded-size queue has a lot of space left,
254         * eg is half empty, but this is not a guarantee that another value will actually be accepted.
255         */
256        boolean canAcceptMoreExternalScorers();
257    
258        /**Returns true if at least once external Scorer is queued waiting to be processed. */
259        boolean hasQueuedExternalScorer();
260    
261        /**Called/polled periodically (of the order of 1Hz) to do donkey-work and background tasks.
262         * In particular, this call drives the search for improved Scorers,
263         * as well as performing housekeeping and maintaining caches to speed foreground tasks.
264         * <p>
265         * This launches its work in a low-priority daemon thread,
266         * and limits the number of such concurrent work threads globally
267         * by silently discarding any excess,
268         * ie this call always returns quickly.
269         * <p>
270         * This routine should not be called (often) if the host system is under heavy load.
271         */
272        void poll() throws IOException;
273    
274        /**Save work-in-progress if possible, and free up resources, ASAP.
275         * This may enable us to reduce work lost during a graceful system shutdown,
276         * but many shutdowns may not be graceful and so we should incrementally save/checkpoint too.
277         * <p>
278         * By default does nothing.
279         */
280        void destroy();
281    
282    
283        /**Trivial implementation that always returns "no comment"/empty/null values; non-null. */
284        public static final ScorerCacheIF TRIVIAL = new ScorerCacheIF() {
285            public ScoreAndConf computeCompositeScoreAndConfidence(final Name.ExhibitFull exhibitName, final boolean allowStale) { return(ScoreAndConf.NO_OPINION); }
286            public ScoreAndConf getCachedCompositeScoreAndConfidence(final Name.ExhibitFull exhibitName, final boolean allowStale) { return(null); }
287            public ScoreAndConf computeScorerWeighting(final String scorerNameAndParameters, final boolean allowStale, final String source)  { return(ScoreAndConf.NO_OPINION); }
288            public ScoreAndConf computeScorerWeighting(final ScorerIF scorer, final boolean allowStale, final String source)  { return(ScoreAndConf.NO_OPINION); }
289            public ScoreAndConf computeUnweightedScoreAndConfidence(final ExhibitFull exhibitName, final ScorerIF scorer, final boolean allowStale) { return(ScoreAndConf.NO_OPINION); }
290            public Set<String> getBaseScorersWithoutParameters() { return(Collections.emptySet()); }
291            public ScorerIF getBaseScorerByName(final String baseName) { return(null); }
292            public Set<String> getCurrentScorersWithParameters(final boolean allowStale) { return(Collections.emptySet()); }
293            public Map<ExhibitShort, ScoreAndConf> extractCalibrationSet(final String baseName, final int maxSamples, final Boolean difficult, final boolean allowStale) { return(Collections.emptyMap()); }
294            public int size() { return(0); }
295            public ScorerIF getScorerInstance(final String nameAndParameters) { return(null); }
296            public boolean offerExternalScorer(final String externalScorerNameAndParameters) { return(false); }
297            public boolean canAcceptMoreExternalScorers() { return(false); }
298            public boolean hasQueuedExternalScorer() { return(false); }
299            public void poll() { }
300            public void destroy() { }
301            };
302        }