001    /*
002    Copyright (c) 1996-2009, Damon Hart-Davis
003    All rights reserved.
004    
005    Redistribution and use in source and binary forms, with or without
006    modification, are permitted provided that the following conditions are
007    met:
008    
009      * Redistributions of source code must retain the above copyright
010        notice, this list of conditions and the following disclaimer.
011    
012      * Redistributions in binary form must reproduce the above copyright
013        notice, this list of conditions and the following disclaimer in the
014        documentation and/or other materials provided with the
015        distribution.
016    
017    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
018    IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
019    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
020    PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
021    OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
022    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
023    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
024    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
025    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
026    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
027    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
028    */
029    
030    package org.hd.d.pg2k.ai.scorer;
031    
032    import java.io.IOException;
033    import java.util.Collections;
034    import java.util.Map;
035    import java.util.Set;
036    
037    import org.hd.d.pg2k.svrCore.Name;
038    import org.hd.d.pg2k.svrCore.Name.ExhibitFull;
039    import org.hd.d.pg2k.svrCore.Name.ExhibitShort;
040    
041    
042    /**Base interface to compute (and cache) the score and confidence for exhibits.
043     * Note: since the result of this computation may be used in computing
044     * (EPCM) the ExhibitPropsComputableMutable value for an exhibit,
045     * then any implementation of this must avoid forcing recalculation
046     * of any EPCM value to avoid danger of infinite recursion
047     * (other than the static calcVoteFactor() method).
048     * Ideally the value computed will not depend on any EPCM value.
049     * <p>
050     * Methods that take an allowStale parameter
051     * will generally try to reduce compute and I/O effort and increase robustness
052     * at a small-ish cost in currency.
053     * Typically such methods will return data up to about one day old if present in cache,
054     * else the newest available from cache however old it is if there is a problem
055     * computing an up-to-date value (eg because of connectivity issues).
056     */
057    public interface ScorerCacheIF
058        {
059        /**Computes a weighted composite score [-1,+1] and confidence [0,+1] for the specified exhibit with the best available scorers/parameters; never null but may be (0,0).
060         * This is the highest-level available entry to the cache.
061         *
062         * @param exhibitName  valid full exhibit name
063         * @param allowStale  if true then allow a stale value from cache,
064         *     else throw an exception if nothing is currently available
065         *
066         * @return (0,0) if named scorer is not available,
067         *         cannot be used with the specified parameters,
068         *         or cannot be applied to the indicated exhibit
069         *         (eg because of the exhibit type or the exhibit does not exist);
070         *         else returns a non-null ScoreAndConf value
071         */
072        ScoreAndConf computeCompositeScoreAndConfidence(final Name.ExhibitFull exhibitName,
073                                                        final boolean allowStale)
074            throws IOException;
075    
076        /**Computes the raw score [-1,+1] and confidence [0,+1] for the specified exhibit with the specified scorer and parameters; never null but may be (0,0).
077         * This is <em>NOT</em> moderated/weighted in the light of how well the scorer's predictions
078         * match reality; that requires an extra step.
079         *
080         * @param exhibitName  valid full exhibit name
081         * @param scorer  instance of the Scorer; never null
082         * @param allowStale  if true then allow a stale value from cache,
083         *     else throw an exception if nothing is currently available
084         *
085         * @return (0,0) if named scorer is not available,
086         *         cannot be used with the specified parameters,
087         *         or cannot be applied to the indicated exhibit
088         *         (eg because of the exhibit type or the exhibit does not exist);
089         *         else returns a non-null ScoreAndConf value
090         */
091        ScoreAndConf computeUnweightedScoreAndConfidence(final ExhibitFull exhibitName,
092                                                         final ScorerIF scorer,
093                                                         final boolean allowStale)
094            throws IOException;
095    
096        /**ScoreAndConfidence for the given Scorer over all exhibit types; never null but may be (0,0) where the scorer is unknown or untested.
097         * Essentially the result of this should be multiplied by the result for each exhibit
098         * (for the same scorer and parameters)
099         * to normalise the predicted score and confidence for the exhibit.
100         * <p>
101         * Some results will be cached (typically those from the base/current Scorer lists)
102         * while other may have to be computed each time, which may be slow.
103         * <p>
104         * The source parameter can be used to track which mechanisms are most effective
105         * at generating improvements in the population,
106         * and could, for example, to spend more time on those that are most effective right now.
107         *
108         * @param scorerNameAndParameters  the name and parameters of the scorer; never null
109         * @param allowStale  if true then allow a stale or low-confidence value from cache,
110         *     else throw an exception if nothing is currently available
111         *     and we cannot quickly compute enough points to increase our confidence
112         * @param source  the name of the mechanism used to generate this Scorer value,
113         *     or null if none
114         *
115         * @return  the score represents the correlation with the underlying votes
116         *     (and whatever the scoring is measured against)
117         *     with MAX meaning perfect correlation, 0 meaning no correlation,
118         *     and -MAX meaning perfectly wrong answers all the time,
119         *     and the confidence 0 if we have no (or very/too few) data points
120         *     and approaching MAX as we have a large (enough) number of data points
121         */
122        ScoreAndConf computeScorerWeighting(final String scorerNameAndParameters,
123                                            final boolean allowStale,
124                                            final String source)
125            throws IOException;
126    
127        /**ScoreAndConfidence for the given Scorer over all exhibit types; never null but may be (0,0) where the scorer is unknown or untested.
128         * Essentially the result of this should be multiplied by the result for each exhibit
129         * (for the same scorer and parameters)
130         * to normalise the predicted score and confidence for the exhibit.
131         * <p>
132         * Some results will be cached (typically those from the base/current Scorer lists)
133         * while other may have to be computed each time, which may be slow.
134         * <p>
135         * The source parameter can be used to track which mechanisms are most effective
136         * at generating improvements in the population,
137         * and could, for example, to spend more time on those that are most effective right now.
138         * <p>
139         * The main value of this variant over the String 'name-and-parameters' method
140         * is that the Scorer instance supplied is used,
141         * which avoids constructing a new instance and may enable state to be shared better.
142         *
143         * @param scorer  instance of the Scorer; never null
144         * @param allowStale  if true then allow a stale or low-confidence value from cache,
145         *     else throw an exception if nothing is currently available
146         *     and we cannot quickly compute enough points to increase our confidence
147         * @param source  the name of the mechanism used to generate this Scorer value,
148         *     or null if none
149         *
150         * @return  the score represents the correlation with the underlying votes
151         *     (and whatever the scoring is measured against)
152         *     with MAX meaning perfect correlation, 0 meaning no correlation,
153         *     and -MAX meaning perfectly wrong answers all the time,
154         *     and the confidence 0 if we have no (or very/too few) data points
155         *     and approaching MAX as we have a large (enough) number of data points
156         */
157        ScoreAndConf computeScorerWeighting(final ScorerIF scorer,
158                                            final boolean allowStale,
159                                            final String source)
160            throws IOException;
161    
162        /**Base set of available Scorers' names (no parameters); never null but may be empty.
163         * The values returned are of the form <i>ScorerName</i>.
164         */
165        Set<String> getBaseScorersWithoutParameters();
166    
167        /**Get base non-parameterised Scorer by name; null if no such base Scorer supported.
168         * @param  baseName  base (no parameters) name of Scorer; must not be null
169         */
170        ScorerIF getBaseScorerByName(String baseName);
171    
172        /**Current set of available Scorers name and parameters (where applicable); never null but may be empty.
173         * The values returned are of the form <i>ScorerName{:name=value}*</i>.
174         * <p>
175         * The scorers returned by this will generally be the best available,
176         * usually the best one or two per base-Scorer type,
177         * allowing in particular that different Scorer types may have different domains.
178         * <p>
179         * This call should not be desperately expensive,
180         * eg will not generally be doing any evolution/scoring,
181         * but may not be very quick/cheap either as it may require some search and sort.
182         */
183        Set<String> getCurrentScorersWithParameters(final boolean allowStale);
184    
185        /**Compute exemplar exhibit sub-set to calibrate Scorers with given base name against; never null but may be empty.
186         * These may be exhibits for which we get particularly good or bad predictions,
187         * or a random sub-set.
188         * <p>
189         * An implementation may return an empty result (not null) if it cannot compute this value.
190         * <p>
191         * It may be possible to tune or pre-test new Scorers against the results of this
192         * as a fast filter.
193         * <p>
194         * If a base name is specified that is invalid, it is treated as if null.
195         * @param baseName  base name of Scorer to extract calibration set for,
196         *     or null for a generic all-Scorers calibration set
197         * @param maxSamples  the maximum number of samples to return; strictly positive
198         * @param difficult  if TRUE the return the difficult cases that we do not predict well,
199         *     if FALSE then return the easy cases that we predict well,
200         *     else return a mixure of good, bad, and other random cases
201         * @param allowStale  if true then allow slightly older data for speed and robustness
202         *
203         * @return map of zero-or-more exhibits (short names) to calibration-data values; non-null
204         */
205        Map<ExhibitShort, ScoreAndConf> extractCalibrationSet(String baseName, int maxSamples, Boolean difficult, boolean allowStale)
206            throws IOException;
207    
208        /**Get the current population size; non-negative. */
209        int size();
210    
211        /**Get Scorer instance given the Scorer{:value=name}* format; null if no such Scorer available.
212         * Any instance returned may be a shared/cached instance rather than a new instance.
213         */
214        ScorerIF getScorerInstance(final String nameAndParameters);
215    
216        /**Non-blocking attempt to queue an externally-supplied Scorer value; returns true if accepted.
217         * The input values must be fully validated and canonicalised before be used,
218         * but it is useful if some minimal screening is done before submitting values to this routine
219         * to prevent (for example) pointless excessive resource consumption.
220         * <p>
221         * A typical implementation that accepts inbound Scorer values would bound this with
222         * a bounded-size non-blocking queue.
223         * <p>
224         * A given implementation may always return false (ie never accept inbound Scorer values).
225         */
226        boolean offerExternalScorer(String externalScorerNameAndParameters);
227    
228        /**Returns true if this cache can definitely accept (many) more externally-supplied Scorer values.
229         * Even if this returns false we may in practice be able to accept
230         * one or more new values: this is indicative.
231         * <p>
232         * An implementation that cannot accept any external Scorer values must always return false.
233         * <p>
234         * Typically this is true if any internal bounded-size queue has a lot of space left,
235         * eg is half empty, but this is not a guarantee that another value will actually be accepted.
236         */
237        boolean canAcceptMoreExternalScorers();
238    
239        /**Returns true if at least once external Scorer is queued waiting to be processed. */
240        boolean hasQueuedExternalScorer();
241    
242    
243        /**Trivial implementation that always returns "no comment"/empty/null values; non-null. */
244        public static final ScorerCacheIF TRIVIAL = new ScorerCacheIF() {
245            public ScoreAndConf computeCompositeScoreAndConfidence(final Name.ExhibitFull exhibitName, final boolean allowStale) { return(ScoreAndConf.NO_OPINION); }
246            public ScoreAndConf computeScorerWeighting(final String scorerNameAndParameters, final boolean allowStale, final String source)  { return(ScoreAndConf.NO_OPINION); }
247            public ScoreAndConf computeScorerWeighting(final ScorerIF scorer, final boolean allowStale, final String source)  { return(ScoreAndConf.NO_OPINION); }
248            public ScoreAndConf computeUnweightedScoreAndConfidence(final ExhibitFull exhibitName, final ScorerIF scorer, final boolean allowStale) { return(ScoreAndConf.NO_OPINION); }
249            public Set<String> getBaseScorersWithoutParameters() { return(Collections.emptySet()); }
250            public ScorerIF getBaseScorerByName(final String baseName) { return(null); }
251            public Set<String> getCurrentScorersWithParameters(final boolean allowStale) { return(Collections.emptySet()); }
252            public Map<ExhibitShort, ScoreAndConf> extractCalibrationSet(final String baseName, final int maxSamples, final Boolean difficult, final boolean allowStale) { return(Collections.emptyMap()); }
253            public int size() { return(0); }
254            public ScorerIF getScorerInstance(final String nameAndParameters) { return(null); }
255            public boolean offerExternalScorer(final String externalScorerNameAndParameters) { return(false); }
256            public boolean canAcceptMoreExternalScorers() { return(false); }
257            public boolean hasQueuedExternalScorer() { return(false); }
258            };
259        }