001    /*
002    Copyright (c) 1996-2011, Damon Hart-Davis
003    All rights reserved.
004    
005    Redistribution and use in source and binary forms, with or without
006    modification, are permitted provided that the following conditions are
007    met:
008    
009      * Redistributions of source code must retain the above copyright
010        notice, this list of conditions and the following disclaimer.
011    
012      * Redistributions in binary form must reproduce the above copyright
013        notice, this list of conditions and the following disclaimer in the
014        documentation and/or other materials provided with the
015        distribution.
016    
017    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
018    IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
019    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
020    PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
021    OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
022    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
023    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
024    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
025    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
026    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
027    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
028    */
029    
030    package org.hd.d.pg2k.ai.scorer;
031    
032    import java.io.IOException;
033    import java.util.Collections;
034    import java.util.HashMap;
035    import java.util.Map;
036    import java.util.Set;
037    
038    import org.hd.d.pg2k.ai.scorer.parameterised.LocalSampler;
039    import org.hd.d.pg2k.svrCore.AllExhibitImmutableData;
040    import org.hd.d.pg2k.svrCore.Name;
041    import org.hd.d.pg2k.svrCore.Name.ExhibitFull;
042    import org.hd.d.pg2k.svrCore.Name.ExhibitShort;
043    import org.hd.d.pg2k.svrCore.SimpleLoggerIF;
044    import org.hd.d.pg2k.svrCore.Tuple.Pair;
045    import org.hd.d.pg2k.svrCore.datasource.SimpleExhibitPipelineIF;
046    
047    /**"Lite" implementation to compute (and cache) the score and confidence for exhibits.
048     * Used were access to the full data pipeline is not available or too expensive,
049     * and where memory and CPU resources are potentially scarce,
050     * for example in an applet or JWS app run by a user to donate CPU time.
051     * <p>
052     * This caps its use of resources, population size, etc, at smaller levels
053     * than a "full" implementation might.
054     * <p>
055     * This only handles the most promising sub-set of "real" parameterised Scorers.
056     */
057    public final class MiniScorerCacheImpl extends AbstractScorerCache implements ScorerCacheIF
058        {
059        /**Construct an instance attached to the supplied data source and logger.
060         * @param dataSource  (probably-restricted) live data source; must not be null
061         */
062        public MiniScorerCacheImpl(final SimpleExhibitPipelineIF dataSource,
063                                   final SimpleLoggerIF log,
064                                   final ScorerPopulation.NewBestCallbackIF callback)
065            {
066            super(new ScorerPopulation(fixedSimpleScorers,
067                                       callback,
068                                       MAX_MINI_SCORER_SCORES_RETAINED),
069                  dataSource,
070                  log);
071            }
072    
073        /**Maximum number of Scorers retained in the population; strictly positive.
074         * Small enough not to be too burdensome in memory for an applet, for example.
075         * <p>
076         * Much more than 1000 may cause out-of-memory difficulties.
077         */
078        public static final int MAX_MINI_SCORER_SCORES_RETAINED = (1 << 10);
079    
080        /**Immutable map from short exhibit name to calibration data; never null but may be empty.
081         * Marked as volatile to allow atomic replacement of the entire map,
082         * but each map instance is immutable.
083         * <p>
084         * Initially empty.
085         */
086        volatile Map<Name.ExhibitShort, ScoreAndConf> calibrationData = Collections.emptyMap();
087    
088        /**Indicate the set of exhibits (short names) to calibrate against and their score/confidence values.
089         * Used by computeScorerWeighting() and thus computeScorerWeighting()
090         * to calibrate the Scorers.
091         * <p>
092         * This restricts the work that has to be done for calibration
093         * (and the data that need be fetched, which may be important for some users),
094         * and supplies calibration data that may not be available over a restricted pipeline.
095         * <p>
096         * Any exhibit data and metadata required will be fetched from the pipeline.
097         *
098         * @param  calibrationData  map from full valid exhibit names to non-null calibration data;
099         *     never null, though may be empty
100         */
101        public void setCalibrationExhibitsAndScores(final Map<Name.ExhibitShort, ScoreAndConf> calibrationData)
102            {
103            if(calibrationData == null) { throw new IllegalArgumentException(); }
104            // Create new immutable map with copy of input data for safety.
105            final Map<Name.ExhibitShort, ScoreAndConf> m = Collections.unmodifiableMap(new HashMap<Name.ExhibitShort, ScoreAndConf>(calibrationData));
106            // Validate the map keys, at least for syntax.
107    //        for(final Name.ExhibitShort n : m.keySet())
108    //            { if(!ExhibitName.validNameFinalComponentSyntax(n)) { throw new IllegalArgumentException(); } }
109            // Having validated the calibration data, store it atomically.
110            this.calibrationData = m;
111            }
112    
113        /**Get the set of exhibits (short names) to calibrate against and their score/confidence values; never null. */
114        public Map<Name.ExhibitShort, ScoreAndConf> getCalibrationExhibitsAndScores()
115            { return(calibrationData); }
116    
117    
118        /**The (small) immutable current fixed set of parameterless base Scorer instances.
119         * Note that since these are fixed and immutable and have no parameters,
120         * we can safely create and store instances here.
121         * <p>
122         * These include only the live Scorers that we want end-users to help search for.
123         * It is possible that we might allow this to be updated remotely
124         */
125        public static final Map<String, ScorerIF> fixedSimpleScorers;
126        /**Initialise fixedSimpleScorers. */
127        static
128            {
129            final ScorerIF simpleScorers[] =
130                {
131                new LocalSampler(),
132                };
133            // We use a HashMap for speed of lookup.
134            final HashMap<String, ScorerIF> m = new HashMap<String, ScorerIF>(2 * simpleScorers.length);
135            for(final ScorerIF sc : simpleScorers)
136                { m.put(sc.getBaseName(), sc); }
137            fixedSimpleScorers = Collections.unmodifiableMap(m); // Make read-only.
138            }
139    
140    
141        /**Computes a weighted composite score [-1,+1] and confidence [0,+1] for the specified exhibit with the best available scorers/parameters; never null but may be (0,0).
142         * Always throws UnsupportedOperationException in this implementation.
143         */
144        public ScoreAndConf computeCompositeScoreAndConfidence(final Name.ExhibitFull exhibitName,
145                                                               final boolean allowStale)
146            throws IOException
147            { throw new UnsupportedOperationException(); }
148    
149        /**Not available.
150         * Always throws UnsupportedOperationException in this implementation.
151         */
152        public ScoreAndConf getCachedCompositeScoreAndConfidence(final ExhibitFull exhibitName, final boolean allowStale)
153            { throw new UnsupportedOperationException(); }
154    
155    
156        /**Base set of available Scorers' names (no parameters); never null but may be empty.
157         * The values returned are of the form <i>ScorerName</i>.
158         */
159        public Set<String> getBaseScorersWithoutParameters()
160            { return(fixedSimpleScorers.keySet()); }
161    
162        /**Get base non-parameterised Scorer by name; null if no such base Scorer supported.
163         * @param  baseName  base (no parameters) name of Scorer; must not be null
164         */
165        public ScorerIF getBaseScorerByName(final String baseName)
166            { return(fixedSimpleScorers.get(baseName)); }
167    
168        /**Compute exemplar exhibit sub-set to calibrate against; never null but may be empty.
169         * This implementation returns its input calibration data.
170         */
171        public Map<ExhibitShort, ScoreAndConf>extractCalibrationSet(final String basename, final int maxSamples, final Boolean difficult, final boolean allowStale)
172            { return(calibrationData); }
173    
174        /**ScoreAndConfidence for the given scorer itself over all exhibit types; never null but may be (0,0) where the scorer is unknown or untested.
175         * Essentially the result of this should be multiplied by the result for each exhibit
176         * (for the same scorer and parameters)
177         * to normalise the predicted score and confidence for the exhibit.
178         * <p>
179         * Some results will be cached (typically those from the base/current Scorer lists)
180         * while other may have to be computed each time, which may be slow.
181         *
182         * @param allowStale  if true then allow a stale or low-confidence value from cache,
183         *     else throw an exception if nothing is currently available
184         *     and we cannot quickly compute enough points to increase our confidence
185         * @param source  the name of the mechanism used to generate this Scorer value,
186         *     or null if none
187         *
188         * @return  the score represents the correlation with the underlying votes
189         *     (and whatever the scoring is measured against)
190         *     with MAX meaning perfect correlation, 0 meaning no correlation,
191         *     and -MAX meaning perfectly wrong answers all the time,
192         *     and the confidence 0 if we have no (or very/too few) data points
193         *     and approaching MAX as we have a large (enough) number of data points
194         */
195        public ScoreAndConf computeScorerWeighting(final ScorerIF scorer,
196                                                   final boolean allowStale,
197                                                   final String source)
198            throws IOException
199            {
200            if(scorer == null) { throw new IllegalArgumentException(); }
201    
202            // Return what we have in cache, if anything.
203            final ScoreAndConf cachedValue = population.getScorerWeighting(scorer, allowStale);
204            if(cachedValue != null) { return(cachedValue); }
205    
206            final AllExhibitImmutableData aeid = dataSource.getAllExhibitImmutableData(-1);
207            // If there are no live exhibits then we can't compute (nor cache) a result.
208            // This is especially important to avoid cacheing spurious false results at start-up.
209            if(aeid.length == 0) { return(ScoreAndConf.NO_OPINION); }
210    
211            // If there are no calibration exhibits then we cannot compute (nor cache) a result.
212            final Map<Name.ExhibitShort, ScoreAndConf> calibData = calibrationData;
213            if(calibData.isEmpty()) { return(ScoreAndConf.NO_OPINION); }
214    
215            // Compute the calibration result.
216            final Pair<ScoreAndConf, Boolean> weighting = ScorerCreator.computeWeighting(calibData, this, scorer, aeid, allowStale);
217            final ScoreAndConf result = weighting.first;
218    
219            // Atomically store/replace the result in cache if not "partial"/incomplete.
220            // We allow values computed with potentially-stale inputs to be cached
221            // as probably reasonably accurate if other conditions are met.
222            // Note that if the Scorer that we have just generated is amongst the best
223            // then it may be automatically persisted/propagated.
224            if(!weighting.second)
225                {
226                final boolean isBest = population.putScorerWeighting(scorer, result, dataSource, log);
227                if(isBest)
228                    {
229    log.log("[Computed/cached 'best' Scorer from source '"+source+"'; score and confidence "+result+"; "+scorer.getNameAndParameters()+" .]");
230    //if(/* IsDebug.isDebug && */ (source == null)) { (new Throwable("FINDME")).printStackTrace(); /* Find places that we have omitted the source. */ }
231                    }
232                }
233    
234            return(result);
235            }
236        }