001    /*
002    Copyright (c) 1996-2009, Damon Hart-Davis
003    All rights reserved.
004    
005    Redistribution and use in source and binary forms, with or without
006    modification, are permitted provided that the following conditions are
007    met:
008    
009      * Redistributions of source code must retain the above copyright
010        notice, this list of conditions and the following disclaimer.
011    
012      * Redistributions in binary form must reproduce the above copyright
013        notice, this list of conditions and the following disclaimer in the
014        documentation and/or other materials provided with the
015        distribution.
016    
017    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
018    IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
019    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
020    PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
021    OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
022    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
023    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
024    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
025    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
026    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
027    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
028    */
029    
030    package org.hd.d.pg2k.ai.scorer;
031    
032    import java.io.IOException;
033    import java.io.InterruptedIOException;
034    import java.util.ArrayList;
035    import java.util.Collection;
036    import java.util.Collections;
037    import java.util.Comparator;
038    import java.util.HashMap;
039    import java.util.HashSet;
040    import java.util.Iterator;
041    import java.util.List;
042    import java.util.Map;
043    import java.util.Queue;
044    import java.util.Set;
045    import java.util.concurrent.ConcurrentHashMap;
046    import java.util.concurrent.ConcurrentMap;
047    import java.util.concurrent.atomic.AtomicBoolean;
048    import java.util.concurrent.atomic.AtomicInteger;
049    import java.util.concurrent.atomic.AtomicLong;
050    
051    import org.freehep.math.minuit.FCNBase;
052    import org.freehep.math.minuit.FunctionMinimum;
053    import org.freehep.math.minuit.MnMinimize;
054    import org.freehep.math.minuit.MnStrategy;
055    import org.freehep.math.minuit.MnUserParameters;
056    import org.hd.d.pg2k.svrCore.AllExhibitImmutableData;
057    import org.hd.d.pg2k.svrCore.CoreConsts;
058    import org.hd.d.pg2k.svrCore.GenUtils;
059    import org.hd.d.pg2k.svrCore.MemoryTools;
060    import org.hd.d.pg2k.svrCore.Name;
061    import org.hd.d.pg2k.svrCore.Rnd;
062    import org.hd.d.pg2k.svrCore.SimpleLoggerIF;
063    import org.hd.d.pg2k.svrCore.Tuple;
064    import org.hd.d.pg2k.svrCore.datasource.SimpleExhibitPipelineIF;
065    import org.hd.d.pg2k.svrCore.vars.EventPeriod;
066    import org.hd.d.pg2k.svrCore.vars.EventVariableValue;
067    import org.hd.d.pg2k.svrCore.vars.SimpleVariableDefinition;
068    import org.hd.d.pg2k.svrCore.vars.SystemVariables;
069    
070    import ORG.hd.d.IsDebug;
071    
072    
073    /**Creates new Scorer (parameter-set) instances given an existing population.
074     *
075     * @author dhd
076     */
077    public final class ScorerCreator
078        {
079        /**Scorer 'source' for recovering from persistent/shared store; never empty nor null. */
080        public static final String SSOURCE_REC = "REC";
081    
082        /**This method attempts to retrieve a "new" Scorer from the persisted history.
083         * This mechanism is used to recover previous known-good Scorers from local store and/or
084         * share good Scorers between all participating servers.
085         * This makes for faster start-up of new server instances,
086         * a local and global memory of good genotypes,
087         * and sharing of good genotypes between local populations.
088         *
089         * @return true if a Scorer was found that was not in the extant population
090         *     (though may not have been good enough to remain in it)
091         */
092        public static boolean retrieveOldScorer(final SimpleExhibitPipelineIF dataSource,
093                                                final ScorerCacheIF cache,
094                                                final ScorerPopulation population,
095                                                final SimpleLoggerIF log)
096            throws IOException
097            {
098            // Choose between the longest and a less-long period
099            // to try to pick up more variety of persisted Scorer.
100            final EventPeriod intervalSelector = Rnd.fastRnd.nextBoolean() ?
101                    EventPeriod.LONG : EventPeriod.VLONG;
102            // Choose a period from the previous full period to as-far-back-as-possible
103            // (for the given period/interval length)
104            // to improve long-term memory of Scorer values tests
105            // and to increase the chance of finding something rare,
106            // but with the choice heavily skewed to more recent events
107            // as more likely to be "better" or even present at all.
108            final long whichPeriod = intervalSelector.getIntervalNumber(System.currentTimeMillis()) -
109                Rnd.goodRnd.nextInt(1+Rnd.goodRnd.nextInt(1+Rnd.goodRnd.nextInt(SystemVariables.EVENT_SAMPLES_RETAINED)));
110            // Select from local (larger volume) and global (broadcast) results.
111            final SimpleVariableDefinition def = Rnd.fastRnd.nextBoolean() ?
112                    SystemVariables.AI_SCORER_STRING_LOCAL_EVENT : SystemVariables.AI_SCORER_STRING_GLOBAL_EVENT;
113            final EventVariableValue[] evvs = dataSource.getEventValues(def, intervalSelector, whichPeriod, null);
114            if(evvs.length == 0) { return(false); }
115            EventVariableValue evv = evvs[0];
116            // If the selected (full, historical) period has no events
117            // then try the current (partial) period instead,
118            // which has the advantage of getting us bang up-to-date.
119            if((evv == null) || (evv.getTotalDistinctValues() == 0))
120                { evv = dataSource.getEventValue(def, intervalSelector, true); }
121            if((evv == null) || (evv.getTotalDistinctValues() == 0)) { return(false); }
122    //        final ArrayList<Object> vals = new ArrayList<Object>(evv.getInfo().keySet());
123    //        if(vals.isEmpty()) { return(false); }
124    //        final String scorerNameAndStringParameters = (String) vals.get(Rnd.goodRnd.nextInt(vals.size()));
125            // Select an entry at random, though weighted more towards items that were "best" more often.
126            final String scorerNameAndStringParameters = (String) evv.getValueByRank(Rnd.goodRnd.nextInt(1+Rnd.goodRnd.nextInt(evv.getTotalDistinctValues())));
127            // Check that it is new...
128            if(population.getScorerWeighting(scorerNameAndStringParameters, true) != null) { return(false); }
129            // Evaluate/record/cache it if possible.
130            final ScoreAndConf sac = cache.computeScorerWeighting(scorerNameAndStringParameters, false, SSOURCE_REC);
131            // Check that it is good enough to remain in the population...
132            if(population.getScorerWeighting(scorerNameAndStringParameters, false) != null) { return(false); }
133    if(IsDebug.isDebug) { log.log("[ScorerCreator.retrieveOldScorer(): imported "+scorerNameAndStringParameters+" ScoreAndConfidence: "+sac+".]"); }
134            return(true); // Found (and imported/evaluated) a new Scorer not in our population.
135            }
136    
137        /**Scorer 'source' for the GA combine/mutate method; never empty nor null. */
138        public static final String SSOURCE_GA = "GA";
139    
140        /**Create a new Scorer (parameter set) from existing ones using normal GA techniques.
141         * This uses selection, mutation and recombination to attempt to find
142         * a "better" Scorer version based on those in the existing population.
143         *
144         * @param population  the extant population; never null
145         *
146         * @return true iff we created and evaluated a new Scorer not in the extant population
147         */
148        public static boolean createNewByGA(final ScorerCacheIF cache,
149                                            final ScorerPopulation population,
150                                            final SimpleLoggerIF log)
151            throws IOException
152            {
153            // Pick breeding set to work on.
154            // Allow slightly stale values for speed and robustness.
155            final List<String> breedingSet = pickBreedingSet(cache, population, true);
156            if(breedingSet == null) { return(false); /* No suitable breeding stock... */ }
157    
158            // Randomly select one or more different Scorers to mutate and combine.
159            final Set<String> parents = new HashSet<String>();
160            do
161                {
162                // Randomly select a Scorer, skewed towards "better" Scorers.
163                final String extant = breedingSet.get(
164                        Rnd.goodRnd.nextInt(1 + Rnd.goodRnd.nextInt(breedingSet.size())));
165                // Add this mutant if different to extant parents (eg if no parents so far).
166                parents.add(extant);
167                // Stop if this parent has zero or one parameters,
168                // since multiple parents cannot be usefully "combined" sexually.
169                if(extant.indexOf(AbstractScorer.SEPARATOR) == extant.lastIndexOf(AbstractScorer.SEPARATOR))
170                    { break; }
171                // We try not to draw too many samples from a small breeding set
172                // and we keep low the probability of using many parents.
173                } while((parents.size() < breedingSet.size()/4) && Rnd.goodRnd.nextBoolean());
174            assert(!parents.isEmpty());
175    
176            final ScorerIF firstParent = cache.getScorerInstance(parents.iterator().next());
177    
178            // Now make the new new variant/child by combining the parents if more than one
179            // and mutating the result.
180            // Else if there is exactly one parent then mutate it to get the new child.
181            final ScorerIF newVariant;
182            if(parents.size() == 1)
183                { newVariant = firstParent.createPerturbedVariant(); }
184            else
185                {
186                // Combine the parents' parameters
187                // and mutate the result/offspring.
188                final ArrayList<List<ScorerParam>> pt = new ArrayList<List<ScorerParam>>(parents.size());
189                for(final String snp : parents)
190                    { pt.add(population.getScorerInstance(snp).getParameterDefsAndValues()); }
191                // Shuffle once to get a completely random order.
192                Collections.shuffle(pt, Rnd.goodRnd);
193                // Now, add one token at a time from the first parent,
194                // occasionally shuffling to switch the source of tokens
195                // to be from a different parent.
196                // We stop when the current/first parent is out of tokens.
197                // We force at least one crossover.
198                final int pt0len = pt.get(0).size();
199                final List<ScorerParam> newParams = new ArrayList<ScorerParam>(pt0len);
200                final int forcedCrossoverAfter = (pt0len <= 3) ? 1 : (1+Rnd.goodRnd.nextInt(pt0len-2));
201                // Note that each time, for whatever the current "0" Scorer is,
202                // we test against its length to safely allow for variable parameter numbers.
203                for(int tokenNumber = 0; tokenNumber < pt.get(0).size(); ++tokenNumber)
204                    {
205                    newParams.add(pt.get(0).get(tokenNumber));
206                    // Occasionally switch to another parent.
207                    // We make it quite unlikely to switch/crossover at any one point
208                    // so as to give some notion of locality/clustering.
209                    // FIXME: The frequency/probability of crossover may be very important.
210                    if((forcedCrossoverAfter == tokenNumber) ||
211                       (Rnd.fastRnd.nextBoolean() && Rnd.goodRnd.nextBoolean()))
212                        {
213                        final int ptSize = pt.size();
214                        // Swap the current 0 item (which is the source of "genes")
215                        // randomly with one of the others.
216                        // This is analogous to a DNA crossover during meiosis.
217                        final int swapIndex = (ptSize == 2) ? 1 : 1+Rnd.goodRnd.nextInt(ptSize-1);
218                        final List<ScorerParam> old0 = pt.get(0);
219                        pt.set(0, pt.get(swapIndex));
220                        pt.set(swapIndex, old0);
221                        }
222                    }
223                // Get the un-mutated child.
224                final ScorerIF child = firstParent.createVariant(firstParent.getBaseName(), newParams);
225                final String childSNP = child.getNameAndParameters();
226                // Always mutate the child if identical to one parent
227                // or already exists in the population.
228                newVariant = (!parents.contains(childSNP) && (population.getScorerWeighting(child, true) == null)) ?
229                    child /* Already new. */ :
230                    child.createPerturbedVariant();
231                }
232    
233            // Check that this is new (or at least not rescored recently).
234            if(population.getScorerWeighting(newVariant, true) != null) { return(false); }
235    
236    //if(IsDebug.isDebug) { log.log("[ScorerCreator.createNewByGA(): trying new variant: " + newVariant + " of " + (new ArrayList<String>(parents)) + "...]"); }
237    
238            // Evaluate new variant, automatically recording/sharing if good.
239            /* final ScoreAndConf sac = */ cache.computeScorerWeighting(newVariant, false, SSOURCE_GA);
240            // Check that it is good enough to remain in the population...
241            if(population.getScorerWeighting(newVariant, false) != null) { return(false); }
242    //if(IsDebug.isDebug) { log.log("[ScorerCreator.createNewByGA(): ...variant ScoreAndConfidence: " + sac + ".]"); }
243            return(true); // Successfully created and evaluated a new Scorer.
244            }
245    
246    
247        /**Create a variant Scorer given an extant same-type Scorer and the array of values of the variable parameters; never null.
248         */
249        private static ScorerIF constructModifiedSNP(final double[] par, final ScorerIF initialScorer, final List<ScorerParam> paramDefsAndValuesOriginal, final int[] mnIndexToParamIndex)
250            {
251            if((par == null) || (initialScorer == null) ||
252               (paramDefsAndValuesOriginal == null) || (mnIndexToParamIndex == null))
253                { throw new IllegalArgumentException(); }
254            // Copy the original parameter values,
255            // overwriting the changed variable parameters under control of the minimiser.
256            final List<ScorerParam> params = new ArrayList<ScorerParam>(paramDefsAndValuesOriginal);
257            for(int i = par.length; --i >= 0; )
258                {
259                final int origParamIndex = mnIndexToParamIndex[i];
260                final ScorerParam p = paramDefsAndValuesOriginal.get(origParamIndex);
261                if(p instanceof ScorerParamInteger)
262                    {
263                    final ScorerParamInteger pi = (ScorerParamInteger) p;
264                    final int newValue = Math.min(pi.max, Math.max(pi.min, (int) Math.round(par[i])));
265                    // Replace the parameter in situ if the value has changed from the original.
266                    if(pi.value != newValue)
267                        { params.set(origParamIndex, ScorerParamInteger.createScorerParamInteger(pi.min, pi.def, pi.max, pi.delta, pi.biasedLow, pi.name, newValue)); }
268                    continue;
269                    }
270                assert(false); // Unexpected parameter type has been varied.
271                }
272    
273            // Now create the full Scorer name with parameters.
274            return(initialScorer.createVariant(initialScorer.getBaseName(), params));
275            }
276    
277    //    /**If true then a minimiser run that finds a new/'best' Scorer is allowed to run to completion.
278    //     * This may make worker threads slow to die, but they should be daemons and interruptable anyway.
279    //     */
280    //    private static final boolean ALLOW_MIN_RUN_TO_COMPLETION_AFTER_BEST = true;
281    
282        /**Scorer 'source' for the "minimiser" method; never empty nor null. */
283        public static final String SSOURCE_MIN = "MIN";
284    
285        /**Create a new Scorer (parameter set) from existing ones using a multi-variable "minimiser".
286         * This selects a "good" Scorer and then attempts to find a better/optimal one
287         * derived from it using mathematical error-minimisation techniques.
288         * <p>
289         * For the moment this only attempts to adjust the continuously-variable properties
290         * (ie the integer parameter values) and leaves the enumeration parameters alone.
291         *
292         * @param population  the extant population; never null
293         * @param calibSubset  if non-null, should be non-empty and is a calibration set
294         *     to optimise/minimise against
295         * @param endTime  target end time to stop computations at
296         *
297         * @return true iff we created and evaluated a new Scorer not in the extant population
298         */
299        public static boolean createNewByOpt(final SimpleExhibitPipelineIF dataSource,
300                                             final ScorerCacheIF cache,
301                                             final ScorerPopulation population,
302                                             final Map<Name.ExhibitShort, ScoreAndConf> calibSubset,
303                                             final SimpleLoggerIF log,
304                                             final long endTime)
305            throws IOException
306            {
307            if((dataSource == null) ||
308               (cache == null) ||
309               (population == null) ||
310               (log == null))
311                { throw new IllegalArgumentException(); }
312    
313            final long startTime = System.currentTimeMillis();
314            // We'll extend our finish time if we made progress during the original time slice.
315            final long extendedEndTime = endTime + Math.max(100, 2*(endTime-startTime));
316    
317            // Pick breeding set to work on.
318            // Allow slightly stale values for speed and robustness.
319            final List<String> breedingSet = pickBreedingSet(cache, population, true);
320            if(breedingSet == null) { return(false); /* No suitable breeding stock... */ }
321    
322            // Are we optimising for error against a small calibration subset?
323            final boolean useCalibSubset = (calibSubset != null) && !calibSubset.isEmpty();
324            // This value is guaranteed to be non-null and non-empty if useCalibSubset is true.
325            final Map<Name.ExhibitShort, ScoreAndConf> calib = (!useCalibSubset) ? null : calibSubset;
326            assert((!useCalibSubset) || ((calib != null) && (!calib.isEmpty()))) : "calib must be null or non-empty";
327    
328            final AllExhibitImmutableData aeid = dataSource.getAllExhibitImmutableData(-1);
329    
330            // Some of the time, fix some of the otherwise-variable parameters.
331            // we do this to allow the minimiser to focus its fire,
332            // have less chance of trying to adjust interacting parameters, etc.
333            // Only effective when the Scorer has 2 or more (continuously variable) parameters.
334            final boolean fixSomeParameters = Rnd.fastRnd.nextBoolean();
335    
336            // If we're fixing some parameters then we can optionally fix all those
337            // at or very close to their end stops.
338            final boolean fixAllExtremeParameters = fixSomeParameters && Rnd.goodRnd.nextBoolean();
339    
340            // Some of the time we perturb the initial Scorer we select from the breeding set.
341            // We do this to try to move out of an existing local minimum,
342            // especially as some perturbations may be quite large.
343            final boolean perturbInitialScorer = Rnd.goodRnd.nextBoolean();
344    
345            // Randomly select an extant Scorer,
346            // with selection skewed towards "better" Scorers from the available breeding set.
347            final String extant0 = breedingSet.get(
348                Rnd.goodRnd.nextInt(1 + Rnd.goodRnd.nextInt(breedingSet.size())));
349            // Create the Scorer instance with its parameters parsed.
350            final ScorerIF initialScorer0 = cache.getScorerInstance(extant0);
351            assert(initialScorer0 != null) : "must be able to recreate Scorer instance from breeding set SNP: "+extant0; // Must be able to create Scorer from value in extant population.
352            // Note the initial score/goodness.
353            final ScoreAndConf sacExtant0 = cache.computeScorerWeighting(extant0, true, null);
354            // Some of the time, perturb the parameters before we start.
355            // This may help get us out of any local minimum, ie a rut...
356            final String extant;
357            final ScorerIF initialScorer;
358            final ScoreAndConf sacExtant;
359            if(perturbInitialScorer)
360                {
361                // Use perturbed version of extant Scorer.
362                initialScorer = initialScorer0.createPerturbedVariant();
363                extant = initialScorer0.getNameAndParameters();
364    //            extant = initialScorer0.getPerturbedNameAndParameters();
365    //            initialScorer = cache.getScorerInstance(extant);
366                assert(initialScorer != null) : "must be able to recreate Scorer instance from perturbed SNP: "+extant; // Must be able to create instance for this Scorer variation.
367                sacExtant = cache.computeScorerWeighting(extant, true, SSOURCE_GA); // Simple mutation...
368                }
369            else
370                {
371                // Use selected extant Scorer as-is.
372                initialScorer = initialScorer0;
373                extant = extant0;
374                sacExtant = sacExtant0;
375                }
376            // Compute the goodness of our initial/starting Scorer.
377            final int goodnessExtant = ScoreAndConf.computeScorerGoodness(sacExtant);
378    //if(IsDebug.isDebug) { System.out.println("[ScorerCreator.createNewByOpt(): working on "+extant+"...]"); }
379    
380            // Ensure fast in-order and random access to the original parameter definitions.
381            // This assumes that all parameters have unique names.
382            // This must not be altered.
383            final List<ScorerParam> paramDefsAndValuesOriginal = Collections.unmodifiableList(new ArrayList<ScorerParam>(initialScorer.getParameterDefsAndValues()));
384            final int nParams = paramDefsAndValuesOriginal.size();
385    
386            // Map from from MnUserParameters params index to paramDefsAndValuesOriginal position.
387            // This makes it easy to simply take a copy of the original
388            // and then overwrite controlled parameter values in place.
389            // We allow this to be over-long...
390            final int mnIndexToParamIndex[] = new int[nParams];
391    
392    //        // We precompute the expanded versions of all the rest
393    //        // to make it relatively quick to assemble a modified parameter set.
394    //        final String fixedParams[] = new String[nParams];
395    
396            // Create the parameter set,
397            // including only those parameter types with reasonably continuous values,
398            // ie including integer parameters and excluding enumeration parameters.
399            final MnUserParameters params = new MnUserParameters();
400            int mnParamNum = 0;
401            for(int i = 0; i < nParams; ++i)
402                {
403                final ScorerParam p = paramDefsAndValuesOriginal.get(i);
404                assert(p != null) : "parameter cannot be null in paramDefsAndValuesOriginal from "+initialScorer;
405                final String name = p.getName();
406    
407                // Allow the minimiser to vary integer-valued parameters.
408                // If we're fixing extreme parameters (which may hinder the minimiser)
409                // then we simply don't vary any parameter within a delta of its limits.
410                if(p instanceof ScorerParamInteger)
411                    {
412                    final ScorerParamInteger pi = (ScorerParamInteger) p;
413                    // Create this as a variable parameter, if:
414                    //  * not fixing extreme-valued parameters, OR
415                    //  * not within a delta of the extremes (all but AT the extremes for delta==1).
416                    if((!fixAllExtremeParameters) ||
417                        ((pi.value - pi.min >= pi.delta) && (pi.max - pi.value >= pi.delta)))
418                        {
419                        // We set the error tolerance similar the "delta" value
420                        // since it is assumed to represent a "small" non-critical change.
421                        // We use a minimum error value of 1 to reflect the int granularity.
422                        params.add(name, pi.value, (pi.delta < 2) ? 1 : pi.delta, pi.min, pi.max);
423                        mnIndexToParamIndex[mnParamNum++] = i; // Provide mapping.
424                        continue;
425                        }
426                    }
427    
428    //            // For a parameter that cannot be varied (reasonably) smoothly,
429    //            // precompute its text representation for speed later.
430    //            fixedParams[i] = p.toNameValueString();
431                }
432    
433            // If we are fixing some otherwise-variable parameters, we do it here.
434            // We do this only if there is more than one variable parameter.
435            final int variableParamCount = mnParamNum;
436            assert(params.variableParameters() == variableParamCount) : "params.variableParameters() == variableParamCount";
437            if(fixSomeParameters && (variableParamCount > 1))
438                {
439                // We fix at least one of, but never all of, the parameters.
440                // We skew towards leaving few parameters variable.
441                final int parametersToFix = variableParamCount - 1 -
442                    Rnd.goodRnd.nextInt(1+Rnd.goodRnd.nextInt(variableParamCount-1));
443                assert((parametersToFix > 0) && (parametersToFix < variableParamCount)) : "must not fix all params nor none";
444    //if(IsDebug.isDebug) { log.log("[Fixing "+parametersToFix+" parameter(s) out of "+variableParamCount+"...]"); }
445                // Collection of indices of parameters to fix.
446                final List<Integer> toFix = new ArrayList<Integer>(variableParamCount);
447                for(int i = variableParamCount; --i >= 0; ) { toFix.add(i); }
448                Collections.shuffle(toFix, Rnd.goodRnd);
449                for(int i = parametersToFix; --i >= 0; )
450                    { params.fix(toFix.get(i)); }
451                assert(params.variableParameters() == variableParamCount - parametersToFix) : "params.variableParameters() == variableParamCount - parametersToFix";
452                }
453    //if(IsDebug.isDebug) { log.log("[Minimiser: varying parameters "+params.variableParameters()+" out of "+nParams+"...]"); }
454    
455            // Local error-signalling class
456            // (to wrap a legit IOException and propagate up through the minimiser).
457            @SuppressWarnings("serial")
458            class WrappedIOException extends RuntimeException
459                { WrappedIOException(final IOException cause) { super(cause); } }
460    
461            // Set true if we find a new Scorer.
462            final AtomicBoolean foundNewScorer = new AtomicBoolean();
463    
464            // Set true if we find a Scorer better than our initial one.
465            final AtomicBoolean foundBetterNewScorer = new AtomicBoolean();
466    //
467    //        // Set true if we found the best-so-far Scorer at least once during this run.
468    //        final AtomicBoolean foundBest = new AtomicBoolean();
469    
470            // Maximum error that we can generate in the evaluation functor.
471            final int maxERR = ScoreAndConf.MAX_GOODNESS;
472            assert(maxERR > 0) : "maxERR must be strictly positive as an int";
473    
474            // Wrap our Scorer-evaluation routine as a standard functor.
475            // We remember all values so far computed on this evaluation
476            // for speed and consistency.
477            // We are prepared to abort the calculation once:
478            //  * we have passed our target end time, and,
479            //  * we have found a new Scorer parameter set better than our starting set.
480            // Better means BOTH higher "goodness" AND higher correlation.
481            final FCNBase fcn = (new FCNBase() {
482                /**Map of values so-far requested/computed (thread-safe). */
483                private final ConcurrentMap<String,Integer> answers = new ConcurrentHashMap<String, Integer>();
484                /**Return the "error" of the Scorer, to be minimised. */
485                public double valueOf(final double[] par)
486                    {
487                    final ScorerIF toEvalSc = constructModifiedSNP(par, initialScorer, paramDefsAndValuesOriginal, mnIndexToParamIndex);
488                    final String toEvalS = toEvalSc.getNameAndParameters();
489                    // After rounding we may find that we have a previous answer to hand
490                    // if so, use it for consistency and speed.
491                    final Integer cachedAnswer = answers.get(toEvalS);
492                    if(cachedAnswer != null) { return(cachedAnswer); }
493                    // Find out if this Scorer is novel/new.
494                    final boolean isNew = (population.getScorerWeighting(toEvalSc, true) == null);
495                    try
496                        {
497                        int goodness = Integer.MIN_VALUE;
498                        // We can stop if we've found a new Scorer better than we started with.
499                        // This helps limit the execution time while still ensuring progress.
500                        if((!useCalibSubset) || isNew)
501                            {
502                            // Allow use of stale/cached values here for speed and robustness
503                            // with the risk that an inconsistent mix of stale and up-to-date values
504                            // may make finding the minimum error difficult or impossible at times.
505                            final ScoreAndConf sac = cache.computeScorerWeighting(toEvalSc, true, SSOURCE_MIN);
506                            goodness = ScoreAndConf.computeScorerGoodness(sac);
507    
508                            // Check that new Scorer is good enough to remain in the general population.
509                            if(isNew && (population.getScorerWeighting(toEvalSc, false) != null))
510                                {
511                                // Note if we generated a useful novel Scorer
512                                // (ie good enough to have the overall routine return true).
513                                foundNewScorer.set(true);
514                                // If our new Scorer is better than we started with then this is progress.
515                                // We have a strict notion of "better" which is both the "goodness" measure
516                                // AND the underlying accuracy (which is more than we are minimising for).
517                                if((goodness > goodnessExtant) && (sac.score > sacExtant.score))
518                                    { foundBetterNewScorer.set(true); }
519                                }
520                            }
521    
522                        // Check if we need to abort the minimiser because we've run out of time.
523                        // If we haven't found anything better than we started with and are now out of time, abort.
524                        if(System.currentTimeMillis() > (foundBetterNewScorer.get() ? extendedEndTime : endTime))
525                            {
526    //if(IsDebug.isDebug) { System.out.println("[ScorerCreator.createNewByOpt(): aborting minimiser having run out of time.]"); }
527                            throw new InterruptedIOException("aborting minimiser: out of time");
528                            }
529                        // If this thread has been interrupted, abort immediately.
530                        if(Thread.interrupted())
531                            { throw new InterruptedIOException("thread interrupted"); }
532    
533                        // If using the calibration set then use its measure of goodness instead for the error.
534                        // Convert score*confidence into (non-positive) error value (to be minimised).
535                        // The better the score, the lower the error.
536                        // MAX^2 s.c => 0 error; -MAX^2 s.c => maxERR +ve error.
537                        // We make sure that we use the same metric that ScorerPopulation does.
538                        final int error = maxERR - (((!useCalibSubset) ? goodness :
539                            ScoreAndConf.computeScorerGoodness(ScorerCreator.computeWeighting(calib, cache, toEvalSc, aeid, true).first)));
540    //if(IsDebug.isDebug) { System.out.println("[ScorerCreator.createNewByOpt(): error="+(error)+" having evaluated "+toEval+"...]"); }
541                        assert((error >= 0) && (error <= 2*maxERR)) : ("Error expected to be in range [0,"+(2*maxERR)+"] but was "+error) ;
542                        answers.putIfAbsent(toEvalS, error); // Cache this answer.
543                        return(error);
544                        }
545                    catch(final IOException e)
546                        {
547                        // Wrap the original exception and rethrow (as our unchecked exception)
548                        // which will abort the minimisation relatively gracefully.
549                        throw new WrappedIOException(e);
550                        }
551                    }
552                });
553    
554            // Select the general strategy.
555            // We select the strategy randomly to maximise our chance of success.
556            // Skew strategy selection heavily towards "fast" (0) rather than "safe" (2),
557            // since this will be a rather hard function to handle anyway.
558            final MnStrategy strategy = new MnStrategy(Rnd.fastRnd.nextInt(1+Rnd.fastRnd.nextInt(1+Rnd.fastRnd.nextInt(3))));
559    
560            // Create the minimiser instance...
561            final MnMinimize minimiser = new MnMinimize(fcn, params, strategy.strategy());
562            // Warn the minimiser that we don't in practice have much intermediate calculation precision.
563            minimiser.setPrecision(1.5/ScoreAndConf.MAX); // Approx limit of precision in score, for example.
564            // Limit the number of function calls that can be made (as they may be very slow)
565            // and we may not have space to retain all the useful intermediate results.
566            // Allow more calls with more parameters.
567            final int maxFnCalls = Math.min(1024 * nParams, population.maxSize()/2); // Crude limits...
568            // Specify a tolerance >> than the integer granularity (1) of the error value.
569            final int resultTolerance = maxERR/100; // Getting within 1% of optimum is just fine!
570            // ...and attempt to find the minimum.
571            // Note that we don't mind too much if the minimiser doesn't think that it finished,
572            // since it may have gotten us some way in the right direction anyway...
573    //        final long minimiserStartTime = System.currentTimeMillis();
574            try
575                {
576                final FunctionMinimum minimum = minimiser.minimize(maxFnCalls, resultTolerance);
577                assert(minimum != null) : "function minimum should be non-null";
578                // Don't try to use the function minimum at all if invalid.
579                if(!minimum.isValid()) { return(foundNewScorer.get()); }
580    
581                final double par[] = minimiser.params();
582                assert(par != null) : "minimiser.params() should not be null";
583                assert(fixSomeParameters || (par.length == minimum.userParameters().variableParameters())) : "unless fixing parameters, par.length should == minimum.userParameters().variableParameters()";
584                // This is the reconstructed "best" Scorer found so far.
585                final ScorerIF newVariant = constructModifiedSNP(par, initialScorer, paramDefsAndValuesOriginal, mnIndexToParamIndex);
586    
587                // Check that this Scorer name-and-parameters value is new.
588                // Note that evaluating the scorer internally may already have cached a result,
589                // so we regard finding a new value as successful if the minimisation succeeded,
590                // or if we had made some progress en passant...
591                if(population.getScorerWeighting(newVariant, true) != null)
592                    {
593                    return(foundNewScorer.get() ||
594                            (minimum.isValid() && !newVariant.getParameterDefsAndValues().equals(extant) &&
595                                                  !newVariant.getParameterDefsAndValues().equals(extant0)));
596                    }
597    
598                // Evaluate new variant, automatically recording/sharing if good.
599                // Force calculation using fully up-to-date values.
600                /* final ScoreAndConf sac = */ cache.computeScorerWeighting(newVariant, false, SSOURCE_MIN);
601                return(true); // Successfully created and evaluated new creature.
602                }
603            catch(final IOException e) { throw e; /* Propagate I/O problem as-is. */ }
604            catch(final WrappedIOException e)
605                {
606                if(foundNewScorer.get()) { return(true); /* Made progress anyway. */ }
607                // Didn't make progress, so rethrow the exception.
608                if(e.getCause() == null) { throw new IOException("WrappedIOException had null cause"); }
609                throw (IOException) e.getCause(); /* Unwrap and rethrow underlying IOException. */
610                }
611    //        finally
612    //            {
613    //            if(foundGoodEnoughNewScorer.get())
614    //                {
615    //                final long runTime = System.currentTimeMillis() - startTime;
616    ///* if(IsDebug.isDebug) */ { log.log("[Minimiser: made progress in "+(runTime)+"ms: varied parameters "+params.variableParameters()+" out of "+nParams+"...]"); }
617    //                }
618    //            }
619            }
620    
621    
622        /**Class to encapsulate all background and evolution work for a given ScorerCache.
623         * The doChunk() entry point is designed to be thread-safe and highly threadable,
624         * ie can be called from multiple worker threads at once.
625         */
626        public static final class ScorerWork
627            {
628            /**Create instance.
629             * @param scorerCache  cache of Scorers to work with; never null
630             * @param log  logger; never null
631             * @param allowScorerSharingByEvent  if true,
632             *     then we try to retrieve persisted/shared Scorers
633             *     from the system variables event mechanism
634             */
635            public ScorerWork(final AbstractScorerCache scorerCache,
636                              final SimpleLoggerIF log,
637                              final boolean allowScorerSharingByEvent)
638                {
639                if((scorerCache == null) || (log == null))
640                    { throw new IllegalArgumentException(); }
641                this.scorerCache = scorerCache;
642                this.log = log;
643                this.allowScorerSharingByEvent = allowScorerSharingByEvent;
644                }
645    
646            /**Underlying Scorer cache; never null. */
647            private final AbstractScorerCache scorerCache;
648    
649            /**Logger; never null. */
650            private final SimpleLoggerIF log;
651    
652            /**If true, then we try to retrieve persisted/shared Scorers from the system variables event mechanism. */
653            private final boolean allowScorerSharingByEvent;
654    
655            /**Excess time spent running slow evolution methods (ns).
656             * In order to attempt to balance the amount of time
657             * spent on different methods of generating new Scorers,
658             * given that they can be orders of magnitude apart in execution time,
659             * we attempt to balance the total run-time they take.
660             * <p>
661             * This is increased by the execution time of each "slow" instance
662             * and decreased by the execution time of each "fast" instance.
663             * Slow methods may only start when it is negative, fast ones otherwise.
664             * <p>
665             * Run-time for "failed" executions may or may not be included.
666             * <p>
667             * This starts at zero, and the effect of the "fair share" scheduling that this controls
668             * should be to keep it as close to zero (on either side of zero) as possible.
669             * <p>
670             * This is thread-safe.
671             */
672            private final AtomicLong excessSlowTimeNs = new AtomicLong();
673    
674            /**Run one chunk of work.
675             * Note that if the minTime or minEndTime values are not more than 1ms beyond 'now'
676             * then this avoids computations that are likely to require especially large
677             * amounts of CPU time/effort.
678             *
679             * @param special  if true, do extra background work and normal work
680             * @param inboundEval  if non-null then is queue of externally-supplied Scorers
681             *     to evaluate and insert into the population if appropriate;
682             *     these are regarded as potentially untrusted so are validated and canonicalised before use
683             * @param endTime  target end time for this chunk of work to stop by
684             * @param minEndTime  time before which we should not stop (ie target minimum quantum)
685             */
686            public final void doChunk(final long endTime, final long minEndTime,
687                                      final boolean special,
688                                      final Queue<String> inboundEval)
689                {
690                final ScorerPopulation population = scorerCache.getPopulation();
691    
692                // We do some extra housekeeping work first in "special" threads
693                // or when we need to bootstrap.
694                if(special || (population.size() == 0))
695                    {
696    if(IsDebug.isDebug && (population.size() == 0)) { System.out.println("ScorerCreator.doChunk(): bootstrapping empty population"); }
697                    // Make sure that we get base and best Scorers up to date and keep them so.
698                    // This will also ensure that all sorts of supporting data is kept fresh too.
699                    final ArrayList<String> shuffledBaseNames = new ArrayList<String>(scorerCache.getBaseScorersWithoutParameters());
700                    // Getting these 'best' values should not cause any exceptions or delay.
701                    // Re-evaluating these should ensure that the best deserve to remain so,
702                    // else incrementally push them out of the way to allow replacements.
703                    shuffledBaseNames.addAll(scorerCache.getCurrentScorersWithParameters(Rnd.fastRnd.nextBoolean()));
704                    // Shuffle the Scorers to give a fair/equal crack of the whip to all-comers.
705                    Collections.shuffle(shuffledBaseNames, Rnd.fastRnd);
706                    for(final String scorerName : shuffledBaseNames)
707                        {
708    if(IsDebug.isDebug && (population.size() == 0)) { System.out.println("ScorerCreator.doChunk(): bootstrapping empty population with: "+scorerName); }
709                        // Attempt to bring the selected Scorer weighting fully up-to-date
710                        // if not already so.
711                        try { scorerCache.computeScorerWeighting(scorerName, Rnd.fastRnd.nextBoolean(), null); }
712                        catch(final IOException e) { /* Ignore probably-transient I/O problem. */ }
713                        catch(final Exception e)
714                            {
715                            // Stop on unexpected error.
716                            e.printStackTrace();
717                            System.err.println("Problem with computeScorerWeighting("+scorerName+", false): " + e.getMessage());
718                            }
719                        // Stop and exit this run immediately if we've exhausted our time chunk...
720                        // This forces us to get the basics in place and up-to-date first.
721                        if(System.currentTimeMillis() > endTime) { return; }
722                        }
723                    }
724    
725                // If there is still an empty population then stop this work chunk immediately
726                // since there is nothing to 'evolve' from,
727                // unless we have inbound queued Scorers
728                // or we are permitted to import Scorers from system events.
729                if((population.size() == 0) && (!allowScorerSharingByEvent) &&
730                        ((inboundEval == null) || inboundEval.isEmpty()))
731                    { return; }
732    
733                // Because the optimiser/minimiser method is possibly hundreds of times slower
734                // than the normal GA (select/combine/mutate) method,
735                // we attempt to force some type of fair-share of CPU time post-hoc.
736                // By selecting outside the loop "fast"/"slow",
737                // we reduce the chance of accidentally starting lots of slow methods at once.
738                // We avoid slow methods where our available time quantum seems very small
739                // at least in relation to our ability to measure time accurately.
740                // TODO: We might also avoid slow methods in power-conserving mode,
741                // since otherwise we may uninterruptibly need a large chunk of energy.
742                final boolean doSlowMethods = (excessSlowTimeNs.get() < 0) &&
743                    (minEndTime - System.currentTimeMillis() > 1) &&
744                    (endTime - System.currentTimeMillis() > 1);
745                // If using slow methods (eg minimisation)
746                // then some of the time optimise/minimise against a small calibration subset for speed.
747                // If we encounter any problem trying to extract the calibration set, we do without.
748                final boolean useCalibSubset = doSlowMethods && Rnd.fastRnd.nextBoolean();
749                Map<Name.ExhibitShort, ScoreAndConf> calibSubset = null;
750                if(useCalibSubset)
751                    {
752                    // Try to generate a generic (and large-ish if possible) calibration set
753                    // (that reflects strengths/weaknesses amongst all available Scorers)
754                    // since we don't know at this point which Scorer we will be tuning.
755                    try { calibSubset = scorerCache.extractCalibrationSet(null, ScorerCreator.SUGGESTED_CALIB_SET_SIZE, null, true); }
756                    catch(final IOException e) { /* Ignore error and do without calibration subset. */ }
757                    }
758    
759                // Try to create several new/good Scorers values
760                // or to import several good saved Scorers from local or global state.
761                // Do extra rounds if we've nowhere-near filled up our population/gene pool yet
762                // and/or if computation is going very quickly.
763                final int maxRounds = Math.min(1+SystemVariables.EVENT_SAMPLES_RETAINED,
764                        Math.max(3, (population.maxSize() / Math.max(1, population.size()))));
765                int roundsDone = 0; // Decremented on successful creation/import of new variant.
766                try
767                    {
768                    // Continue until we've made made visible progress
769                    // or at least until we've being trying for a reasonable time,
770                    // but stop as soon as we pass our target end time or we are interrupted.
771                    while(((roundsDone < maxRounds) || (System.currentTimeMillis() <= minEndTime)) &&
772                            (System.currentTimeMillis() <= endTime) && (!Thread.interrupted()))
773                        {
774                        // Attempt to create new Scorer variations
775                        // or import extant Scorers from local/global event histories or outside.
776                        // In each case, these routines should only return true
777                        // if fairly sure that they have made useful progress.
778                        if(Rnd.fastRnd.nextBoolean()) // Try to 'import' half the time...
779                            {
780                            if(allowScorerSharingByEvent && Rnd.fastRnd.nextBoolean())
781                                {
782                                // Try to fetch a new top Scorer from another population
783                                // or find a "golden oldie" from local/global history.
784                                // Fast.
785                                if(ScorerCreator.retrieveOldScorer(scorerCache.getDataSource(), scorerCache, population, log))
786                                    { ++roundsDone; }
787                                }
788                            else if(inboundEval != null)
789                                {
790                                final String next = inboundEval.poll(); // Non-blocking read.
791                                // Count this as successful 'round'
792                                // if there is a new inbound Scorer whose canonicalised form is new
793                                // and yields a positive goodness.
794                                if(next != null)
795                                    {
796                                    try
797                                        {
798                                        final ScorerIF sc = scorerCache.getScorerInstance(next);
799                                        if(sc != null)
800                                            {
801                                            final String canon = AbstractScorer.canonicalise(sc);
802                                            if(population.getScorerWeighting(canon, true) == null)
803                                                {
804                                                final ScoreAndConf newSAC = scorerCache.computeScorerWeighting(canon, false, "inbound");
805                                                if(ScoreAndConf.computeScorerGoodness(newSAC) > 0)
806                                                    { ++roundsDone; }
807                                                }
808                                            }
809                                        }
810                                    catch(final Exception e)
811                                        { /* Ignore errors from potentially-untrusted inbound Scorer values. */ }
812                                    }
813                                }
814                            }
815                        else // Try to 'create' half the time.
816                            {
817                            final long startTime = System.nanoTime();
818                            if(!doSlowMethods)
819                                {
820                                try
821                                    {
822                                    // Incremental (quick) creation of new Scorer by GA methods:
823                                    // selection, combination, mutation.
824                                    // Fast.
825                                    if(ScorerCreator.createNewByGA(scorerCache, population, log))
826                                        { ++roundsDone; }
827                                    }
828                                finally { excessSlowTimeNs.addAndGet(startTime - System.nanoTime()); /* Subtract run time. */ }
829                                }
830                            else
831                                {
832                                try
833                                    {
834                                    // Potentially slow minimiser-based method, seeking "optimum" parameters.
835                                    // Can probably execute only once on each run.
836                                    // Each run may generate many novel Scorer values, however.
837                                    if(ScorerCreator.createNewByOpt(scorerCache.getDataSource(), scorerCache, population, calibSubset, log, endTime))
838                                        { ++roundsDone; }
839                                    }
840                                finally { excessSlowTimeNs.addAndGet(System.nanoTime() - startTime); /* Add run time. */ }
841                                }
842                            }
843    
844                        // If memory seems to be under huge strain
845                        // or the system is conserving power
846                        // then abort once we have apparently made some progress
847                        // (to avoid system/memory stress and starvation to no end).
848                        if((roundsDone > 0) &&
849                                (MemoryTools.isMemoryStressed() || GenUtils.mustConservePower()))
850                            { return; }
851                        }
852                    }
853                catch(final IOException e) { /* Stop this run on probably-transient I/O problem. */ }
854                catch(final Throwable t) { t.printStackTrace(); /* Stop this run on serious error. */ }
855                }
856            }
857    
858    
859        /**Select at random a best-first breeding set of one Scorer base type; null if none available.
860         * Returns the non-empty breeding set of a parameterised Scorer selected at random,
861         * or null if no such breeding set is available.
862         * <p>
863         * We weight this more heavily towards breeding sets whose best constituent has a high "goodness".
864         *
865         * @param population  the extant population; never null
866         *
867         * @return null or in-order (best-first) non-empty breeding set of same Scorer type
868         */
869        private static List<String> pickBreedingSet(final ScorerCacheIF cache,
870                                                    final ScorerPopulation population,
871                                                    final boolean allowStale)
872            {
873            // Select at random one of the top or base Scorer names,
874            // though avoiding any that have no parameters or an empty breeding set.
875            // We ensure that the base Scorers are included to allow bootstrapping.
876            // If we have more entries for the bigger populations then than is a useful skew.
877            final List<String> topScorers = new ArrayList<String>(population.getCurrentScorersWithParameters(allowStale));
878            topScorers.addAll(cache.getBaseScorersWithoutParameters());
879    
880            // Remove all clearly-ineligible Scorers,
881            // ie those without parameters.
882            for(final Iterator<String> it = topScorers.iterator(); it.hasNext(); )
883                {
884                final ScorerIF sc = cache.getScorerInstance(it.next());
885                final Collection<ScorerParam> parameterDefs = sc.getParameterDefsAndValues();
886                if(parameterDefs.isEmpty()) { it.remove(); } // No variation possible.
887                }
888            // Return null if no candidates left.
889            final int tsSize = topScorers.size();
890            if(tsSize == 0) { return(null); }
891    
892            // Sort remainder by (decreasing) goodness, ie best-first.
893            Collections.sort(topScorers, new Comparator<String>() {
894                public int compare(final String o1, final String o2)
895                    {
896                    final ScoreAndConf sw1 = population.getScorerWeighting(o1, allowStale);
897                    final int g1 = (sw1 != null) ? ScoreAndConf.computeScorerGoodness(sw1) : Integer.MIN_VALUE;
898                    final ScoreAndConf sw2 = population.getScorerWeighting(o2, allowStale);
899                    final int g2 = (sw2 != null) ? ScoreAndConf.computeScorerGoodness(sw2) : Integer.MIN_VALUE;
900                    if(g1 < g2) { return(-1); }
901                    if(g1 > g2) { return(+1); }
902                    return(0); // Identical.
903                    }
904                });
905    
906            // Try to use breeding set skewed towards earlier (better) values.
907            // If our first choice is not available, work down the list (to less-good Scorers).
908            final int startIndex = Rnd.goodRnd.nextInt(1+Rnd.goodRnd.nextInt(tsSize));
909            for(int i = startIndex; i < tsSize; ++i)
910                {
911                final ScorerIF sc = cache.getScorerInstance(topScorers.get(i));
912    //          final Collection<ScorerParam> parameterDefs = sc.getParameterDefsAndValues();
913    //          if(parameterDefs.isEmpty()) { continue; } // No variation possible.
914                final List<String> bs = population.getBreedingSet(sc.getBaseName());
915                if(!bs.isEmpty()) { return(bs); }
916                }
917    
918            // Fallback...
919            // If we could not get a breeding set with our preferred skewed weighting
920            // then shuffle the names randomly and return the first suitable entry.
921            Collections.shuffle(topScorers, Rnd.fastRnd);
922            for(final String snp : topScorers)
923                {
924                final ScorerIF sc = cache.getScorerInstance(snp);
925    //            final Collection<ScorerParam> parameterDefs = sc.getParameterDefsAndValues();
926    //            if(parameterDefs.isEmpty()) { continue; } // No variation possible.
927                final List<String> bs = population.getBreedingSet(sc.getBaseName());
928                if(!bs.isEmpty()) { return(bs); }
929                }
930    
931            // There really was no suitable breeding sub-population at all.
932            return(null);
933            }
934    
935        /**If true then monitor and report performance (CPU usage) in computeWeighting().
936         * Turn this on for (Scorer) performance tuning only...
937         * <p>
938         * This should give a reasonable view of the most compute-intensive Scorer workload.
939         */
940        private static final boolean PERF_SAMPLE_CW = false;
941    
942        /**Cumulative performance stats (if any) for computeWeighting(); null iff none being collected. */
943        private static final ConcurrentHashMap<StackTraceElement, AtomicInteger> perfCountsCW = (!PERF_SAMPLE_CW) ? null : new ConcurrentHashMap<StackTraceElement, AtomicInteger>(1001);
944    
945        /**Computes ScoreAndConf over the supplied calibration exhibits using the supplied Scorer cache; never null but may be (0,0) where the scorer is unknown or untested.
946         * This can be used as part of the computation of a Scorer's weighting.
947         * The input data must not change while this routine is running.
948         *
949         * @param calibrationData  map from short exhibit names to external calibration data
950         *     (eg actual user vote-based score); never null
951         *
952         * @return  the score (first in the Pair) represents the correlation with the underlying votes
953         *     (and whatever the scoring is measured against)
954         *     with MAX meaning perfect correlation, 0 meaning no correlation,
955         *     and -MAX meaning perfectly wrong answers all the time,
956         *     and the confidence 0 if we have no (or very/too few) data points
957         *     and approaching MAX as we have a large (enough) number of data points;
958         *     the second element of the Pair is true if the the value is "partial"
959         *     (we had to prematurely stop calculation or did not have enough data points)
960         *     and so for example should not be cached
961         */
962        public static Tuple.Pair<ScoreAndConf,Boolean> computeWeighting(
963                                                    final Map<Name.ExhibitShort, ScoreAndConf> calibrationData,
964                                                    final ScorerCacheIF scorerCache,
965                                                    final ScorerIF scorer,
966                                                    final AllExhibitImmutableData aeid,
967                                                    final boolean allowStale)
968            throws IOException
969            {
970            if((calibrationData == null) ||
971               (scorerCache == null) ||
972               (aeid == null) ||
973               (scorer == null))
974                { throw new IllegalArgumentException(); }
975    
976            // Note start time so as to be able to limit effort/time spent in this call.
977            final long startTime = System.currentTimeMillis();
978    
979            // If there is no calibration data then we cannot compute (nor cache) a result.
980            if(calibrationData.isEmpty()) { return(new Tuple.Pair<ScoreAndConf, Boolean>(ScoreAndConf.NO_OPINION, true)); }
981    
982            // If there are no exhibits then we cannot compute (nor cache) a result.
983            if(aeid.length == 0) { return(new Tuple.Pair<ScoreAndConf, Boolean>(ScoreAndConf.NO_OPINION, true)); }
984    
985            // Start performance sampling/monitoring, if any.
986            Thread perfMonitorThread = null;
987            if(PERF_SAMPLE_CW)
988                {
989                perfMonitorThread = GenUtils.startThreadPerfMonitor(
990                        Thread.currentThread(),
991                        perfCountsCW,
992                        null, // "org.hd.d.pg2k.ai.",
993                        "org.hd.d.pg2k.", // Monitor thread for at most 10s.
994                        System.currentTimeMillis() + 10000, 10); // Sample often enough to obtain a decent profile.
995                }
996            try
997                {
998                // Last captured error, and error count, if any.
999                IOException err = null;
1000                int errorCount = 0;
1001                // If we didn't finish then we should not cache the result.
1002                boolean stoppedPrematurely = false;
1003                // Compute map from short name to score computed by this Scorer.
1004                final Map<Name.ExhibitShort, ScoreAndConf> scorerResults = new HashMap<Name.ExhibitShort, ScoreAndConf>(
1005                        2 * calibrationData.size());
1006                // Randomise the order in which we do the computations
1007                // to better queue background work, avoid systemic and order-dependent errors, etc.
1008                final List<Name.ExhibitShort> names = new ArrayList<Name.ExhibitShort>(calibrationData.keySet());
1009                if(Rnd.fastRnd.nextBoolean())
1010                    { Collections.shuffle(names, Rnd.fastRnd); }
1011                for(final Name.ExhibitShort shortName : names)
1012                    {
1013                    // If the exhibit is missing (eg has been deleted or renamed) then skip it.
1014                    final Name.ExhibitFull fullName = aeid.getFullName(shortName);
1015                    if(fullName == null) { continue; }
1016    
1017                    try
1018                        {
1019                        // Compute raw Scorer result for this exhibit.
1020                        // Note that this may quickly return zero confidence
1021                        // therefore meaning we can skip further work for this exhibit
1022                        // since this Scorer may not be suitable for this exhibit type.
1023                        final ScoreAndConf sac = scorerCache
1024                                .computeUnweightedScoreAndConfidence(fullName,
1025                                        scorer, allowStale);
1026                        // If confidence is zero then skip this exhibit.
1027                        if(sac.confidence == 0) { continue; }
1028    
1029                        // Record this non-zero-confidence result.
1030                        scorerResults.put(shortName, sac);
1031                        }
1032                    catch(final IOException e)
1033                        {
1034                        // Capture the error.
1035                        ++errorCount;
1036                        err = e;
1037    
1038                        // We are prepared to skip over a few missing data points
1039                        // (ie ignore a small number of possibly-transient errors)
1040                        // though then do not subsequently cache the result
1041                        // unless there has been only a very small fraction of errors.
1042                        // This may also allow requests (eg for thumbnails/events)
1043                        // to be queued in the background ready for next time
1044                        // while making computation generally more robust.
1045                        final int usableExhibits = scorerResults.size();
1046                        if((errorCount <= usableExhibits)
1047                                || (System.currentTimeMillis() - startTime < CoreConsts.MAX_INTERACTIVE_DELAY_MS))
1048                            { continue; }
1049    
1050                        // We've spent long enough: do calculations based on what we have so far...
1051                        // If we've had to give up and we don't allow stale/partial results then abort...
1052                        if(!allowStale) { throw e; }
1053                        // ...else continue with what we have but don't cache it.
1054                        stoppedPrematurely = true;
1055                        break;
1056                        }
1057                    }
1058                // We will accept the result if there were far more data points
1059                // where we had a usable vote factor AND a non-zero Scorer confidence
1060                // than the count of errors that we hit (and points we should have included)
1061                // OR if there were some exhibits but none at all that we could use with this Scorer.
1062                final int usableExhibits = scorerResults.size();
1063                assert (aeid.length > 0);
1064                // We will accept/trust the result if there were far more data points
1065                // where we had a usable vote factor AND a non-zero Scorer confidence
1066                // than the count of errors that we hit (and points we should have included)
1067                // OR if there were some exhibits but none at all that we could use with this Scorer.
1068                // This value gets filled in after attempting to calibrate.
1069                final boolean enoughDataPoints = (errorCount <= usableExhibits
1070                        / ScorerCreator.MIN_SUGGESTED_CALIB_SET_SIZE);
1071                // If not allowing stale/partial results then we must abort after too many errors.
1072                if(!allowStale && !enoughDataPoints) { throw err; }
1073                return(new Tuple.Pair<ScoreAndConf, Boolean>(
1074                        ScorerCreator.computeWeighting(scorerResults, calibrationData),
1075                        stoppedPrematurely || !enoughDataPoints));
1076                }
1077            finally
1078                {
1079                if(PERF_SAMPLE_CW && (null != perfMonitorThread))
1080                    {
1081                    try
1082                        {
1083                        GenUtils.stopPerfMonitorandDumpSamples(perfMonitorThread,
1084                                "computeWeighting()",
1085                                perfCountsCW,
1086                                null,
1087                                20, GenUtils.systemOutLogger); // Log to system out...
1088                        }
1089                    catch(final InterruptedException e)
1090                        {
1091                        Thread.currentThread().interrupt(); // Don't lose interruptedness...
1092                        }
1093                    }
1094                }
1095            }
1096    
1097        /**Computes ScoreAndConf over the supplied calibration exhibits; never null but may be (0,0) where the scorer is unknown or untested.
1098         * This can be used as part of the computation of a Scorer's weighting.
1099         * <p>
1100         * All keys in the calibration data should map to non-null values in the ratings map
1101         * (ie the ratings keys can be a superset of the calibration keys);
1102         * missing values will diminish the confidence of the result.
1103         * <p>
1104         * The input data must not change while this routine is running.
1105         *
1106         * @param scorerResult  map from short exhibit name to Scorer's rating/result for the exhibit;
1107         *     never null
1108         * @param calibrationData  map from short exhibit name to external calibration data
1109         *     (eg actual user vote-based score); never null
1110         *
1111         * @return  the score represents the correlation with the underlying votes
1112         *     (and whatever the scoring is measured against)
1113         *     with MAX meaning perfect correlation, 0 meaning no correlation,
1114         *     and -MAX meaning perfectly wrong answers all the time,
1115         *     and the confidence 0 if we have no (or very/too few) data points
1116         *     and approaching MAX as we have a large (enough) number of data points
1117         */
1118        public static ScoreAndConf computeWeighting(final Map<Name.ExhibitShort, ScoreAndConf> scorerResult,
1119                                                    final Map<Name.ExhibitShort, ScoreAndConf> calibrationData)
1120            {
1121            // If there are no results or no calibration data then we cannot generate a confident answer.
1122            if(scorerResult.isEmpty() || calibrationData.isEmpty())
1123                { return(ScoreAndConf.NO_OPINION); }
1124    
1125            // Exhibits which were usable to calibrate this Scorer.
1126            int usableExhibits = 0;
1127            // The total squared error (and confidence) in the range [0, (2*MAX)^2].
1128            // Each error term is multiplied by (moderated downwards by)
1129            // the product of the Scorer and vote-Factor confidence/correlation.
1130            assert((long) ScoreAndConf.MAX * (long) ScoreAndConf.MAX <= Integer.MAX_VALUE);
1131            long cumError = 0;
1132            // The dot product of the Scorer confidence and the vote confidence,
1133            // with theoretical range [0,Y] where Y = ScoreAndConf.MAX^2 * usableExhibits.
1134            long confDP = 0;
1135            for(final Name.ExhibitShort shortName : calibrationData.keySet())
1136                {
1137                // Get vote-based rating (cannot be null or have non-positive correlation).
1138                final ScoreAndConf vote = calibrationData.get(shortName);
1139                // Vote correlation must be strictly positive to be usable.
1140                if(vote.confidence == 0) { continue; }
1141    
1142                // Compute raw Scorer result for this exhibit.
1143                // Note that this may quickly return zero confidence
1144                // thus meaning that we can skip further work for this exhibit
1145                // since this Scorer may not be suitable for this exhibit type.
1146                final ScoreAndConf sac = scorerResult.get(shortName);
1147                // If result is missing then sjip this exhibit.
1148                if(sac == null) { continue; }
1149                // If confidence is zero then skip this exhibit.
1150                if(sac.confidence == 0) { continue; }
1151    
1152                final int conf = sac.confidence * vote.confidence;
1153                assert(conf >= 0); // Should be no overflow...
1154                // Skip zero or noisy (very-low-confidence) correlations.
1155                if(conf < 1) { continue; }
1156                confDP += conf;
1157                // Compute raw error (difference) in range [0,2*MAX].
1158                // The Scorer result is an actual predicted score for the exhibit.
1159                // The smaller the difference between this predicted score
1160                // and the human-vote-derived score the better.
1161                final int rawError = sac.score - vote.score;
1162                // Compute squared error times the (normalised to [0.0,1.0]) confidence.
1163                final long sqErrConf = ((rawError * (long)rawError) * conf) / (ScoreAndConf.MAX * ScoreAndConf.MAX);
1164    //if(IsDebug.isDebug) { System.out.println("[Error/diff="+error+" @ conf="+conf+" for "+scorerNameAndParameters+" on "+shortName+"; sac="+sac+"; voteFactor="+voteFactor+".]"); }
1165                assert(sqErrConf >= 0);
1166                cumError += sqErrConf;
1167                assert(cumError >= 0); // FIXME: Check that we never overflow.
1168                ++usableExhibits;
1169                }
1170    
1171            ScoreAndConf result = ScoreAndConf.NO_OPINION; // Scorer unusable unless proven otherwise.
1172            // We can't compute a result with insufficient cumulative confidence.
1173            // We can cache a valid negative result though.
1174            if((usableExhibits > 0) && (confDP > 0))
1175                {
1176                final double svRangeConf = 1.0;
1177    
1178                // Verify that accumulators are in their expected ranges.
1179                assert((confDP >= 0) && (confDP <= (usableExhibits * (long)ScoreAndConf.MAX*ScoreAndConf.MAX)));
1180                assert((cumError >= 0) && (cumError <= (usableExhibits * 4 * (long)ScoreAndConf.MAX*ScoreAndConf.MAX)));
1181    
1182                // Compute RMS error in range [0,2*MAX].
1183                final double errorRMSRaw = Math.sqrt(cumError / (confDP / (double)(ScoreAndConf.MAX*ScoreAndConf.MAX)));
1184                assert((errorRMSRaw > -1) && (errorRMSRaw < 1 + 2*ScoreAndConf.MAX)); // We'll allow a little wiggle room for rounding errors...
1185                // Compute score in range +/- ScoreAndConf.MAX.
1186                final long score = ScoreAndConf.MAX - Math.round(errorRMSRaw);
1187                // Compute confidence in range [0,ScoreAndConf.MAX] (plus possible rounding errors).
1188                // We allow extra samples over our minimum desired number
1189                // to compensate for poor per-exhibit confidence.
1190                final long confidence = Math.min(ScoreAndConf.MAX,
1191                    Math.round(svRangeConf * (confDP / (usableExhibits*ScoreAndConf.MAX)) *
1192                        Math.sqrt(usableExhibits / (double) calibrationData.size())));
1193                result = new ScoreAndConf((short) Math.max(-ScoreAndConf.MAX, Math.min(ScoreAndConf.MAX, score)),
1194                                          (short) Math.max(0, confidence));
1195                }
1196    
1197            return(result);
1198            }
1199    
1200        /**Minimum suggested calibration-set size; strictly positive.
1201         * Note that if multiple distinct Scorer populations and target exhibit types are to be covered,
1202         * then a set of this size or greater should be gathered for each of them.
1203         * It may then be convenient to work with the union of those values
1204         * for all Scorer types.
1205         * <p>
1206         * This should be large enough that no one Scorer has enough parameters
1207         * to trivially map each different calibration exhibit to its correct score.
1208         * <p>
1209         * In general, while using a larger calibration sample set increases CPU and memory costs,
1210         * especially for remote 'AH' clients that may need to fetch data,
1211         * a larger calibration set should increase the reliability of calibrated results.
1212         * But there may be a huge advantage in using a set small enough
1213         * to keep in cache close to the top of the memory hierarchy.
1214         * <p>
1215         * We may want to try different calibration set sizes larger than the minimum
1216         * to improve the usefulness and predictive power of the calibration set.
1217         * <p>
1218         * A power of two may allow for generation of slightly better code in various places.
1219         * <p>
1220         * Experience suggests that this value should be at least 128.
1221         */
1222        public static final int MIN_SUGGESTED_CALIB_SET_SIZE = 128;
1223    
1224        /**Server-side calibration set size; strictly positive and no less than MIN_SUGGESTED_CALIB_SET_SIZE.
1225         * A larger calibration set of this size implies use of more memory and CPU time,
1226         * but with a higher reliability of answer,
1227         * than using a calibration set of size MIN_SUGGESTED_CALIB_SET_SIZE.
1228         * <p>
1229         * Needs to be at least as large as the minimum 'voted-for' set to give good confidence.
1230         * <p>
1231         * A power of two may allow for generation of slightly better code.
1232         */
1233        public static final int SUGGESTED_CALIB_SET_SIZE =
1234            Math.max(ScorerCacheImpl.MIN_VOTED_FOR_SET_SIZE, 2*MIN_SUGGESTED_CALIB_SET_SIZE);
1235    
1236        /**Tunnel root-relative (starting with '/') URL within the server; never null. */
1237        public static final String scorerTunnelRRURL = "/_AI/remote.jsp";
1238        /**Parameter name to request particular remote data type; never null nor empty. */
1239        public static final String DT_PARAM_NAME = "scDataType";
1240        /**Data type parameter value to request calibration-set data; never null nor empty. */
1241        public static final String DT_CALIB = "calibration";
1242        /**Data type parameter value to request server's best Scorers; never null nor empty. */
1243        public static final String DT_BESTSC = "bestScorers";
1244        /**Data type parameter value to POST new 'star' Scorer; never null nor empty. */
1245        public static final String DT_POSTNEWSC = "postNewScorer";
1246        /**Data type parameter name for generic data/query; never null nor empty. */
1247        public static final String DT__GEN_DATA = "q";
1248        }