001    /*
002    Copyright (c) 1996-2012, Damon Hart-Davis
003    All rights reserved.
004    
005    Redistribution and use in source and binary forms, with or without
006    modification, are permitted provided that the following conditions are
007    met:
008    
009      * Redistributions of source code must retain the above copyright
010        notice, this list of conditions and the following disclaimer.
011    
012      * Redistributions in binary form must reproduce the above copyright
013        notice, this list of conditions and the following disclaimer in the
014        documentation and/or other materials provided with the
015        distribution.
016    
017    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
018    IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
019    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
020    PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
021    OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
022    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
023    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
024    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
025    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
026    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
027    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
028    */
029    
030    package org.hd.d.pg2k.ai.scorer;
031    
032    import java.io.IOException;
033    import java.io.InterruptedIOException;
034    import java.util.ArrayList;
035    import java.util.Collection;
036    import java.util.Collections;
037    import java.util.Comparator;
038    import java.util.HashMap;
039    import java.util.HashSet;
040    import java.util.Iterator;
041    import java.util.List;
042    import java.util.Map;
043    import java.util.Queue;
044    import java.util.Set;
045    import java.util.concurrent.ConcurrentHashMap;
046    import java.util.concurrent.ConcurrentMap;
047    import java.util.concurrent.atomic.AtomicBoolean;
048    import java.util.concurrent.atomic.AtomicInteger;
049    import java.util.concurrent.atomic.AtomicLong;
050    
051    import org.freehep.math.minuit.FCNBase;
052    import org.freehep.math.minuit.FunctionMinimum;
053    import org.freehep.math.minuit.MnMinimize;
054    import org.freehep.math.minuit.MnStrategy;
055    import org.freehep.math.minuit.MnUserParameters;
056    import org.hd.d.pg2k.svrCore.AllExhibitImmutableData;
057    import org.hd.d.pg2k.svrCore.CoreConsts;
058    import org.hd.d.pg2k.svrCore.GenUtils;
059    import org.hd.d.pg2k.svrCore.MemoryTools;
060    import org.hd.d.pg2k.svrCore.Name;
061    import org.hd.d.pg2k.svrCore.Rnd;
062    import org.hd.d.pg2k.svrCore.SimpleLoggerIF;
063    import org.hd.d.pg2k.svrCore.Tuple;
064    import org.hd.d.pg2k.svrCore.datasource.SimpleExhibitPipelineIF;
065    import org.hd.d.pg2k.svrCore.vars.EventPeriod;
066    import org.hd.d.pg2k.svrCore.vars.EventVariableValue;
067    import org.hd.d.pg2k.svrCore.vars.SimpleVariableDefinition;
068    import org.hd.d.pg2k.svrCore.vars.SystemVariables;
069    
070    import ORG.hd.d.IsDebug;
071    
072    
073    /**Creates new Scorer (parameter-set) instances given an existing population.
074     *
075     * @author dhd
076     */
077    public final class ScorerCreator
078        {
079        /**Scorer 'source' for recovering from persistent/shared store; never empty nor null. */
080        public static final String SSOURCE_REC = "REC";
081    
082        /**This method attempts to retrieve a "new" Scorer from the persisted history.
083         * This mechanism is used to recover previous known-good Scorers from local store and/or
084         * share good Scorers between all participating servers.
085         * This makes for faster start-up of new server instances,
086         * a local and global memory of good genotypes,
087         * and sharing of good genotypes between local populations.
088         *
089         * @return true if a Scorer was found that was not in the extant population
090         *     (though may not have been good enough to remain in it)
091         */
092        public static boolean retrieveOldScorer(final SimpleExhibitPipelineIF dataSource,
093                                                final ScorerCacheIF cache,
094                                                final ScorerPopulation population,
095                                                final SimpleLoggerIF log)
096            throws IOException
097            {
098            // Choose between the longest and a less-long period
099            // to try to pick up more variety of persisted Scorer.
100            final EventPeriod intervalSelector = Rnd.fastRnd.nextBoolean() ?
101                    EventPeriod.LONG : EventPeriod.VLONG;
102            // Choose a period from the previous full period to as-far-back-as-possible
103            // (for the given period/interval length)
104            // to improve long-term memory of Scorer values tests
105            // and to increase the chance of finding something rare,
106            // but with the choice heavily skewed to more recent events
107            // as more likely to be "better" or even present at all.
108            final long whichPeriod = intervalSelector.getIntervalNumber(System.currentTimeMillis()) -
109                Rnd.goodRnd.nextInt(1+Rnd.goodRnd.nextInt(1+Rnd.goodRnd.nextInt(SystemVariables.EVENT_SAMPLES_RETAINED)));
110            // Select from local (larger volume) and global (broadcast) results.
111            final SimpleVariableDefinition def = Rnd.fastRnd.nextBoolean() ?
112                    SystemVariables.AI_SCORER_STRING_LOCAL_EVENT : SystemVariables.AI_SCORER_STRING_GLOBAL_EVENT;
113            final EventVariableValue[] evvs = dataSource.getEventValues(def, intervalSelector, whichPeriod, null);
114            if(evvs.length == 0) { return(false); }
115            EventVariableValue evv = evvs[0];
116            // If the selected (full, historical) period has no events
117            // then try the current (partial) period instead,
118            // which has the advantage of getting us bang up-to-date.
119            if((evv == null) || (evv.getTotalDistinctValues() == 0))
120                { evv = dataSource.getEventValue(def, intervalSelector, true); }
121            if((evv == null) || (evv.getTotalDistinctValues() == 0)) { return(false); }
122            // Select an entry at random, though weighted more towards items that were "best" more often.
123            final String scorerNameAndStringParameters = (String) evv.getValueByRank(Rnd.goodRnd.nextInt(1+Rnd.goodRnd.nextInt(evv.getTotalDistinctValues())));
124            // Check that it is new...
125            if(population.getScorerWeighting(scorerNameAndStringParameters, true) != null) { return(false); }
126            // Evaluate/record/cache it if possible.
127            final ScoreAndConf sac = cache.computeScorerWeighting(scorerNameAndStringParameters, false, SSOURCE_REC);
128            // Check that it is good enough to remain in the population...
129            if(population.getScorerWeighting(scorerNameAndStringParameters, false) != null) { return(false); }
130    if(IsDebug.isDebug) { log.log("[ScorerCreator.retrieveOldScorer(): imported "+scorerNameAndStringParameters+" ScoreAndConfidence: "+sac+".]"); }
131            return(true); // Found (and imported/evaluated) a new Scorer not in our population.
132            }
133    
134        /**Scorer 'source' for the GA combine/mutate method; never empty nor null. */
135        public static final String SSOURCE_GA = "GA";
136    
137        /**Create a new Scorer (parameter set) from existing ones using normal GA techniques.
138         * This uses selection, mutation and recombination to attempt to find
139         * a "better" Scorer version based on those in the existing population.
140         *
141         * @param population  the extant population; never null
142         *
143         * @return true iff we created and evaluated a new Scorer not in the extant population
144         */
145        public static boolean createNewByGA(final ScorerCacheIF cache,
146                                            final ScorerPopulation population,
147                                            final SimpleLoggerIF log)
148            throws IOException
149            {
150            // Pick breeding set to work on.
151            // Allow slightly stale values for speed and robustness.
152            final List<String> breedingSet = pickBreedingSet(cache, population, true);
153            if(breedingSet == null) { return(false); /* No suitable breeding stock... */ }
154    
155            // Randomly select one or more different Scorers to mutate and combine.
156            final Set<String> parents = new HashSet<String>();
157            do
158                {
159                // Randomly select a Scorer, skewed towards "better" Scorers.
160                final String extant = breedingSet.get(
161                        Rnd.goodRnd.nextInt(1 + Rnd.goodRnd.nextInt(breedingSet.size())));
162                // Add this mutant if different to extant parents (eg if no parents so far).
163                parents.add(extant);
164                // Stop if this parent has zero or one parameters,
165                // since multiple parents cannot be usefully "combined" sexually.
166                if(extant.indexOf(AbstractScorer.SEPARATOR) == extant.lastIndexOf(AbstractScorer.SEPARATOR))
167                    { break; }
168                // We try not to draw too many samples from a small breeding set
169                // and we keep low the probability of using many parents.
170                } while((parents.size() < breedingSet.size()/4) && Rnd.goodRnd.nextBoolean());
171            assert(!parents.isEmpty());
172    
173            final ScorerIF firstParent = cache.getScorerInstance(parents.iterator().next());
174    
175            // Now make the new new variant/child by combining the parents if more than one
176            // and mutating the result.
177            // Else if there is exactly one parent then mutate it to get the new child.
178            final ScorerIF newVariant;
179            if(parents.size() == 1)
180                { newVariant = firstParent.createPerturbedVariant(); }
181            else
182                {
183                // Combine the parents' parameters
184                // and mutate the result/offspring.
185                final ArrayList<List<ScorerParam>> pt = new ArrayList<List<ScorerParam>>(parents.size());
186                for(final String snp : parents)
187                    { pt.add(population.getScorerInstance(snp).getParameterDefsAndValues()); }
188                // Shuffle once to get a completely random order.
189                Collections.shuffle(pt, Rnd.goodRnd);
190                // Now, add one token at a time from the first parent,
191                // occasionally shuffling to switch the source of tokens
192                // to be from a different parent.
193                // We stop when the current/first parent is out of tokens.
194                // We force at least one crossover.
195                final int pt0len = pt.get(0).size();
196                final List<ScorerParam> newParams = new ArrayList<ScorerParam>(pt0len);
197                final int forcedCrossoverAfter = (pt0len <= 3) ? 1 : (1+Rnd.goodRnd.nextInt(pt0len-2));
198                // Note that each time, for whatever the current "0" Scorer is,
199                // we test against its length to safely allow for variable parameter numbers.
200                for(int tokenNumber = 0; tokenNumber < pt.get(0).size(); ++tokenNumber)
201                    {
202                    newParams.add(pt.get(0).get(tokenNumber));
203                    // Occasionally switch to another parent.
204                    // We make it quite unlikely to switch/crossover at any one point
205                    // so as to give some notion of locality/clustering.
206                    // FIXME: The frequency/probability of crossover may be very important.
207                    if((forcedCrossoverAfter == tokenNumber) ||
208                       (Rnd.fastRnd.nextBoolean() && Rnd.goodRnd.nextBoolean()))
209                        {
210                        final int ptSize = pt.size();
211                        // Swap the current 0 item (which is the source of "genes")
212                        // randomly with one of the others.
213                        // This is analogous to a DNA crossover during meiosis.
214                        final int swapIndex = (ptSize == 2) ? 1 : 1+Rnd.goodRnd.nextInt(ptSize-1);
215                        final List<ScorerParam> old0 = pt.get(0);
216                        pt.set(0, pt.get(swapIndex));
217                        pt.set(swapIndex, old0);
218                        }
219                    }
220                // Get the un-mutated child.
221                final ScorerIF child = firstParent.createVariant(firstParent.getBaseName(), newParams);
222                final String childSNP = child.getNameAndParameters();
223                // Always mutate the child if identical to one parent
224                // or already exists in the population.
225                newVariant = (!parents.contains(childSNP) && (population.getScorerWeighting(child, true) == null)) ?
226                    child /* Already new. */ :
227                    child.createPerturbedVariant();
228                }
229    
230            // Check that this is new (or at least not rescored recently).
231            if(population.getScorerWeighting(newVariant, true) != null) { return(false); }
232    
233    //if(IsDebug.isDebug) { log.log("[ScorerCreator.createNewByGA(): trying new variant: " + newVariant + " of " + (new ArrayList<String>(parents)) + "...]"); }
234    
235            // Evaluate new variant, automatically recording/sharing if good.
236            /* final ScoreAndConf sac = */ cache.computeScorerWeighting(newVariant, false, SSOURCE_GA);
237            // Check that it is good enough to remain in the population...
238            if(population.getScorerWeighting(newVariant, false) != null) { return(false); }
239    //if(IsDebug.isDebug) { log.log("[ScorerCreator.createNewByGA(): ...variant ScoreAndConfidence: " + sac + ".]"); }
240            return(true); // Successfully created and evaluated a new Scorer.
241            }
242    
243    
244        /**Create a variant Scorer given an extant same-type Scorer and the array of values of the variable parameters; never null.
245         */
246        private static ScorerIF constructModifiedSNP(final double[] par, final ScorerIF initialScorer, final List<ScorerParam> paramDefsAndValuesOriginal, final int[] mnIndexToParamIndex)
247            {
248            if((par == null) || (initialScorer == null) ||
249               (paramDefsAndValuesOriginal == null) || (mnIndexToParamIndex == null))
250                { throw new IllegalArgumentException(); }
251            // Copy the original parameter values,
252            // overwriting the changed variable parameters under control of the minimiser.
253            final List<ScorerParam> params = new ArrayList<ScorerParam>(paramDefsAndValuesOriginal);
254            for(int i = par.length; --i >= 0; )
255                {
256                final int origParamIndex = mnIndexToParamIndex[i];
257                final ScorerParam p = paramDefsAndValuesOriginal.get(origParamIndex);
258                if(p instanceof ScorerParamInteger)
259                    {
260                    final ScorerParamInteger pi = (ScorerParamInteger) p;
261                    final int newValue = Math.min(pi.max, Math.max(pi.min, (int) Math.round(par[i])));
262                    // Replace the parameter in situ if the value has changed from the original.
263                    if(pi.value != newValue)
264                        { params.set(origParamIndex, ScorerParamInteger.createScorerParamInteger(pi.min, pi.def, pi.max, pi.delta, pi.biasedLow, pi.name, newValue)); }
265                    continue;
266                    }
267                assert(false); // Unexpected parameter type has been varied.
268                }
269    
270            // Now create the full Scorer name with parameters.
271            return(initialScorer.createVariant(initialScorer.getBaseName(), params));
272            }
273    
274    //    /**If true then a minimiser run that finds a new/'best' Scorer is allowed to run to completion.
275    //     * This may make worker threads slow to die, but they should be daemons and interruptable anyway.
276    //     */
277    //    private static final boolean ALLOW_MIN_RUN_TO_COMPLETION_AFTER_BEST = true;
278    
279        /**Scorer 'source' for the "minimiser" method; never empty nor null. */
280        public static final String SSOURCE_MIN = "MIN";
281    
282        /**Create a new Scorer (parameter set) from existing ones using a multi-variable "minimiser".
283         * This selects a "good" Scorer and then attempts to find a better/optimal one
284         * derived from it using mathematical error-minimisation techniques.
285         * <p>
286         * For the moment this only attempts to adjust the continuously-variable properties
287         * (ie the integer parameter values) and leaves the enumeration parameters alone.
288         *
289         * @param population  the extant population; never null
290         * @param calibSubset  if non-null, should be non-empty and is a calibration set
291         *     to optimise/minimise against
292         * @param endTime  target end time to stop computations at
293         *
294         * @return true iff we created and evaluated a new Scorer not in the extant population
295         */
296        public static boolean createNewByOpt(final SimpleExhibitPipelineIF dataSource,
297                                             final ScorerCacheIF cache,
298                                             final ScorerPopulation population,
299                                             final Map<Name.ExhibitShort, ScoreAndConf> calibSubset,
300                                             final SimpleLoggerIF log,
301                                             final long endTime)
302            throws IOException
303            {
304            if((dataSource == null) ||
305               (cache == null) ||
306               (population == null) ||
307               (log == null))
308                { throw new IllegalArgumentException(); }
309    
310            final long startTime = System.currentTimeMillis();
311            // We'll extend our finish time if we made progress during the original time slice.
312            final long extendedEndTime = endTime + Math.max(100, 2*(endTime-startTime));
313    
314            // Pick breeding set to work on.
315            // Allow slightly stale values for speed and robustness.
316            final List<String> breedingSet = pickBreedingSet(cache, population, true);
317            if(breedingSet == null) { return(false); /* No suitable breeding stock... */ }
318    
319            // Are we optimising for error against a small calibration subset?
320            final boolean useCalibSubset = (calibSubset != null) && !calibSubset.isEmpty();
321            // This value is guaranteed to be non-null and non-empty if useCalibSubset is true.
322            final Map<Name.ExhibitShort, ScoreAndConf> calib = (!useCalibSubset) ? null : calibSubset;
323            assert((!useCalibSubset) || ((calib != null) && (!calib.isEmpty()))) : "calib must be null or non-empty";
324    
325            final AllExhibitImmutableData aeid = dataSource.getAllExhibitImmutableData(-1);
326    
327            // Some of the time, fix some of the otherwise-variable parameters.
328            // we do this to allow the minimiser to focus its fire,
329            // have less chance of trying to adjust interacting parameters, etc.
330            // Only effective when the Scorer has 2 or more (continuously variable) parameters.
331            final boolean fixSomeParameters = Rnd.fastRnd.nextBoolean();
332    
333            // If we're fixing some parameters then we can optionally fix all those
334            // at or very close to their end stops.
335            final boolean fixAllExtremeParameters = fixSomeParameters && Rnd.goodRnd.nextBoolean();
336    
337            // Some of the time we perturb the initial Scorer we select from the breeding set.
338            // We do this to try to move out of an existing local minimum,
339            // especially as some perturbations may be quite large.
340            final boolean perturbInitialScorer = Rnd.goodRnd.nextBoolean();
341    
342            // Randomly select an extant Scorer,
343            // with selection skewed towards "better" Scorers from the available breeding set.
344            final String extant0 = breedingSet.get(
345                Rnd.goodRnd.nextInt(1 + Rnd.goodRnd.nextInt(breedingSet.size())));
346            // Create the Scorer instance with its parameters parsed.
347            final ScorerIF initialScorer0 = cache.getScorerInstance(extant0);
348            assert(initialScorer0 != null) : "must be able to recreate Scorer instance from breeding set SNP: "+extant0; // Must be able to create Scorer from value in extant population.
349            // Note the initial score/goodness.
350            final ScoreAndConf sacExtant0 = cache.computeScorerWeighting(extant0, true, null);
351            // Some of the time, perturb the parameters before we start.
352            // This may help get us out of any local minimum, ie a rut...
353            final String extant;
354            final ScorerIF initialScorer;
355            final ScoreAndConf sacExtant;
356            if(perturbInitialScorer)
357                {
358                // Use perturbed version of extant Scorer.
359                initialScorer = initialScorer0.createPerturbedVariant();
360                extant = initialScorer0.getNameAndParameters();
361    //            extant = initialScorer0.getPerturbedNameAndParameters();
362    //            initialScorer = cache.getScorerInstance(extant);
363                assert(initialScorer != null) : "must be able to recreate Scorer instance from perturbed SNP: "+extant; // Must be able to create instance for this Scorer variation.
364                sacExtant = cache.computeScorerWeighting(extant, true, SSOURCE_GA); // Simple mutation...
365                }
366            else
367                {
368                // Use selected extant Scorer as-is.
369                initialScorer = initialScorer0;
370                extant = extant0;
371                sacExtant = sacExtant0;
372                }
373            // Compute the goodness of our initial/starting Scorer.
374            final int goodnessExtant = ScoreAndConf.computeScorerGoodness(sacExtant);
375    //if(IsDebug.isDebug) { System.out.println("[ScorerCreator.createNewByOpt(): working on "+extant+"...]"); }
376    
377            // Ensure fast in-order and random access to the original parameter definitions.
378            // This assumes that all parameters have unique names.
379            // This must not be altered.
380            final List<ScorerParam> paramDefsAndValuesOriginal = Collections.unmodifiableList(new ArrayList<ScorerParam>(initialScorer.getParameterDefsAndValues()));
381            final int nParams = paramDefsAndValuesOriginal.size();
382    
383            // Map from from MnUserParameters params index to paramDefsAndValuesOriginal position.
384            // This makes it easy to simply take a copy of the original
385            // and then overwrite controlled parameter values in place.
386            // We allow this to be over-long...
387            final int mnIndexToParamIndex[] = new int[nParams];
388    
389    //        // We precompute the expanded versions of all the rest
390    //        // to make it relatively quick to assemble a modified parameter set.
391    //        final String fixedParams[] = new String[nParams];
392    
393            // Create the parameter set,
394            // including only those parameter types with reasonably continuous values,
395            // ie including integer parameters and excluding enumeration parameters.
396            final MnUserParameters params = new MnUserParameters();
397            int mnParamNum = 0;
398            for(int i = 0; i < nParams; ++i)
399                {
400                final ScorerParam p = paramDefsAndValuesOriginal.get(i);
401                assert(p != null) : "parameter cannot be null in paramDefsAndValuesOriginal from "+initialScorer;
402                final String name = p.getName();
403    
404                // Allow the minimiser to vary integer-valued parameters.
405                // If we're fixing extreme parameters (which may hinder the minimiser)
406                // then we simply don't vary any parameter within a delta of its limits.
407                if(p instanceof ScorerParamInteger)
408                    {
409                    final ScorerParamInteger pi = (ScorerParamInteger) p;
410                    // Create this as a variable parameter, if:
411                    //  * not fixing extreme-valued parameters, OR
412                    //  * not within a delta of the extremes (all but AT the extremes for delta==1).
413                    if((!fixAllExtremeParameters) ||
414                        ((pi.value - pi.min >= pi.delta) && (pi.max - pi.value >= pi.delta)))
415                        {
416                        // We set the error tolerance similar the "delta" value
417                        // since it is assumed to represent a "small" non-critical change.
418                        // We use a minimum error value of 1 to reflect the int granularity.
419                        params.add(name, pi.value, (pi.delta < 2) ? 1 : pi.delta, pi.min, pi.max);
420                        mnIndexToParamIndex[mnParamNum++] = i; // Provide mapping.
421                        continue;
422                        }
423                    }
424    
425    //            // For a parameter that cannot be varied (reasonably) smoothly,
426    //            // precompute its text representation for speed later.
427    //            fixedParams[i] = p.toNameValueString();
428                }
429    
430            // If we are fixing some otherwise-variable parameters, we do it here.
431            // We do this only if there is more than one variable parameter.
432            final int variableParamCount = mnParamNum;
433            assert(params.variableParameters() == variableParamCount) : "params.variableParameters() == variableParamCount";
434            if(fixSomeParameters && (variableParamCount > 1))
435                {
436                // We fix at least one of, but never all of, the parameters.
437                // We skew towards leaving few parameters variable.
438                final int parametersToFix = variableParamCount - 1 -
439                    Rnd.goodRnd.nextInt(1+Rnd.goodRnd.nextInt(variableParamCount-1));
440                assert((parametersToFix > 0) && (parametersToFix < variableParamCount)) : "must not fix all params nor none";
441    //if(IsDebug.isDebug) { log.log("[Fixing "+parametersToFix+" parameter(s) out of "+variableParamCount+"...]"); }
442                // Collection of indices of parameters to fix.
443                final List<Integer> toFix = new ArrayList<Integer>(variableParamCount);
444                for(int i = variableParamCount; --i >= 0; ) { toFix.add(i); }
445                Collections.shuffle(toFix, Rnd.goodRnd);
446                for(int i = parametersToFix; --i >= 0; )
447                    { params.fix(toFix.get(i)); }
448                assert(params.variableParameters() == variableParamCount - parametersToFix) : "params.variableParameters() == variableParamCount - parametersToFix";
449                }
450    //if(IsDebug.isDebug) { log.log("[Minimiser: varying parameters "+params.variableParameters()+" out of "+nParams+"...]"); }
451    
452            // Local error-signalling class
453            // (to wrap a legit IOException and propagate up through the minimiser).
454            @SuppressWarnings("serial")
455            class WrappedIOException extends RuntimeException
456                { WrappedIOException(final IOException cause) { super(cause); } }
457    
458            // Set true if we find a new Scorer.
459            final AtomicBoolean foundNewScorer = new AtomicBoolean();
460    
461            // Set true if we find a Scorer better than our initial one.
462            final AtomicBoolean foundBetterNewScorer = new AtomicBoolean();
463    //
464    //        // Set true if we found the best-so-far Scorer at least once during this run.
465    //        final AtomicBoolean foundBest = new AtomicBoolean();
466    
467            // Maximum error that we can generate in the evaluation functor.
468            final int maxERR = ScoreAndConf.MAX_GOODNESS;
469            assert(maxERR > 0) : "maxERR must be strictly positive as an int";
470    
471            // Wrap our Scorer-evaluation routine as a standard functor.
472            // We remember all values so far computed on this evaluation
473            // for speed and consistency.
474            // We are prepared to abort the calculation once:
475            //  * we have passed our target end time, and,
476            //  * we have found a new Scorer parameter set better than our starting set.
477            // Better means BOTH higher "goodness" AND higher correlation.
478            final FCNBase fcn = (new FCNBase() {
479                /**Map of values so-far requested/computed (thread-safe). */
480                private final ConcurrentMap<String,Integer> answers = new ConcurrentHashMap<String, Integer>();
481                /**Return the "error" of the Scorer, to be minimised. */
482                public double valueOf(final double[] par)
483                    {
484                    final ScorerIF toEvalSc = constructModifiedSNP(par, initialScorer, paramDefsAndValuesOriginal, mnIndexToParamIndex);
485                    final String toEvalS = toEvalSc.getNameAndParameters();
486                    // After rounding we may find that we have a previous answer to hand
487                    // if so, use it for consistency and speed.
488                    final Integer cachedAnswer = answers.get(toEvalS);
489                    if(cachedAnswer != null) { return(cachedAnswer); }
490                    // Find out if this Scorer is novel/new.
491                    final boolean isNew = (population.getScorerWeighting(toEvalSc, true) == null);
492                    try
493                        {
494                        int goodness = Integer.MIN_VALUE;
495                        // We can stop if we've found a new Scorer better than we started with.
496                        // This helps limit the execution time while still ensuring progress.
497                        if((!useCalibSubset) || isNew)
498                            {
499                            // Allow use of stale/cached values here for speed and robustness
500                            // with the risk that an inconsistent mix of stale and up-to-date values
501                            // may make finding the minimum error difficult or impossible at times.
502                            final ScoreAndConf sac = cache.computeScorerWeighting(toEvalSc, true, SSOURCE_MIN);
503                            goodness = ScoreAndConf.computeScorerGoodness(sac);
504    
505                            // Check that new Scorer is good enough to remain in the general population.
506                            if(isNew && (population.getScorerWeighting(toEvalSc, false) != null))
507                                {
508                                // Note if we generated a useful novel Scorer
509                                // (ie good enough to have the overall routine return true).
510                                foundNewScorer.set(true);
511                                // If our new Scorer is better than we started with then this is progress.
512                                // We have a strict notion of "better" which is both the "goodness" measure
513                                // AND the underlying accuracy (which is more than we are minimising for).
514                                if((goodness > goodnessExtant) && (sac.score > sacExtant.score))
515                                    { foundBetterNewScorer.set(true); }
516                                }
517                            }
518    
519                        // Check if we need to abort the minimiser because we've run out of time.
520                        // If we haven't found anything better than we started with and are now out of time, abort.
521                        if(System.currentTimeMillis() > (foundBetterNewScorer.get() ? extendedEndTime : endTime))
522                            {
523    //if(IsDebug.isDebug) { System.out.println("[ScorerCreator.createNewByOpt(): aborting minimiser having run out of time.]"); }
524                            throw new InterruptedIOException("aborting minimiser: out of time");
525                            }
526                        // If this thread has been interrupted, abort immediately.
527                        if(Thread.interrupted())
528                            { throw new InterruptedIOException("thread interrupted"); }
529    
530                        // If using the calibration set then use its measure of goodness instead for the error.
531                        // Convert score*confidence into (non-positive) error value (to be minimised).
532                        // The better the score, the lower the error.
533                        // MAX^2 s.c => 0 error; -MAX^2 s.c => maxERR +ve error.
534                        // We make sure that we use the same metric that ScorerPopulation does.
535                        final int error = maxERR - (((!useCalibSubset) ? goodness :
536                            ScoreAndConf.computeScorerGoodness(ScorerCreator.computeWeighting(calib, cache, toEvalSc, aeid, true).first)));
537    //if(IsDebug.isDebug) { System.out.println("[ScorerCreator.createNewByOpt(): error="+(error)+" having evaluated "+toEval+"...]"); }
538                        assert((error >= 0) && (error <= 2*maxERR)) : ("Error expected to be in range [0,"+(2*maxERR)+"] but was "+error) ;
539                        answers.putIfAbsent(toEvalS, error); // Cache this answer.
540                        return(error);
541                        }
542                    catch(final IOException e)
543                        {
544                        // Wrap the original exception and rethrow (as our unchecked exception)
545                        // which will abort the minimisation relatively gracefully.
546                        throw new WrappedIOException(e);
547                        }
548                    }
549                });
550    
551            // Select the general strategy.
552            // We select the strategy randomly to maximise our chance of success.
553            // Skew strategy selection heavily towards "fast" (0) rather than "safe" (2),
554            // since this will be a rather hard function to handle anyway.
555            final MnStrategy strategy = new MnStrategy(Rnd.fastRnd.nextInt(1+Rnd.fastRnd.nextInt(1+Rnd.fastRnd.nextInt(3))));
556    
557            // Create the minimiser instance...
558            final MnMinimize minimiser = new MnMinimize(fcn, params, strategy.strategy());
559            // Warn the minimiser that we don't in practice have much intermediate calculation precision.
560            minimiser.setPrecision(1.5/ScoreAndConf.MAX); // Approx limit of precision in score, for example.
561            // Limit the number of function calls that can be made (as they may be very slow)
562            // and we may not have space to retain all the useful intermediate results.
563            // Allow more calls with more parameters.
564            final int maxFnCalls = Math.min(1024 * nParams, population.maxSize()/2); // Crude limits...
565            // Specify a tolerance >> than the integer granularity (1) of the error value.
566            final int resultTolerance = maxERR/100; // Getting within 1% of optimum is just fine!
567            // ...and attempt to find the minimum.
568            // Note that we don't mind too much if the minimiser doesn't think that it finished,
569            // since it may have gotten us some way in the right direction anyway...
570    //        final long minimiserStartTime = System.currentTimeMillis();
571            try
572                {
573                final FunctionMinimum minimum = minimiser.minimize(maxFnCalls, resultTolerance);
574                assert(minimum != null) : "function minimum should be non-null";
575                // Don't try to use the function minimum at all if invalid.
576                if(!minimum.isValid()) { return(foundNewScorer.get()); }
577    
578                final double par[] = minimiser.params();
579                assert(par != null) : "minimiser.params() should not be null";
580                assert(fixSomeParameters || (par.length == minimum.userParameters().variableParameters())) : "unless fixing parameters, par.length should == minimum.userParameters().variableParameters()";
581                // This is the reconstructed "best" Scorer found so far.
582                final ScorerIF newVariant = constructModifiedSNP(par, initialScorer, paramDefsAndValuesOriginal, mnIndexToParamIndex);
583    
584                // Check that this Scorer name-and-parameters value is new.
585                // Note that evaluating the scorer internally may already have cached a result,
586                // so we regard finding a new value as successful if the minimisation succeeded,
587                // or if we had made some progress en passant...
588                if(population.getScorerWeighting(newVariant, true) != null)
589                    {
590                    return(foundNewScorer.get() ||
591                            (minimum.isValid() && !newVariant.getParameterDefsAndValues().equals(extant) &&
592                                                  !newVariant.getParameterDefsAndValues().equals(extant0)));
593                    }
594    
595                // Evaluate new variant, automatically recording/sharing if good.
596                // Force calculation using fully up-to-date values.
597                /* final ScoreAndConf sac = */ cache.computeScorerWeighting(newVariant, false, SSOURCE_MIN);
598                return(true); // Successfully created and evaluated new creature.
599                }
600            catch(final IOException e) { throw e; /* Propagate I/O problem as-is. */ }
601            catch(final WrappedIOException e)
602                {
603                if(foundNewScorer.get()) { return(true); /* Made progress anyway. */ }
604                // Didn't make progress, so rethrow the exception.
605                if(e.getCause() == null) { throw new IOException("WrappedIOException had null cause"); }
606                throw (IOException) e.getCause(); /* Unwrap and rethrow underlying IOException. */
607                }
608    //        finally
609    //            {
610    //            if(foundGoodEnoughNewScorer.get())
611    //                {
612    //                final long runTime = System.currentTimeMillis() - startTime;
613    ///* if(IsDebug.isDebug) */ { log.log("[Minimiser: made progress in "+(runTime)+"ms: varied parameters "+params.variableParameters()+" out of "+nParams+"...]"); }
614    //                }
615    //            }
616            }
617    
618    
619        /**Class to encapsulate all background and evolution work for a given ScorerCache.
620         * The doChunk() entry point is designed to be thread-safe and highly threadable,
621         * ie can be called from multiple worker threads at once.
622         */
623        public static final class ScorerWork
624            {
625            /**Create instance.
626             * @param scorerCache  cache of Scorers to work with; never null
627             * @param log  logger; never null
628             * @param allowScorerSharingByEvent  if true,
629             *     then we try to retrieve persisted/shared Scorers
630             *     from the system variables event mechanism
631             */
632            public ScorerWork(final AbstractScorerCache scorerCache,
633                              final SimpleLoggerIF log,
634                              final boolean allowScorerSharingByEvent)
635                {
636                if((scorerCache == null) || (log == null))
637                    { throw new IllegalArgumentException(); }
638                this.scorerCache = scorerCache;
639                this.log = log;
640                this.allowScorerSharingByEvent = allowScorerSharingByEvent;
641                }
642    
643            /**Underlying Scorer cache; never null. */
644            private final AbstractScorerCache scorerCache;
645    
646            /**Logger; never null. */
647            private final SimpleLoggerIF log;
648    
649            /**If true, then we try to retrieve persisted/shared Scorers from the system variables event mechanism. */
650            private final boolean allowScorerSharingByEvent;
651    
652            /**Excess time spent running slow evolution methods (ns).
653             * In order to attempt to balance the amount of time
654             * spent on different methods of generating new Scorers,
655             * given that they can be orders of magnitude apart in execution time,
656             * we attempt to balance the total run-time they take.
657             * <p>
658             * This is increased by the execution time of each "slow" instance
659             * and decreased by the execution time of each "fast" instance.
660             * Slow methods may only start when it is negative, fast ones otherwise.
661             * <p>
662             * Run-time for "failed" executions may or may not be included.
663             * <p>
664             * This starts at zero, and the effect of the "fair share" scheduling that this controls
665             * should be to keep it as close to zero (on either side of zero) as possible.
666             * <p>
667             * This is thread-safe.
668             */
669            private final AtomicLong excessSlowTimeNs = new AtomicLong();
670    
671            /**Run one chunk of work.
672             * Note that if the minTime or minEndTime values are not more than 1ms beyond 'now'
673             * then this avoids computations that are likely to require especially large
674             * amounts of CPU time/effort.
675             *
676             * @param special  if true, do extra background work and normal work
677             * @param inboundEval  if non-null then is queue of externally-supplied Scorers
678             *     to evaluate and insert into the population if appropriate;
679             *     these are regarded as potentially untrusted so are validated and canonicalised before use
680             * @param endTime  target end time for this chunk of work to stop by
681             * @param minEndTime  time before which we should not stop (ie target minimum quantum)
682             */
683            public final void doChunk(final long endTime, final long minEndTime,
684                                      final boolean special,
685                                      final Queue<String> inboundEval)
686                {
687                final ScorerPopulation population = scorerCache.getPopulation();
688    
689                // We do some extra housekeeping work first in "special" threads
690                // or when we need to bootstrap.
691                if(special || (population.size() == 0))
692                    {
693    if(IsDebug.isDebug && (population.size() == 0)) { System.out.println("ScorerCreator.doChunk(): bootstrapping empty population"); }
694                    // Make sure that we get base and best Scorers up to date and keep them so.
695                    // This will also ensure that all sorts of supporting data is kept fresh too.
696                    final ArrayList<String> shuffledBaseNames = new ArrayList<String>(scorerCache.getBaseScorersWithoutParameters());
697                    // Getting these 'best' values should not cause any exceptions or delay.
698                    // Re-evaluating these should ensure that the best deserve to remain so,
699                    // else incrementally push them out of the way to allow replacements.
700                    shuffledBaseNames.addAll(scorerCache.getCurrentScorersWithParameters(Rnd.fastRnd.nextBoolean()));
701                    // Shuffle the Scorers to give a fair/equal crack of the whip to all-comers.
702                    Collections.shuffle(shuffledBaseNames, Rnd.fastRnd);
703                    for(final String scorerName : shuffledBaseNames)
704                        {
705    if(IsDebug.isDebug && (population.size() == 0)) { System.out.println("ScorerCreator.doChunk(): bootstrapping empty population with: "+scorerName); }
706                        // Attempt to bring the selected Scorer weighting fully up-to-date
707                        // if not already so.
708                        try { scorerCache.computeScorerWeighting(scorerName, Rnd.fastRnd.nextBoolean(), null); }
709                        catch(final IOException e) { /* Ignore probably-transient I/O problem. */ }
710                        catch(final Exception e)
711                            {
712                            // Stop on unexpected error.
713                            e.printStackTrace();
714                            System.err.println("Problem with computeScorerWeighting("+scorerName+", false): " + e.getMessage());
715                            }
716                        // Stop and exit this run immediately if we've exhausted our time chunk...
717                        // This forces us to get the basics in place and up-to-date first.
718                        if(System.currentTimeMillis() > endTime) { return; }
719                        }
720                    }
721    
722                // If there is still an empty population then stop this work chunk immediately
723                // since there is nothing to 'evolve' from,
724                // unless we have inbound queued Scorers
725                // or we are permitted to import Scorers from system events.
726                if((population.size() == 0) && (!allowScorerSharingByEvent) &&
727                        ((inboundEval == null) || inboundEval.isEmpty()))
728                    { return; }
729    
730                // Because the optimiser/minimiser method is possibly hundreds of times slower
731                // than the normal GA (select/combine/mutate) method,
732                // we attempt to force some type of fair-share of CPU time post-hoc.
733                // By selecting outside the loop "fast"/"slow",
734                // we reduce the chance of accidentally starting lots of slow methods at once.
735                // We avoid slow methods where our available time quantum seems very small
736                // at least in relation to our ability to measure time accurately.
737                // TODO: We might also avoid slow methods in power-conserving mode,
738                // since otherwise we may uninterruptibly need a large chunk of energy.
739                final boolean doSlowMethods = (excessSlowTimeNs.get() < 0) &&
740                    (minEndTime - System.currentTimeMillis() > 1) &&
741                    (endTime - System.currentTimeMillis() > 1);
742                // If using slow methods (eg minimisation)
743                // then some of the time optimise/minimise against a small calibration subset for speed.
744                // If we encounter any problem trying to extract the calibration set, we do without.
745                final boolean useCalibSubset = doSlowMethods && Rnd.fastRnd.nextBoolean();
746                Map<Name.ExhibitShort, ScoreAndConf> calibSubset = null;
747                if(useCalibSubset)
748                    {
749                    // Try to generate a generic (and large-ish if possible) calibration set
750                    // (that reflects strengths/weaknesses amongst all available Scorers)
751                    // since we don't know at this point which Scorer we will be tuning.
752                    try { calibSubset = scorerCache.extractCalibrationSet(null, ScorerCreator.SUGGESTED_CALIB_SET_SIZE, null, true); }
753                    catch(final IOException e) { /* Ignore error and do without calibration subset. */ }
754                    }
755    
756                // Try to create several new/good Scorers values
757                // or to import several good saved Scorers from local or global state.
758                // Do extra rounds if we've nowhere-near filled up our population/gene pool yet
759                // and/or if computation is going very quickly.
760                final int maxRounds = Math.min(1+SystemVariables.EVENT_SAMPLES_RETAINED,
761                        Math.max(3, (population.maxSize() / Math.max(1, population.size()))));
762                int roundsDone = 0; // Decremented on successful creation/import of new variant.
763                try
764                    {
765                    // Continue until we've made made visible progress
766                    // or at least until we've being trying for a reasonable time,
767                    // but stop as soon as we pass our target end time or we are interrupted.
768                    while(((roundsDone < maxRounds) || (System.currentTimeMillis() <= minEndTime)) &&
769                            (System.currentTimeMillis() <= endTime) && (!Thread.interrupted()))
770                        {
771                        // Attempt to create new Scorer variations
772                        // or import extant Scorers from local/global event histories or outside.
773                        // In each case, these routines should only return true
774                        // if fairly sure that they have made useful progress.
775                        if(Rnd.fastRnd.nextBoolean()) // Try to 'import' half the time...
776                            {
777                            if(allowScorerSharingByEvent && Rnd.fastRnd.nextBoolean())
778                                {
779                                // Try to fetch a new top Scorer from another population
780                                // or find a "golden oldie" from local/global history.
781                                // Fast.
782                                if(ScorerCreator.retrieveOldScorer(scorerCache.getDataSource(), scorerCache, population, log))
783                                    { ++roundsDone; }
784                                }
785                            else if(inboundEval != null)
786                                {
787                                final String next = inboundEval.poll(); // Non-blocking read.
788                                // Count this as successful 'round'
789                                // if there is a new inbound Scorer whose canonicalised form is new
790                                // and yields a positive goodness.
791                                if(next != null)
792                                    {
793                                    try
794                                        {
795                                        final ScorerIF sc = scorerCache.getScorerInstance(next);
796                                        if(sc != null)
797                                            {
798                                            final String canon = AbstractScorer.canonicalise(sc);
799                                            if(population.getScorerWeighting(canon, true) == null)
800                                                {
801                                                final ScoreAndConf newSAC = scorerCache.computeScorerWeighting(canon, false, "inbound");
802                                                if(ScoreAndConf.computeScorerGoodness(newSAC) > 0)
803                                                    { ++roundsDone; }
804                                                }
805                                            }
806                                        }
807                                    catch(final Exception e)
808                                        { /* Ignore errors from potentially-untrusted inbound Scorer values. */ }
809                                    }
810                                }
811                            }
812                        else // Try to 'create' half the time.
813                            {
814                            final long startTime = System.nanoTime();
815                            if(!doSlowMethods)
816                                {
817                                try
818                                    {
819                                    // Incremental (quick) creation of new Scorer by GA methods:
820                                    // selection, combination, mutation.
821                                    // Fast.
822                                    if(ScorerCreator.createNewByGA(scorerCache, population, log))
823                                        { ++roundsDone; }
824                                    }
825                                finally { excessSlowTimeNs.addAndGet(startTime - System.nanoTime()); /* Subtract run time. */ }
826                                }
827                            else
828                                {
829                                try
830                                    {
831                                    // Potentially slow minimiser-based method, seeking "optimum" parameters.
832                                    // Can probably execute only once on each run.
833                                    // Each run may generate many novel Scorer values, however.
834                                    if(ScorerCreator.createNewByOpt(scorerCache.getDataSource(), scorerCache, population, calibSubset, log, endTime))
835                                        { ++roundsDone; }
836                                    }
837                                finally { excessSlowTimeNs.addAndGet(System.nanoTime() - startTime); /* Add run time. */ }
838                                }
839                            }
840    
841                        // If memory seems to be under huge strain
842                        // or the system is conserving CPU or power
843                        // then abort once we have apparently made some progress
844                        // (to avoid system/memory stress and starvation to no end).
845                        if((roundsDone > 0) &&
846                                (MemoryTools.isMemoryStressed() || GenUtils.mustConserveCPU() || GenUtils.mustConservePower()))
847                            { return; }
848                        }
849                    }
850                catch(final IOException e) { /* Stop this run on probably-transient I/O problem. */ }
851                catch(final Throwable t) { t.printStackTrace(); /* Stop this run on serious error. */ }
852                }
853            }
854    
855    
856        /**Select at random a best-first breeding set of one Scorer base type; null if none available.
857         * Returns the non-empty breeding set of a parameterised Scorer selected at random,
858         * or null if no such breeding set is available.
859         * <p>
860         * We weight this more heavily towards breeding sets whose best constituent has a high "goodness".
861         *
862         * @param population  the extant population; never null
863         *
864         * @return null or in-order (best-first) non-empty breeding set of same Scorer type
865         */
866        private static List<String> pickBreedingSet(final ScorerCacheIF cache,
867                                                    final ScorerPopulation population,
868                                                    final boolean allowStale)
869            {
870            // Select at random one of the top or base Scorer names,
871            // though avoiding any that have no parameters or an empty breeding set.
872            // We ensure that the base Scorers are included to allow bootstrapping.
873            // If we have more entries for the bigger populations then than is a useful skew.
874            final List<String> topScorers = new ArrayList<String>(population.getCurrentScorersWithParameters(allowStale));
875            topScorers.addAll(cache.getBaseScorersWithoutParameters());
876    
877            // Remove all clearly-ineligible Scorers,
878            // ie those without parameters.
879            for(final Iterator<String> it = topScorers.iterator(); it.hasNext(); )
880                {
881                final ScorerIF sc = cache.getScorerInstance(it.next());
882                final Collection<ScorerParam> parameterDefs = sc.getParameterDefsAndValues();
883                if(parameterDefs.isEmpty()) { it.remove(); } // No variation possible.
884                }
885            // Return null if no candidates left.
886            final int tsSize = topScorers.size();
887            if(tsSize == 0) { return(null); }
888    
889            // Sort remainder by (decreasing) goodness, ie best-first.
890            Collections.sort(topScorers, new Comparator<String>() {
891                public int compare(final String o1, final String o2)
892                    {
893                    final ScoreAndConf sw1 = population.getScorerWeighting(o1, allowStale);
894                    final int g1 = (sw1 != null) ? ScoreAndConf.computeScorerGoodness(sw1) : Integer.MIN_VALUE;
895                    final ScoreAndConf sw2 = population.getScorerWeighting(o2, allowStale);
896                    final int g2 = (sw2 != null) ? ScoreAndConf.computeScorerGoodness(sw2) : Integer.MIN_VALUE;
897                    if(g1 < g2) { return(-1); }
898                    if(g1 > g2) { return(+1); }
899                    return(0); // Identical.
900                    }
901                });
902    
903            // Try to use breeding set skewed towards earlier (better) values.
904            // If our first choice is not available, work down the list (to less-good Scorers).
905            final int startIndex = Rnd.goodRnd.nextInt(1+Rnd.goodRnd.nextInt(tsSize));
906            for(int i = startIndex; i < tsSize; ++i)
907                {
908                final ScorerIF sc = cache.getScorerInstance(topScorers.get(i));
909    //          final Collection<ScorerParam> parameterDefs = sc.getParameterDefsAndValues();
910    //          if(parameterDefs.isEmpty()) { continue; } // No variation possible.
911                final List<String> bs = population.getBreedingSet(sc.getBaseName());
912                if(!bs.isEmpty()) { return(bs); }
913                }
914    
915            // Fallback...
916            // If we could not get a breeding set with our preferred skewed weighting
917            // then shuffle the names randomly and return the first suitable entry.
918            Collections.shuffle(topScorers, Rnd.fastRnd);
919            for(final String snp : topScorers)
920                {
921                final ScorerIF sc = cache.getScorerInstance(snp);
922    //            final Collection<ScorerParam> parameterDefs = sc.getParameterDefsAndValues();
923    //            if(parameterDefs.isEmpty()) { continue; } // No variation possible.
924                final List<String> bs = population.getBreedingSet(sc.getBaseName());
925                if(!bs.isEmpty()) { return(bs); }
926                }
927    
928            // There really was no suitable breeding sub-population at all.
929            return(null);
930            }
931    
932        /**If true then monitor and report performance (CPU usage) in computeWeighting().
933         * Turn this on for (Scorer) performance tuning only...
934         * <p>
935         * This should give a reasonable view of the most compute-intensive Scorer workload.
936         */
937        private static final boolean PERF_SAMPLE_CW = false;
938    
939        /**Cumulative performance stats (if any) for computeWeighting(); null iff none being collected. */
940        private static final ConcurrentHashMap<StackTraceElement, AtomicInteger> perfCountsCW = (!PERF_SAMPLE_CW) ? null : new ConcurrentHashMap<StackTraceElement, AtomicInteger>(1001);
941    
942        /**Computes ScoreAndConf over the supplied calibration exhibits using the supplied Scorer cache; never null but may be (0,0) where the scorer is unknown or untested.
943         * This can be used as part of the computation of a Scorer's weighting.
944         * The input data must not change while this routine is running.
945         *
946         * @param calibrationData  map from short exhibit names to external calibration data
947         *     (eg actual user vote-based score); never null
948         *
949         * @return  the score (first in the Pair) represents the correlation with the underlying votes
950         *     (and whatever the scoring is measured against)
951         *     with MAX meaning perfect correlation, 0 meaning no correlation,
952         *     and -MAX meaning perfectly wrong answers all the time,
953         *     and the confidence 0 if we have no (or very/too few) data points
954         *     and approaching MAX as we have a large (enough) number of data points;
955         *     the second element of the Pair is true if the the value is "partial"
956         *     (we had to prematurely stop calculation or did not have enough data points)
957         *     and so for example should not be cached
958         */
959        public static Tuple.Pair<ScoreAndConf,Boolean> computeWeighting(
960                                                    final Map<Name.ExhibitShort, ScoreAndConf> calibrationData,
961                                                    final ScorerCacheIF scorerCache,
962                                                    final ScorerIF scorer,
963                                                    final AllExhibitImmutableData aeid,
964                                                    final boolean allowStale)
965            throws IOException
966            {
967            if((calibrationData == null) ||
968               (scorerCache == null) ||
969               (aeid == null) ||
970               (scorer == null))
971                { throw new IllegalArgumentException(); }
972    
973            // Note start time so as to be able to limit effort/time spent in this call.
974            final long startTime = System.currentTimeMillis();
975    
976            // If there is no calibration data then we cannot compute (nor cache) a result.
977            if(calibrationData.isEmpty()) { return(new Tuple.Pair<ScoreAndConf, Boolean>(ScoreAndConf.NO_OPINION, true)); }
978    
979            // If there are no exhibits then we cannot compute (nor cache) a result.
980            if(aeid.length == 0) { return(new Tuple.Pair<ScoreAndConf, Boolean>(ScoreAndConf.NO_OPINION, true)); }
981    
982            // Start performance sampling/monitoring, if any.
983            Thread perfMonitorThread = null;
984            if(PERF_SAMPLE_CW)
985                {
986                perfMonitorThread = GenUtils.startThreadPerfMonitor(
987                        Thread.currentThread(),
988                        perfCountsCW,
989                        null, // "org.hd.d.pg2k.ai.",
990                        "org.hd.d.pg2k.", // Monitor thread for at most 10s.
991                        System.currentTimeMillis() + 10000, 10); // Sample often enough to obtain a decent profile.
992                }
993            try
994                {
995                // Last captured error, and error count, if any.
996                IOException err = null;
997                int errorCount = 0;
998                // If we didn't finish then we should not cache the result.
999                boolean stoppedPrematurely = false;
1000                // Compute map from short name to score computed by this Scorer.
1001                final Map<Name.ExhibitShort, ScoreAndConf> scorerResults = new HashMap<Name.ExhibitShort, ScoreAndConf>(
1002                        2 * calibrationData.size());
1003                // Randomise the order in which we do the computations
1004                // to better queue background work, avoid systemic and order-dependent errors, etc.
1005                final List<Name.ExhibitShort> names = new ArrayList<Name.ExhibitShort>(calibrationData.keySet());
1006                if(Rnd.fastRnd.nextBoolean())
1007                    { Collections.shuffle(names, Rnd.fastRnd); }
1008                for(final Name.ExhibitShort shortName : names)
1009                    {
1010                    if(!aeid.isPresent(shortName)) { continue; }
1011                    final Name.ExhibitFull fullName = shortName.getFullName();
1012    //                // If the exhibit is missing (eg has been deleted or renamed) then skip it.
1013    //                final Name.ExhibitFull fullName = aeid.getFullName(shortName);
1014    //                if(fullName == null) { continue; }
1015    
1016                    try
1017                        {
1018                        // Compute raw Scorer result for this exhibit.
1019                        // Note that this may quickly return zero confidence
1020                        // therefore meaning we can skip further work for this exhibit
1021                        // since this Scorer may not be suitable for this exhibit type.
1022                        final ScoreAndConf sac = scorerCache
1023                                .computeUnweightedScoreAndConfidence(fullName,
1024                                        scorer, allowStale);
1025                        // If confidence is zero then skip this exhibit.
1026                        if(sac.confidence == 0) { continue; }
1027    
1028                        // Record this non-zero-confidence result.
1029                        scorerResults.put(shortName, sac);
1030                        }
1031                    catch(final IOException e)
1032                        {
1033                        // Capture the error.
1034                        ++errorCount;
1035                        err = e;
1036    
1037                        // We are prepared to skip over a few missing data points
1038                        // (ie ignore a small number of possibly-transient errors)
1039                        // though then do not subsequently cache the result
1040                        // unless there has been only a very small fraction of errors.
1041                        // This may also allow requests (eg for thumbnails/events)
1042                        // to be queued in the background ready for next time
1043                        // while making computation generally more robust.
1044                        final int usableExhibits = scorerResults.size();
1045                        if((errorCount <= usableExhibits)
1046                                || (System.currentTimeMillis() - startTime < CoreConsts.MAX_INTERACTIVE_DELAY_MS))
1047                            { continue; }
1048    
1049                        // We've spent long enough: do calculations based on what we have so far...
1050                        // If we've had to give up and we don't allow stale/partial results then abort...
1051                        if(!allowStale) { throw e; }
1052                        // ...else continue with what we have but don't cache it.
1053                        stoppedPrematurely = true;
1054                        break;
1055                        }
1056                    }
1057                // We will accept the result if there were far more data points
1058                // where we had a usable vote factor AND a non-zero Scorer confidence
1059                // than the count of errors that we hit (and points we should have included)
1060                // OR if there were some exhibits but none at all that we could use with this Scorer.
1061                final int usableExhibits = scorerResults.size();
1062                assert (aeid.length > 0);
1063                // We will accept/trust the result if there were far more data points
1064                // where we had a usable vote factor AND a non-zero Scorer confidence
1065                // than the count of errors that we hit (and points we should have included)
1066                // OR if there were some exhibits but none at all that we could use with this Scorer.
1067                // This value gets filled in after attempting to calibrate.
1068                final boolean enoughDataPoints = (errorCount <= usableExhibits
1069                        / ScorerCreator.MIN_SUGGESTED_CALIB_SET_SIZE);
1070                // If not allowing stale/partial results then we must abort after too many errors.
1071                if(!allowStale && !enoughDataPoints) { throw err; }
1072                return(new Tuple.Pair<ScoreAndConf, Boolean>(
1073                        ScorerCreator.computeWeighting(scorerResults, calibrationData),
1074                        stoppedPrematurely || !enoughDataPoints));
1075                }
1076            finally
1077                {
1078                if(PERF_SAMPLE_CW && (null != perfMonitorThread))
1079                    {
1080                    try
1081                        {
1082                        GenUtils.stopPerfMonitorandDumpSamples(perfMonitorThread,
1083                                "computeWeighting()",
1084                                perfCountsCW,
1085                                null,
1086                                20, GenUtils.systemOutLogger); // Log to system out...
1087                        }
1088                    catch(final InterruptedException e)
1089                        {
1090                        Thread.currentThread().interrupt(); // Don't lose interruptedness...
1091                        }
1092                    }
1093                }
1094            }
1095    
1096        /**Computes ScoreAndConf over the supplied calibration exhibits; never null but may be (0,0) where the scorer is unknown or untested.
1097         * This can be used as part of the computation of a Scorer's weighting.
1098         * <p>
1099         * All keys in the calibration data should map to non-null values in the ratings map
1100         * (ie the ratings keys can be a superset of the calibration keys);
1101         * missing values will diminish the confidence of the result.
1102         * <p>
1103         * The input data must not change while this routine is running.
1104         *
1105         * @param scorerResult  map from short exhibit name to Scorer's rating/result for the exhibit;
1106         *     never null
1107         * @param calibrationData  map from short exhibit name to external calibration data
1108         *     (eg actual user vote-based score); never null
1109         *
1110         * @return  the score represents the correlation with the underlying votes
1111         *     (and whatever the scoring is measured against)
1112         *     with MAX meaning perfect correlation, 0 meaning no correlation,
1113         *     and -MAX meaning perfectly wrong answers all the time,
1114         *     and the confidence 0 if we have no (or very/too few) data points
1115         *     and approaching MAX as we have a large (enough) number of data points
1116         */
1117        public static ScoreAndConf computeWeighting(final Map<Name.ExhibitShort, ScoreAndConf> scorerResult,
1118                                                    final Map<Name.ExhibitShort, ScoreAndConf> calibrationData)
1119            {
1120            // If there are no results or no calibration data then we cannot generate a confident answer.
1121            if(scorerResult.isEmpty() || calibrationData.isEmpty())
1122                { return(ScoreAndConf.NO_OPINION); }
1123    
1124            // Exhibits which were usable to calibrate this Scorer.
1125            int usableExhibits = 0;
1126            // The total squared error (and confidence) in the range [0, (2*MAX)^2].
1127            // Each error term is multiplied by (moderated downwards by)
1128            // the product of the Scorer and vote-Factor confidence/correlation.
1129            assert((long) ScoreAndConf.MAX * (long) ScoreAndConf.MAX <= Integer.MAX_VALUE);
1130            long cumError = 0;
1131            // The dot product of the Scorer confidence and the vote confidence,
1132            // with theoretical range [0,Y] where Y = ScoreAndConf.MAX^2 * usableExhibits.
1133            long confDP = 0;
1134            for(final Name.ExhibitShort shortName : calibrationData.keySet())
1135                {
1136                // Get vote-based rating (cannot be null or have non-positive correlation).
1137                final ScoreAndConf vote = calibrationData.get(shortName);
1138                // Vote correlation must be strictly positive to be usable.
1139                if(vote.confidence == 0) { continue; }
1140    
1141                // Compute raw Scorer result for this exhibit.
1142                // Note that this may quickly return zero confidence
1143                // thus meaning that we can skip further work for this exhibit
1144                // since this Scorer may not be suitable for this exhibit type.
1145                final ScoreAndConf sac = scorerResult.get(shortName);
1146                // If result is missing then sjip this exhibit.
1147                if(sac == null) { continue; }
1148                // If confidence is zero then skip this exhibit.
1149                if(sac.confidence == 0) { continue; }
1150    
1151                final int conf = sac.confidence * vote.confidence;
1152                assert(conf >= 0); // Should be no overflow...
1153                // Skip zero or noisy (very-low-confidence) correlations.
1154                if(conf < 1) { continue; }
1155                confDP += conf;
1156                // Compute raw error (difference) in range [0,2*MAX].
1157                // The Scorer result is an actual predicted score for the exhibit.
1158                // The smaller the difference between this predicted score
1159                // and the human-vote-derived score the better.
1160                final int rawError = sac.score - vote.score;
1161                // Compute squared error times the (normalised to [0.0,1.0]) confidence.
1162                final long sqErrConf = ((rawError * (long)rawError) * conf) / (ScoreAndConf.MAX * ScoreAndConf.MAX);
1163    //if(IsDebug.isDebug) { System.out.println("[Error/diff="+error+" @ conf="+conf+" for "+scorerNameAndParameters+" on "+shortName+"; sac="+sac+"; voteFactor="+voteFactor+".]"); }
1164                assert(sqErrConf >= 0);
1165                cumError += sqErrConf;
1166                assert(cumError >= 0); // FIXME: Check that we never overflow.
1167                ++usableExhibits;
1168                }
1169    
1170            ScoreAndConf result = ScoreAndConf.NO_OPINION; // Scorer unusable unless proven otherwise.
1171            // We can't compute a result with insufficient cumulative confidence.
1172            // We can cache a valid negative result though.
1173            if((usableExhibits > 0) && (confDP > 0))
1174                {
1175                final double svRangeConf = 1.0;
1176    
1177                // Verify that accumulators are in their expected ranges.
1178                assert((confDP >= 0) && (confDP <= (usableExhibits * (long)ScoreAndConf.MAX*ScoreAndConf.MAX)));
1179                assert((cumError >= 0) && (cumError <= (usableExhibits * 4 * (long)ScoreAndConf.MAX*ScoreAndConf.MAX)));
1180    
1181                // Compute RMS error in range [0,2*MAX].
1182                final double errorRMSRaw = Math.sqrt(cumError / (confDP / (double)(ScoreAndConf.MAX*ScoreAndConf.MAX)));
1183                assert((errorRMSRaw > -1) && (errorRMSRaw < 1 + 2*ScoreAndConf.MAX)); // We'll allow a little wiggle room for rounding errors...
1184                // Compute score in range +/- ScoreAndConf.MAX.
1185                final long score = ScoreAndConf.MAX - Math.round(errorRMSRaw);
1186                // Compute confidence in range [0,ScoreAndConf.MAX] (plus possible rounding errors).
1187                // We allow extra samples over our minimum desired number
1188                // to compensate for poor per-exhibit confidence.
1189                final long confidence = Math.min(ScoreAndConf.MAX,
1190                    Math.round(svRangeConf * (confDP / (usableExhibits*ScoreAndConf.MAX)) *
1191                        Math.sqrt(usableExhibits / (double) calibrationData.size())));
1192                result = new ScoreAndConf((short) Math.max(-ScoreAndConf.MAX, Math.min(ScoreAndConf.MAX, score)),
1193                                          (short) Math.max(0, confidence));
1194                }
1195    
1196            return(result);
1197            }
1198    
1199        /**Minimum suggested calibration-set size; strictly positive.
1200         * Note that if multiple distinct Scorer populations and target exhibit types are to be covered,
1201         * then a set of this size or greater should be gathered for each of them.
1202         * It may then be convenient to work with the union of those values
1203         * for all Scorer types.
1204         * <p>
1205         * This should be large enough that no one Scorer has enough parameters
1206         * to trivially map each different calibration exhibit to its correct score.
1207         * <p>
1208         * In general, while using a larger calibration sample set increases CPU and memory costs,
1209         * especially for remote 'AH' clients that may need to fetch data,
1210         * a larger calibration set should increase the reliability of calibrated results.
1211         * But there may be a huge advantage in using a set small enough
1212         * to keep in cache close to the top of the memory hierarchy.
1213         * <p>
1214         * We may want to try different calibration set sizes larger than the minimum
1215         * to improve the usefulness and predictive power of the calibration set.
1216         * <p>
1217         * A power of two may allow for generation of slightly better code in various places.
1218         * <p>
1219         * Experience suggests that this value should be at least 128.
1220         */
1221        public static final int MIN_SUGGESTED_CALIB_SET_SIZE = 128;
1222    
1223        /**Server-side calibration set size; strictly positive and no less than MIN_SUGGESTED_CALIB_SET_SIZE.
1224         * A larger calibration set of this size implies use of more memory and CPU time,
1225         * but with a higher reliability of answer,
1226         * than using a calibration set of size MIN_SUGGESTED_CALIB_SET_SIZE.
1227         * <p>
1228         * Needs to be at least as large as the minimum 'voted-for' set to give good confidence.
1229         * <p>
1230         * A power of two may allow for generation of slightly better code.
1231         */
1232        public static final int SUGGESTED_CALIB_SET_SIZE =
1233            Math.max(ScorerCacheImpl.MIN_VOTED_FOR_SET_SIZE, 2*MIN_SUGGESTED_CALIB_SET_SIZE);
1234    
1235        /**Tunnel root-relative (starting with '/') URL within the server; never null. */
1236        public static final String scorerTunnelRRURL = "/_AI/remote.jsp";
1237        /**Parameter name to request particular remote data type; never null nor empty. */
1238        public static final String DT_PARAM_NAME = "scDataType";
1239        /**Data type parameter value to request calibration-set data; never null nor empty. */
1240        public static final String DT_CALIB = "calibration";
1241        /**Data type parameter value to request server's best Scorers; never null nor empty. */
1242        public static final String DT_BESTSC = "bestScorers";
1243        /**Data type parameter value to POST new 'star' Scorer; never null nor empty. */
1244        public static final String DT_POSTNEWSC = "postNewScorer";
1245        /**Data type parameter name for generic data/query; never null nor empty. */
1246        public static final String DT__GEN_DATA = "q";
1247        }