001    /*
002    Copyright (c) 1996-2012, Damon Hart-Davis
003    All rights reserved.
004    
005    Redistribution and use in source and binary forms, with or without
006    modification, are permitted provided that the following conditions are
007    met:
008    
009      * Redistributions of source code must retain the above copyright
010        notice, this list of conditions and the following disclaimer.
011    
012      * Redistributions in binary form must reproduce the above copyright
013        notice, this list of conditions and the following disclaimer in the
014        documentation and/or other materials provided with the
015        distribution.
016    
017    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
018    IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
019    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
020    PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
021    OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
022    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
023    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
024    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
025    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
026    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
027    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
028    */
029    
030    package org.hd.d.pg2k.webSvr.util;
031    
032    import java.awt.image.BufferedImage;
033    import java.awt.image.ColorModel;
034    import java.awt.image.WritableRaster;
035    import java.io.IOException;
036    import java.io.InputStream;
037    import java.lang.ref.SoftReference;
038    import java.lang.ref.WeakReference;
039    import java.net.Inet6Address;
040    import java.net.InetAddress;
041    import java.net.MalformedURLException;
042    import java.net.URL;
043    import java.net.UnknownHostException;
044    import java.util.ArrayList;
045    import java.util.Arrays;
046    import java.util.BitSet;
047    import java.util.Collection;
048    import java.util.Collections;
049    import java.util.Comparator;
050    import java.util.Enumeration;
051    import java.util.HashMap;
052    import java.util.HashSet;
053    import java.util.LinkedList;
054    import java.util.List;
055    import java.util.ListIterator;
056    import java.util.Map;
057    import java.util.ResourceBundle;
058    import java.util.Set;
059    import java.util.SortedSet;
060    import java.util.TreeSet;
061    import java.util.concurrent.Callable;
062    import java.util.concurrent.ConcurrentHashMap;
063    import java.util.concurrent.ConcurrentMap;
064    import java.util.concurrent.Future;
065    import java.util.regex.Pattern;
066    
067    import javax.servlet.ServletContext;
068    import javax.servlet.http.HttpServletRequest;
069    import javax.servlet.http.HttpServletResponse;
070    
071    import org.apache.http.HeaderElement;
072    import org.apache.http.message.BasicHeader;
073    import org.hd.d.pg2k.svrCore.AbstractSimpleLogger;
074    import org.hd.d.pg2k.svrCore.AccessionData;
075    import org.hd.d.pg2k.svrCore.AddrTools;
076    import org.hd.d.pg2k.svrCore.AllExhibitProperties;
077    import org.hd.d.pg2k.svrCore.AllExhibitProperties.AEPFilter;
078    import org.hd.d.pg2k.svrCore.Compact7BitString;
079    import org.hd.d.pg2k.svrCore.CoreConsts;
080    import org.hd.d.pg2k.svrCore.ExhibitName;
081    import org.hd.d.pg2k.svrCore.ExhibitPropsComputable;
082    import org.hd.d.pg2k.svrCore.ExhibitPropsComputableMutable;
083    import org.hd.d.pg2k.svrCore.ExhibitPropsLoadable;
084    import org.hd.d.pg2k.svrCore.ExhibitStaticAttr;
085    import org.hd.d.pg2k.svrCore.ExhibitThumbnails;
086    import org.hd.d.pg2k.svrCore.GenUtils;
087    import org.hd.d.pg2k.svrCore.HostUtils;
088    import org.hd.d.pg2k.svrCore.ImageUtils;
089    import org.hd.d.pg2k.svrCore.LocaleBeanBase;
090    import org.hd.d.pg2k.svrCore.MemoryTools;
091    import org.hd.d.pg2k.svrCore.MemoryTools.CacheMiniMap;
092    import org.hd.d.pg2k.svrCore.Name;
093    import org.hd.d.pg2k.svrCore.Name.ExhibitFull;
094    import org.hd.d.pg2k.svrCore.Rnd;
095    import org.hd.d.pg2k.svrCore.TextUtils;
096    import org.hd.d.pg2k.svrCore.ThreadUtils;
097    import org.hd.d.pg2k.svrCore.Tuple;
098    import org.hd.d.pg2k.svrCore.VarTools;
099    import org.hd.d.pg2k.svrCore.MIME.ExhibitMIME;
100    import org.hd.d.pg2k.svrCore.MIME.ExhibitMIME.ExhibitTypeParameters;
101    import org.hd.d.pg2k.svrCore.collections.LRUMapAutoSizeForHitRate;
102    import org.hd.d.pg2k.svrCore.collections.SimpleLRUMap;
103    import org.hd.d.pg2k.svrCore.collections.SimpleProbabilisticCache;
104    import org.hd.d.pg2k.svrCore.location.GeoUtils;
105    import org.hd.d.pg2k.svrCore.props.GenProps;
106    import org.hd.d.pg2k.svrCore.props.GenPropsGenNames;
107    import org.hd.d.pg2k.svrCore.props.LocalProps;
108    import org.hd.d.pg2k.svrCore.vars.EventPeriod;
109    import org.hd.d.pg2k.svrCore.vars.EventVariableValue;
110    import org.hd.d.pg2k.svrCore.vars.SimpleVarStats;
111    import org.hd.d.pg2k.svrCore.vars.SimpleVariableDefinition;
112    import org.hd.d.pg2k.svrCore.vars.SimpleVariablePipelineIF;
113    import org.hd.d.pg2k.svrCore.vars.SimpleVariableValue;
114    import org.hd.d.pg2k.svrCore.vars.SystemVariables;
115    import org.hd.d.pg2k.webSvr.catalogue.TrailData;
116    import org.hd.d.pg2k.webSvr.exhibit.BuiltInFilters;
117    import org.hd.d.pg2k.webSvr.exhibit.DataSourceBean;
118    import org.hd.d.pg2k.webSvr.exhibit.DataSourceBean.AEPLinkedKey;
119    import org.hd.d.pg2k.webSvr.exhibit.FilterExpr;
120    import org.hd.d.pg2k.webSvr.exhibit.TreeFilterBean;
121    import org.hd.d.pg2k.webSvr.virtualHosts.VirtualHosts;
122    import org.hd.d.tmf.ThroughputMonitorFilterBase;
123    import org.w3c.dom.Node;
124    
125    import ORG.hd.d.IsDebug;
126    
127    
128    /**Web-server-related utility functions.
129     * This is for algorithms only of interest to Web apps, often JSPs.
130     * <p>
131     * One advantage of having code here rather than in-line in a JSP
 * is that it is pre-compiled off-line for speed and robustness;
133     * code here is also easier to test.
134     */
135    public final class WebUtils
136        {
    /**Prevent construction of an instance.
     * The constructor is private and has an empty body.
     */
    private WebUtils() { }
139    
140    
    /**Name of event/variable to which we post a voter's approximate geo location.
     * This is simply SystemVariables.GENSTATS_STRING_GLOBAL_EVENT under a more specific name.
     */
    public static final SimpleVariableDefinition VOTER_LOCATION_STATS_EVENT_DEF = SystemVariables.GENSTATS_STRING_GLOBAL_EVENT;

    /**Prefix of event value for voter's approximate geo location.
     * The vote handler appends the region that it looked up for the voter's address
     * to this prefix to form the value posted to VOTER_LOCATION_STATS_EVENT_DEF.
     */
    public static final String VOTER_LOCATION_STATS_EVENT_PREFIX = "voterLocation=";
146    
147    
148        /**Handler for exhibit voting; holds no strong references to anything important. */
149        private static final class VoteHandler extends StatsSink.AbstractStatsListener
150            {
151            private final ExhibitFull exhibitFullName;
152            private final WeakReference<SimpleVariablePipelineIF> varsWR;
153            private final String dpID;
154            private final InetAddress voterIPAddr;
155            private final long expireBy;
156    
157            private VoteHandler(
158                    final String uniqueDataPointID,
159                    final long expireBy,
160                    final ExhibitFull exhibitFullName,
161                    final WeakReference<SimpleVariablePipelineIF> varsWR,
162                    final InetAddress voterIPAddr)
163                {
164                super(uniqueDataPointID, expireBy);
165                this.exhibitFullName = exhibitFullName;
166                this.varsWR = varsWR;
167                dpID = uniqueDataPointID;
168                this.voterIPAddr = voterIPAddr;
169                this.expireBy = expireBy;
170                }
171    
172            @Override public final String handle(final Map<String, String[]> parameters)
173                {
174                // When we've done, redirect back to the exhibit page...
175                // Stick a random parameter value on the end to ensure that
176                // all common/broken browsers (eg IE6, FF1) reload the page.
177                final String result = WebUtils.makeCatPageRRURL(exhibitFullName, WebConsts.F_secondary_generated_HTML_suffix) + "?rnd="+(Rnd.fastRnd.nextLong() >>> 1);
178    
179                // If the pipeline has gone away then return immediately...
180                final SimpleVariablePipelineIF vars = varsWR.get();
181                if(null == vars) { return(result); }
182    
183                if(parameters != null)
184                    {
185                    // Allow for GET or POST plain/image style (.x, .y) values.
186                    final boolean votePro =
187                        (null != parameters.get(VOTE_PRO_PARAM_NAME)) ||
188                        (null != parameters.get(VOTE_PRO_PARAM_NAME + ".x"));
189                    final boolean voteCon =
190                        (null != parameters.get(VOTE_CON_PARAM_NAME)) ||
191                        (null != parameters.get(VOTE_CON_PARAM_NAME + ".x"));
192                    if(votePro != voteCon) // Exactly one selected...
193                        {
194                        try
195                            {
196                            // Post vote event value to correct event stream...
197    /*if(IsDebug.isDebug)*/ { System.out.println("***VOTE registered (for="+votePro+"): " + exhibitFullName + " by " + dpID); }
198                            vars.setVariable(new SimpleVariableValue(
199                                    (votePro ? SystemVariables.VOTE_PRO : SystemVariables.VOTE_CON),
200                                    exhibitFullName.getShortName().toString()));
201    
202                            if(voterIPAddr != null)
203                                {
204                                // Note location of voter (as ccTLD or region).
205                                // We are prepared to spend some time on this
206                                // as voting is rare and significant
207                                // and we've probably already collected/cached
208                                // any required data.
209                                final String location = GeoUtils.getRegionByAddress(voterIPAddr, false);
210                                vars.setVariable(new SimpleVariableValue(
211                                    VOTER_LOCATION_STATS_EVENT_DEF,
212                                        VOTER_LOCATION_STATS_EVENT_PREFIX + location));
213                                }
214                            }
215                        catch(final IOException e)
216                            { e.printStackTrace();  /* Just absorb errors. */ }
217                        }
218                    }
219    
220                // Compute time before next vote will be requested
221                // if a user does take this opportunity to vote.
222                // A relatively long time after we expect the original voting opportunity to expire
223                // to act as a second-level screen against spiders
224                // and to avoid pestering a human voter too often.
225                // (We also make this time more unpredictable with a good random source.)
226                final long nextVote = expireBy + 11*WebConsts.VOTE_MIN_REQUEST_GAP_MS +
227                        (GenUtils.mustConservePowerExtreme() ? Rnd.fastRnd : Rnd.goodRnd).nextInt(7*WebConsts.VOTE_MIN_REQUEST_GAP_MS);
228    
229                // Install a dummy handler to postpone the next time that this user gets asked to vote.
230                // Assumes that the extant listener is removed *before* a call to handle()
231                // so that this new listener will not be removed on return.
232                StatsSink.addListenerForDataPoint(new StatsSink.AbstractStatsListener(dpID, nextVote){
233                    /**Dummy handler that should never in fact be invoked. */
234                    @Override public final String handle(final Map<String, String[]> parameters) { return(result); }
235                    });
236    
237                return(result);
238                }
239            }
240    
241    
242        /**Simple class to allow logging to the given servlet's log().
243         * This holds only a WeakReference to the ServletContext
244         * so as not to obstruct GC when all strong refs go away.
245         * <p>
246         * Stops logging when the referent becomes null.
247         */
248        public static final class ServletLogger extends AbstractSimpleLogger
249            {
250            public ServletLogger(final ServletContext ctxt)
251                {
252                if(ctxt == null) { throw new IllegalArgumentException(); }
253                ctxtWR = new WeakReference<ServletContext>(ctxt);
254                }
255    
256            /**Weak ref to servlet context; never null but the referent may be. */
257            private volatile WeakReference<ServletContext> ctxtWR;
258    
259            /**Log the given message.
260             * If the weak reference to the context has died
261             * the log output is silently discarded.
262             */
263            public void log(final String message)
264                {
265                final ServletContext context = ctxtWR.get();
266                if(null == context) { return; }
267                context.log(message);
268                }
269            }
270    
271    
272        /**Simple class to allow logging to a given servlet's log() or System.out if none available.
273         * This allows a logger to be created at instance scope
274         * for (say) a Filter, and set with a context when the config is set
275         * and cleared when one is not available,
276         * all the while remaining a valid logger.
277         * <p>
278         * This holds only a WeakReference to the ServletContext
279         * so as not to obstruct GC when all strong refs go away.
280         */
281        public static final class ServletLoggerWithFallback extends AbstractSimpleLogger
282            {
283            /**Weak ref to servlet context; may be null or the referent may be. */
284            private volatile WeakReference<ServletContext> ctxtWR;
285    
286            /**Set context, or clear/remove it if null. */
287            public void setContext(final ServletContext context)
288                {
289                final WeakReference<ServletContext> ctxtWROld = ctxtWR;
290                final ServletContext ctxtOld = (null == ctxtWROld) ? null : ctxtWROld.get();
291                if(null == context) { ctxtWR = null; }
292                else { ctxtWR = new WeakReference<ServletContext>(context); }
293                if(ctxtOld != context)
294                    { log("ServletLoggerWithFallback: now logging to " + ((null == context) ? "System.out" : "context.log()")); }
295                }
296    
297            /**Log the given message.
298             * Logs to the servlet context logger if available,
299             * else logs to System.out.
300             */
301            public void log(final String message)
302                {
303                final WeakReference<ServletContext> wr = ctxtWR;
304                final ServletContext context = (null == wr) ? null : wr.get();
305                if(context != null) { context.log(message); }
306                else { System.out.println(message); }
307                }
308            }
309    
310    
311        /**System variables tried, in order, for a "popular" exhibit; private to getPopularExhibit().
312         * We put the download var first to get a decent rate of update
313         * since this value changes quite frequently.
314         * <p />
315         * We don't put the vote var first so as to reduce the temptation
316         * to "throw" a vote to get an exhibit shown on the front page.
317         */
318        private static final SimpleVariableDefinition _gPE_vars[] = {
319            SystemVariables.ACCESSPATTERN_COMPLETED_DOWNLOAD_LOCAL,
320            SystemVariables.ACCESSPATTERN_COMPLETED_DOWNLOAD,
321            SystemVariables.VOTE_PRO,
322            SystemVariables.ACCESSPATTERN_CLICKTHROUGH,
323            SystemVariables.ACCESSPATTERN_CAT_PAGE_VIEW,
324            };
325    
326    //    /**Gets "popular" exhibit, possibly filtered by type; null if none available.
327    //     * Tries to pick a "popular" exhibit by looking at one recently voted for,
328    //     * or downloaded, etc, in the system variables,
329    //     * and that has both thumbnails available where they are possible.
330    //     * <p>
331    //     * This rejects exhibits with a below-par (negative) rating.
332    //     * <p>
333    //     * Using the system variables should mean that this can pick up
334    //     * values set from any mirror, etc, fairly quickly.
335    //     * <p>
336    //     * This cannot guarantee to return a non-null value,
337    //     * but any value that it does return is a current, valid exhibit.
338    //     *
339    //     * @param dsb  handle on the system variables and data; never null
340    //     * @param type  if not null only exhibits of this type are candidates
341    //     * @deprecated Use {@link #getPopularExhibit(DataSourceBean,ExhibitMIME.ExhibitTypeParameters,Collection)} instead
342    //     */
343    //    @Deprecated
344    //    public static String getPopularExhibit(final DataSourceBean dsb,
345    //                                           final ExhibitMIME.ExhibitTypeParameters type)
346    //        { return getPopularExhibit(dsb, type, null); }
347    
348        /**Gets name of "popular" exhibit, possibly filtered by type; null if none available.
349         * Tries to pick a "popular" exhibit by looking at one recently voted for,
350         * or downloaded, etc, in the system variables,
351         * and that has both thumbnails available where they are possible.
352         * <p>
353         * This rejects exhibits with a below-par (negative) rating.
354         * <p>
355         * Using the system variables should mean that this can pick up
356         * values set from any mirror, etc, fairly quickly.
357         * <p>
358         * This cannot guarantee to return a non-null value,
359         * but any value that it does return is a current, valid exhibit.
360         *
361         * @param dsb  handle on the system variables and data; never null
362         * @param type  if not null only exhibits of this type are candidates
363         * @param excludeFullNames  if non-null, any exhibits included by full name
364         *     are not candidates to be returned
365         * @param beQuick  if true then don't spend too long trying to calculate this
366         *     but instead give up quickly if need be
367         *     (so as not to block page generation for example)
368         */
369        public static Name.ExhibitFull getPopularExhibit(final DataSourceBean dsb,
370                                                         final ExhibitMIME.ExhibitTypeParameters type,
371                                                         final Collection<String> excludeFullNames,
372                                                         final boolean beQuick)
373            {
374            if(dsb == null) { throw new IllegalArgumentException(); }
375    
376            final long start = System.currentTimeMillis();
377    
378            for(final SimpleVariableDefinition def : _gPE_vars)
379                {
380                assert(def != null);
381    
382                // Half the time skip a "local" definition
383                // so as to get to see global popular items in the mix.
384                if(def.isLocal() && Rnd.fastRnd.nextBoolean())
385                    { continue; }
386    
387                try
388                    {
389                    final SimpleVariableValue svv = dsb.getVariable(def);
390                    if(svv == null) { continue; }
391    
392                    // Specified variable must be of String type.
393                    assert(def.getType() == SimpleVariableDefinition.TYPE_STRING);
394    
395                    final String s = (String) svv.getValue();
396                    // Skip any null values.
397                    if(null == s) { continue; }
398    
399                    final AllExhibitProperties aep = dsb.getAllExhibitProperties(-1);
400                    final Name.ExhibitFull fullName = aep.aeid.getFullName(s);
401    
402                    // If this is in the exclusion list then skip it...
403                    if((excludeFullNames != null) && excludeFullNames.contains(fullName)) { continue; }
404    
405                    // Seems not to be a valid/extant exhibit, so give up...
406                    if(fullName == null) { continue; }
407                    final ExhibitStaticAttr esa = aep.aeid.getStaticAttr(fullName);
408                    if(esa == null) { continue; }
409    
410                    // If the type was specified and does not match, give up!
411                    final ExhibitTypeParameters actualType = (ExhibitMIME.getInputFileType(esa.getCharSequence()));
412                    if(actualType == null)
413                        { continue; /* Reject untyped exhibit. */ }
414                    if((type != null) && !type.equals(actualType))
415                        { continue; /* Reject wrong-type exhibit. */ }
416    
417                    // Skip if this exhibit may be "sensitive" somehow.
418                    final GenProps gp = dsb.getGenProps(-1);
419                    if(GenUtils.isSensitive(fullName, gp)) { continue; }
420    
421                    // If this exhibit type supports thumbnails
422                    // then reject anything without both immediately available.
423                    if(actualType.canPossiblyCreateThumbnailOfSameMIMEType())
424                        {
425                        final ExhibitThumbnails thumbnails = dsb.getThumbnails(fullName, false);
426                        if((thumbnails == null) ||
427                           (thumbnails.getSmall() == null) ||
428                           (thumbnails.getStandard() == null))
429                            { continue; /* Reject this. */ }
430                        }
431    
432                    // We allow use of a stale (and ignore a not-yet-computed) rating so as to be quick.
433                    final ExhibitPropsComputableMutable ePCM =
434                        aep.getExhibitPropsComputableMutable(fullName);
435                    // If we don't actually know (absent/stale rating) how good this exhibit is
436                    // then attempt to find out for next time in the background
437                    // unless the system is (temporarily) conserving energy or other than lightly loaded.
438                    if((ePCM == null) || ePCM.isStale())
439                        {
440                        if(!GenUtils.mustConservePower() && WebUtils.isLightlyLoaded(dsb.getServletContext()))
441                            {
442                            // Use 'discardable' task pool to ensure that we don't block.
443                            ThreadUtils.lowPriorityThreadPoolDiscardable.submit(new Runnable() {
444                                /**Force full non-stale recomputation of EPCM of not-apparently-unpopular exhibit. */
445                                public void run() { aep.getExhibitPropsComputableMutable(fullName, false, gp, dsb, dsb.getScorerCache()); }
446                                });
447                            }
448                        }
449                    // Reject/skip anything with a definite below-par (non-positive) rating.
450                    if((ePCM != null) && (ePCM.getGoodness() <= 0)) { continue; }
451    
452                    return(fullName); // Got one!
453                    }
454                catch(final IOException e)
455                    {
456                    // Silently ignore a probably-transient problem...
457                    }
458    
459                // If urged to be quick by our caller
460                // then abort if we've already taken too long trying
461                // (a significant fraction of allowed page-generation time).
462                if(beQuick && ((System.currentTimeMillis() - start) > WebConsts.MAX_PG_DOWNLOAD_MS/2))
463                    { break; }
464                }
465    
466            return(null); // Nothing found...
467            }
468    
    /**If true then check for bots/spiders by UA (User-Agent); note that clients can forge their UA.
     * When this is false the set of known spider names is empty,
     * and both UA_REGEX and _isBot_cache are null.
     */
    private static final boolean CHECK_FOR_SPIDERS_BY_UA = true;
471    
472        /**Immutable Set of known spider/bot UA strings; should probably be moved to a text/properties file.
473         * This is a set of lower-cased first (space/tab/bracket-delimited) words
474         * from the UA strings.
475         * <p>
476         * The names consist only of non-regex-metacharacters in the set [a-z'_-],
477         * so are safe to embed in a regex.
478         */
479    //    @SuppressWarnings("unchecked")
480        private static final Set<String> spiderUAName1stWordsLC = (!CHECK_FOR_SPIDERS_BY_UA) ? Collections.<String>emptySet() :
481            Collections.unmodifiableSet(new HashSet<String>(Arrays.asList(new String[]{
482            "", /* Empty UA string... */
483            "-",
484            "alexibot",
485            "appie",
486            "aqua_products",
487            "asterias",
488            "b2w",
489            "baiduspider",
490            "backdoorbot",
491            "becomebot",
492            "blowfish",
493            "bookmark",
494            "botalot",
495            "builtbottough",
496            "bullseye",
497            "bunnyslippers",
498            "cheesebot",
499            "cherrypicker",
500            "cherrypickerelite",
501            "cherrypickerse",
502            "copernic",
503            "copyrightcheck",
504            "cosmos",
505            "crescent",
506            "curl",
507            "dittospyder",
508            "dumbot",
509            "emailcollector",
510            "emailsiphon",
511            "emailwolf",
512            "enterprise_search",
513            "erocrawler",
514            "extractorpro",
515            "fairad",
516            "faxobot",
517            "findlinks",
518            "flaming",
519            "foobot",
520            "freefind",
521            "gaisbot",
522            "getright",
523            "gigabot",
524            "googlebot-image",
525            "grub",
526            "grub-client",
527            "harvest",
528            "hatena",
529            "hloader",
530            "http",
531            "httplib",
532            "humanlinks",
533            "ia_archiver",
534            "indy",
535            "infonavirobot",
536            "iron33",
537            "jennybot",
538            "jetbot",
539            "kalooga",
540            "kenjin",
541            "keyword",
542            "larbin",
543            "lexibot",
544            "libweb",
545            "libwww-perl",
546            "linkextractorpro",
547            "linkscan",
548            "linkwalker",
549            "lnspiderguy",
550            "looksmart",
551            "lwp-trivial",
552            "lynx",
553            "mata",
554            "miixpc",
555            "mister",
556            "moget",
557            "msiecrawler",
558            "msnbot",
559            "naver",
560            "netants",
561            "netmechanic",
562            "nicerspro",
563            "nutch",
564            "offline",
565            "omniexplorer_bot",
566            "openbot",
567            "openfind",
568            "oracle",
569            "perman",
570            "port",
571            "propowerbot",
572            "prowebwalker",
573            "psbot",
574            "python-urllib",
575            "queryn",
576            "radiation",
577            "repomonkey",
578            "rma",
579            "searchpreview",
580            "sitesnagger",
581            "sootle",
582            "spankbot",
583            "spanner",
584            "stanford",
585            "suzuran",
586            "szukacz",
587            "teleport",
588            "teleportpro",
589            "telesoft",
590            "thenomad",
591            "tocrawl",
592            "true_robot",
593            "turingos",
594            "url",
595            "url_spider_pro",
596            "urly",
597            "vci",
598            "wbdbot",
599            "webauto",
600            "webbandit",
601            "webcopier",
602            "webenhancer",
603            "websauger",
604            "website",
605            "webster",
606            "webstripper",
607            "webvac",
608            "webzip",
609            "wget",
610            "www-collector-e",
611            "xenu's",
612            "yahooseeker",
613            "zeus",
614            })));
615    
    /**Set of characters taken as a main-part terminator in a User-Agent header, including whitespace.
     * This should be usable as the separator arg to StringTokenizer,
     * and in a regex when wrapped in "[]" square brackets.
     * <p>
     * The characters are: space, tab, '/', '(' and ':'.
     * The static initialiser of UA_REGEX embeds this string inside "[...]".
     */
    private static final String UA_TERMINATOR_CHARS = " \t/(:";
621    
    /**Extra case-insensitive patterns matched in UA names, "|"-separated, or null if none.
     * Essentially, anything alphanumeric-ish ending in "bot", or
     * anything alphanumeric-ish containing "spider".
     * <p>
     * "Alphanumeric-ish" here means characters from [a-z0-9._-].
     * These alternatives are placed ahead of the literal spider names
     * when the static initialiser builds UA_REGEX.
     */
    private static final String UA_BOT_PATTERNS = "([a-z0-9._-]*bot)|([a-z0-9._-]*spider[a-z0-9._-]*)";
627    
628        /**Case-insensitive regex match for all non-empty UA names from spiderUAName1stWordsLC; null if not checking UA names.
629         * Made public to enable some unit testing.
630         */
631        public static final Pattern UA_REGEX;
632        /**Initialise UA_REGEX. */
633        static
634            {
635            if(!CHECK_FOR_SPIDERS_BY_UA) { UA_REGEX = null; }
636            else
637                {
638                final StringBuilder sb = new StringBuilder(32 + (spiderUAName1stWordsLC.size()<<4));
639                // First build the simple compound regex name1|name2|...
640                // possibly with some additional common patterns.
641                if(null != UA_BOT_PATTERNS) { sb.append(UA_BOT_PATTERNS); }
642                for(final String s : spiderUAName1stWordsLC)
643                    {
644                    if((s == null) || (s.length() == 0)) { continue; }
645                    if(sb.length() > 0) { sb.append('|'); }
646                    sb.append(s);
647                    }
648                // Now wrap and add terminator...
649                sb.insert(0, "^(");
650                sb.append(")[").append(UA_TERMINATOR_CHARS).append("]?.*$");
651                // Now compile the regex...
652                UA_REGEX = Pattern.compile(sb.toString(), Pattern.CASE_INSENSITIVE);
653                }
654            }
655    
    /**Name of Boolean attribute in request we cache result of requestProbablyFromSpider() by.
     * The name is prefixed "org.hd.pg2k." which presumably keeps it clear of
     * attributes set by other code on the same request.
     */
    private static final String _rPFS_CACHE_PNAME = "org.hd.pg2k._rPFS_CACHE";
658    
659        /**LRU cache from (common) whole UAs to "bot"ness to save some repeated/slow String parsing; never null when checking for bots by UA.
660         * We're prepared to discard all of this under memory stress
661         * as we only have to work this out at most once per request for example.
662         * <p>
663         * TODO: consider some form of incremental auto-expiry even when not full since keys can be relatively large (eg something like SimpleLRUMap + AutoExpirable)
664         */
665        private static final CacheMiniMap<String,Boolean> _isBot_cache = !CHECK_FOR_SPIDERS_BY_UA ? null :
666            SimpleProbabilisticCache.<String,Boolean>create(Math.max(32, (int)(Runtime.getRuntime().totalMemory() >> 20)), "_isBot_cache");
667    
668        /**Attempt to determine quickly if the current request is probably from a spider/bot (ie not a human).
669         * NB: This does not attempt to distinguish between good spiders (ie bona fide SEs)
670         * and bad/broken/rude bots/scrapers/spiders.
671         * <p>
672         * This tries to distinguish human from non-human,
673         * at least in part to conserve (prime interactive) resources for humans,
674         * and does not claim to be perfect.
675         * <p>
676         * This returns true if there is no (valid) referring page
677         * (and this visitor is not new to the site, ie has recently pulled another page),
678         * though some browsers/firewalls may routinely block this info,
679         * and some referrals, eg to target="_blank", may show no Referer value.
680         * <p>
681         * This should be quick as most or all requests may be tested with this.
682         * <p>
683         * TODO: Should expand this to test against well-known IP addresses.
684         * <p>
685         * TODO: Should include a behavioural element, eg mindless, rapid, pauseless browsing.
686         *
687         * @param request  the incoming request; never null
688         *
689         * @return true if the requesting client is probably a bot, false if probably a human
690         */
691        public static boolean requestProbablyFromSpider(final HttpServletRequest request)
692            {
693            // If we already worked this out, return result already computed/cached!
694            final Object cachedResult = request.getAttribute(_rPFS_CACHE_PNAME);
695            if(cachedResult instanceof Boolean)
696                { return(((Boolean) cachedResult).booleanValue()); }
697    
698            // First check for lack of a valid "Referer" header
699            // AND the the client being 'new' to this site.
700            if((null == request.getAttribute(ThroughputMonitorFilterBase.REQ_ATTR_NAME_NEW_CLIENT_FLAG)) &&
701               "".equals(requestProbablyReferredFromExternalSite(request)))
702                {
703    if(IsDebug.isDebug) { System.out.println("[Client has no/unparsable Referer and is not new: probably a spider/robot.]"); }
704                request.setAttribute(_rPFS_CACHE_PNAME, Boolean.TRUE);
705                return(true);
706                }
707    
708            // Now check for a known bot User-Agent...
709            if(CHECK_FOR_SPIDERS_BY_UA)
710                {
711                final Enumeration<?> aeEn = request.getHeaders("User-Agent");
712                final boolean hasNoUA = (aeEn == null) || !aeEn.hasMoreElements();
713                if(hasNoUA)
714                    {
715    if(IsDebug.isDebug) { System.out.println("[Client has no UA: assumed to be a spider/robot.]"); }
716                    request.setAttribute(_rPFS_CACHE_PNAME, Boolean.TRUE);
717                    return(true); /* Rude/suspicious. */
718                    }
719                else
720                    {
721                    // Check all such UA headers if more than one for some reason...
722                    while(aeEn.hasMoreElements())
723                        {
724                        final String wholeUA = (String) aeEn.nextElement();
725    
726                        final int lenUA = wholeUA.length();
727                        if((lenUA < 2) || (lenUA > 512))
728                            {
729                            // Strange-sized UA is suspicious...
730                            // And we don't want our cache/memory full of huge UA strings.
731    if(IsDebug.isDebug) { System.out.println("[Client has empty/tiny/huge (ie rude/broken/silly/dangerous) UA: assumed to be a spider/robot; client IP: "+request.getRemoteAddr()+".]"); }
732                            request.setAttribute(_rPFS_CACHE_PNAME, Boolean.TRUE);
733                            return(true);
734                            }
735    
736    //final long t1 = System.nanoTime();
737                        final boolean isBot;
738                        final Boolean b = _isBot_cache.get(wholeUA);
739                        if(b != null) { isBot = b.booleanValue(); }
740                        else
741                            {
742                            isBot = UA_REGEX.matcher(wholeUA).matches();
743                            // Cache match result for this whole User-Agent value.
744                            _isBot_cache.put(wholeUA, isBot ? Boolean.TRUE : Boolean.FALSE);
745    
746                            // When we see a (new-ish) bot UA then log it and where it came from...
747                            if(isBot) { System.out.println("[INFO: Probable bot/spider UA seen (full UA='"+TextUtils.sanitiseForXML(wholeUA, 256, false)+"'); client IP: "+request.getRemoteAddr()+".]"); }
748                            }
749    //final long t2 = System.nanoTime(); System.out.println("[UA match time: "+(t2-t1)+"ns]");
750    
751                        if(isBot)
752                            {
753                            request.setAttribute(_rPFS_CACHE_PNAME, Boolean.TRUE);
754                            return(true);
755                            }
756                        }
757                    }
758                }
759    
760            // TODO: check known spider/bot IP addresses...
761    
762            request.setAttribute(_rPFS_CACHE_PNAME, Boolean.FALSE);
763            return(false); // Probably human...
764            }
765    
766        /**Attempt to detect if the current request has been referred from an external Web site.
767         * This checks if the "Referer" is apparently from gallery mirrors or aliases.
768         * <p>
769         * Note that since this info can be forged,
770         * or disabled/knobbled for security reasons,
771         * this is not completely reliable.
772         * <p>
773         * A missing "Referer" will be taken as indicating an "external" reference,
774         * and will catch most well-behaved spiders as a result.
775         * <p>
776         * Since we may have to do some string parsing this may not be very fast,
777         * but it should not be very slow either.
778         * <p>
779         * Even if there is more than one "Referer" header we will only look at one.
780         *
781         * @param request  the incoming request; never null
782         *
783         * @return  null if referred from a Gallery site/host/alias,
784         *          "" if unparsable or no referring URL,
785         *          else normalised (lowercase, stripped of common prefixes, etc)
786         *          referring host's name
787         */
788        public static String requestProbablyReferredFromExternalSite(final HttpServletRequest request)
789            {
790            final Enumeration<?> rEn = request.getHeaders("Referer");
791            final boolean noRef = ((rEn == null) || !(rEn.hasMoreElements())); // No Referer...
792            if(noRef) { return(""); }
793    
794            // Get the referring URL...
795            final String ref = (String) rEn.nextElement();
796    
797            try
798                {
799                // Attempt to parse it for the hostname.
800                // If we fail, ie ref is ill-formed, then count this as "external".
801                final URL url = new URL(ref);
802    
803                // If we can't find virtual host details for the host
804                // then assume that this is an external host.
805                final String rawHostName = url.getHost();
806                final String normalisedHostName = HostUtils.normaliseVirtualHostName(rawHostName);
807                if(null == VirtualHosts.getVirtualHostDetails(normalisedHostName, null))
808                    {
809                    final String ourName = request.getServerName();
810                    if((null != ourName) &&
811                            normalisedHostName.equals(HostUtils.normaliseVirtualHostName(ourName)))
812                        {
813                        return(null); // Our host name, even if not expected one, so treat as OK.
814                        }
815                    return(normalisedHostName); // OK, got the referring host, and it's not us.
816                    }
817                return(null); // This was apparently referred by us, so is OK.
818                }
819            catch(final Exception e)
820                {
821                return(""); /* Cannot parse referrer, so treat as if external. */
822                }
823            }
824    
825        /**Generate full URL for exhibit given exhibit name; never null.
826         * This may take account of such factors as which servers
827         * are up, how heavily loaded, etc,
828         * in order to give best throughput and perform automatic load balancing,
829         * to give the best possible user experience.
830         * <p>
831         * In order to do this well we may need the request details,
832         * so they should be supplied if possible.
833         * These should be the client's request to a Gallery site,
834         * else null.
835         * <p>
836         * Defaults to generic main host if specific better mirror (etc) cannot be identified.
837         */
838        public static URL makeExhibitURL(final CharSequence exhibitName,
839                                         final HttpServletRequest request,
840                                         final DataSourceBean vars)
841            throws MalformedURLException
842            {
843            return(new URL("http://" +
844                           MirrorSelectionUtils.chooseMirrorHostForHighBandwidth(request, vars) +
845                           makeExhibitRRURL(exhibitName)));
846            }
847    
848        /**Generate root-relative URL for exhibit given exhibit name; never null.
849         */
850        public static String makeExhibitRRURL(final CharSequence exhibitName)
851            //throws MalformedURLException
852            {
853            return("/" + WebConsts.BASE_PATH_EXHIBITS + "/" + exhibitName);
854            }
855    
856        /**Generate full URL for thumbnail/sample given exhibit name and standard/small selector; never null.
857         */
858        public static URL makeThumbnailURL(final CharSequence exhibitName, final boolean std)
859            throws MalformedURLException
860            {
861            return(new URL("http", CoreConsts.MAIN_DATA_HOST, makeThumbnailRRURL(exhibitName, std)));
862            }
863    
864        /**Generate root-relative URL for thumbnail/sample given exhibit name and standard/small selector; never null.
865         */
866        public static String makeThumbnailRRURL(final CharSequence exhibitName, final boolean std)
867            {
868            return("/" + WebConsts.BASE_PATH_TN + "/" + (std ? WebConsts.PATH_TN_STD : WebConsts.PATH_TN_SML) + "/" + exhibitName);
869            }
870    
871        /**Generate full URL for catalogue page given exhibit name; never null.
872         * This should always generate the canonical/"official" form of the URL,
873         * eg not including mirrors or aliases.
874         * <p>
875         * The media-type suffix (eg ".html" or ".wml") must be supplied.
876         */
877        public static URL makeCatPageURL(final CharSequence exhibitName, final String mediaTypeSuffix)
878            throws MalformedURLException
879            {
880            return(new URL("http", CoreConsts.MAIN_DATA_HOST, makeCatPageRRURL(exhibitName, mediaTypeSuffix)));
881            }
882    
883        /**Generate root-relative URL for catalogue page given exhibit name; never null.
884         * This should take account of such factors as which servers
885         * are up, how heavily loaded, etc.
886         * <p>
887         * The media-type suffix (eg ".html" or ".wml") must be supplied.
888         */
889        public static String makeCatPageRRURL(final CharSequence exhibitName, final String mediaTypeSuffix)
890            {
891            return("/" + WebConsts.BASE_PATH_CATPAGE + "/" + exhibitName + mediaTypeSuffix);
892            }
893    
894        /**Returns true if this Web server is overloaded (eg for bandwidth or CPU).
895         * If this server is clearly overloaded then this routine returns true,
896         * and it is possible to start trimming UI features to reduce load,
897         * eg the number of search results shown.
898         * <p>
899         * If not overloaded or not possible to tell, this returns false.
900         * <p>
901         * (If the context is null then this routine may have to return false.)
902         * <p>
903         * This may gather information from a number of sources,
904         * but in the main the ThroughputFilter's data will be used.
905         * We may explicitly set the overload flag here
906         * if we detect the system to be overloaded
907         * to make it quicker for us and others on a subsequent call;
908         * this will get overwritten by TMF when it next gets to assess load.
909         * <p>
910         * If true then the UI and application should attempt to use less bandwidth
911         * and CPU time than normal, perhaps by using cheaper algorithms than
912         * usual (eg less comprehensive searches) or a less-graphics-intensive UI.
913         * <p>
914         * This may default to true while the server is starting up to ensure that
915         * as little extra CPU (for example) as possible is consumed while
916         * the server is compiling JSPs, loading classes, etc.
917         * <p>
918         * This is intended to be relatively cheap to call,
919         * since it may get called frequently,
920         * for example especially when the system is busy,
921         * and/or at start-up before JIT compilation (eg HotSpot) has kicked in.
922         */
923        public static boolean isOverloaded(final ServletContext ctxt)
924            {
925            if(ctxt != null)
926                {
927                // If the ThroughputFilter thinks that the server is overloaded,
928                // then this reports the system as overloaded too.
929                final Object overloadFlag =
930                        ctxt.getAttribute(WebConsts.BANDWIDTH_OVERLOAD_ATTR_NAME);
931                // Take anything but the presence of the value Boolean.FALSE
932                // (eg the absence of any value at all) as indication of a problem,
933                // eg that the system may be too busy even to clear the flag,
934                // as well as a short-term cache of overload status computed in this routine
935                // until the TF computes/sets a new value.
936                if((!(overloadFlag instanceof Boolean)) || ((Boolean) overloadFlag).booleanValue())
937                    { return(true); }
938                }
939    
940            // If the system is known to be at or over our CPU load limit then report overload.
941            if(ThreadUtils.isCPUHeavilyLoaded())
942                {
943                // Effectively cache overload status for a while (until the next TF update).
944                if(ctxt != null) { ctxt.removeAttribute(WebConsts.BANDWIDTH_OVERLOAD_ATTR_NAME); }
945                return(true);
946                }
947    
948            // Can't see any problems, given the available data, so not overloaded.
949            return(false);
950            }
951    
952        /**Private flag for isLightlyLoaded() to note time when we were last non lightly loaded.
953         * Once we discover that we are not lightly loaded we set this to the current time,
954         * and then do not re-test for a little while in case the testing itself adds to the load.
955         * <p>
956         * Initially zero.
957         * <p>
958         * Marked volatile for thread-safe lock-free access.
959         */
960        private static volatile long _lastNotLightlyLoaded;
961    
962        /**Time that we last reset the target load average to zero to restart our load ramp-up.
963         * Initially 'now' in encourage a gentle start.
964         * <p>
965         * Marked volatile for thread-safe lock-free access.
966         */
967        private static volatile long _lastResetLA = System.currentTimeMillis();
968    
969        /**Time over which to ramp up load limit in isLightlyLoaded(), ms; strictly positive.
970         * Should be at least the 60s time claimed in the documentation for
971         * OperatingSystemMXBean.getSystemLoadAverage()
972         * to dampen CPU-load flapping,
973         * especially when emerging from power-conservation mode.
974         */
975        private static final int LOAD_AVERAGE_RAMP_UP_TIME = 180000 + Rnd.fastRnd.nextInt(60000); // 3+ minutes.
976    
977        /**Returns true if the site seems to be lightly loaded (CPU, bandwidth, etc).
978         * In case of difficulties/confusion this reports false.
979         * <p>
980         * When running well, the system should over around the normal / light-load boundary.
981         * <p>
982         * This routine is quite careful and conservative,
983         * and will only return true if this server and the host system
984         * both seem to be lightly loaded and stable by all the appropriate metrics.
985         * <p>
986         * This never reports the system to be lightly loaded if it is low on power
987         * (eg on a nearly-expired battery) so as to avoid unnecessary background work.
988         * <p>
989         * Where this can check 'uptime' then it tries to enforce a modest CPU ramp-up
990         * over approximately the 60s-or-so of OperatingSystemMXBean.getSystemLoadAverage()
991         * to limit rapid fluctuations in CPU load (and, for example, power consumption).
992         * <p>
993         * This routine is designed to be called frequently,
994         * ie is reasonably fast and efficient.
995         */
996        public static boolean isLightlyLoaded(final ServletContext ctxt)
997            {
998            final long now = System.currentTimeMillis();
999            // Don't re-test within ~1s of finding the system to be significantly loaded.
1000            if(now - _lastNotLightlyLoaded < 1013) { return(false); }
1001    
1002            // Treat the system as stressed and thus NOT lightly loaded if (temporarily) short of power
1003            // so as to discourage unnecessary background/speculative work.
1004            // Also forces load-limit ramp-up to restart from scratch.
1005            if(GenUtils.mustConservePower())
1006                { _lastNotLightlyLoaded = now; _lastResetLA = now; return(false); }
1007    
1008            // We check for memory stress; we are not lightly loaded if memory is hurting.
1009            if(MemoryTools.isMemoryStressed())
1010                { _lastNotLightlyLoaded = now; return(false); }
1011    
1012            final double loadFrac = ThreadUtils.loadFraction();
1013            if(loadFrac >= 0) // -ve indicates load average not available so we don't test.
1014                {
1015                // Now we check the whole-host-operating-system (time-averaged) load.
1016                // This should be well under full capacity to be considered lightly loaded,
1017                // ie the run-queue length must be much less than the number of available CPUs.
1018                // Note that where this value is not available getSystemLoadAverage() returns a -ve value
1019                // and we then regard the system as not busy by this metric, as a reasonable fallback.
1020                // This does not take into account any stress on I/O nor other subsystems.
1021                final float targetBasicThreshold = LocalProps.getLightLoadMax();
1022                // If the system is well over over our lower load limit, our fault or not,
1023                // then restart our load ramp-up to give the system a chance to recover.
1024                if(loadFrac >= 2 * targetBasicThreshold)
1025                    { _lastNotLightlyLoaded = now; _lastResetLA = now; return(false); }
1026                final long timeSinceLoadRampUpStart = (now - _lastResetLA);
1027                // If still in load ramp-up phase then reduce 'uptime' threshold accordingly.
1028                final float targetThreshold = (timeSinceLoadRampUpStart >= LOAD_AVERAGE_RAMP_UP_TIME) ?
1029                        targetBasicThreshold :
1030                        (targetBasicThreshold * Math.max(0, Math.min(1, timeSinceLoadRampUpStart / (float) LOAD_AVERAGE_RAMP_UP_TIME)));
1031                final boolean aboveThreshold = loadFrac >= targetThreshold;
1032                if(aboveThreshold)
1033                    { _lastNotLightlyLoaded = now; return(false); }
1034                }
1035    
1036            if(ctxt != null)
1037                {
1038                // If the ThroughputFilter doesn't think that we are lightly loaded,
1039                // then we immediately report the system as not lightly loaded.
1040                final Object underloadFlag =
1041                        ctxt.getAttribute(WebConsts.BANDWIDTH_LIGHTLOAD_ATTR_NAME);
1042                // We take the absence of any value as indication of a problem,
1043                // eg that the system may be too busy even to set the flag.
1044                if((!(underloadFlag instanceof Boolean)) ||
1045                       (!((Boolean) underloadFlag).booleanValue()))
1046                    { _lastNotLightlyLoaded = now; return(false); }
1047    
1048                // We double-check that some problem is not making us seem overloaded at the same time...
1049                if(isOverloaded(ctxt)) { _lastNotLightlyLoaded = now; return(false); }
1050    
1051                return(true); // Seems lightly loaded...
1052                }
1053    
1054            // Site status is unclear, so assume not quiet for now.
1055            // However, this 'false' is not cacheable, ie is not a real result.
1056            return(false);
1057            }
1058    
1059        /**If true then exhibitHasThumbnail() will try to cache its results.
1060         * This should avoid us having to load thumbnails into memory
1061         * just to see if they exist or not.
1062         */
1063        public static final boolean TN_AVAIL_CACHE = true;
1064    
1065        /**The target thumbnail availability cache minimum size, strictly positive.
1066         * Should be large enough that most thumbnail availability requests
1067         * will be caught by it regardless of exhibit set size,
1068         * but small enough to limit memory requirements to something reasonable.
1069         * Should be *much* larger than the full set of thumbnails ever seen
1070         * on one page and/or that might be in our "popular" page set
1071         * so as to avoid thrashing fruitlessly.
1072         * <p>
1073         * We are prepared to give up all cache entries rather than cause an OOME.
1074         */
1075        private static final int TN_AVAIL_CACHE_SIZE_MIN_TARGET = 131 +
1076            5*WebConsts.SINGLE_PAGE_CONTACT_SHEET_TN_COUNT +
1077            SystemVariables.MAX_DIFF_EXHIBIT_NAME_VALUES;
1078    
1079        /**Private key used by exhibitHasThumbnail(); never null.
1080         * Made AEP linked to more be able to accurately size it for the current AEP,
1081         * and ensure that it is periodically rebuilt/refreshed from scratch.
1082         */
1083        private static final DataSourceBean.AEPLinkedKey tnHTMLCacheKey = new DataSourceBean.AEPLinkedKey("tnHTMLCacheKey");
1084    
1085        /**Test if the given exhibits have thumbnails (locally) available; never null.
1086         * This is suitable for a bulk check, eg before rendering a table,
1087         * and may be internally parallelised to overcome I/O latency, etc.
1088         *
1089         * @param exhibitNames  non-null list of full exhibit names;
1090         *     not altered by this routine and must not be changed by the caller
1091         *     while this routine is running
1092         *
1093         * @return  a bit in the result set for each thumbnail that definitely
1094         *     exists in the requested size; an unset bit may represent "not known"
1095         */
1096        @SuppressWarnings("unchecked")
1097        public static BitSet exhibitsHaveThumbnail(final DataSourceBean dataSource,
1098                        final List<Name.ExhibitFull> exhibitNames,
1099                        final boolean standard,
1100                        final boolean forceCreation)
1101            {
1102            final int n = exhibitNames.size();
1103    
1104            // Only try the parallel fetch if:
1105            //   * We have an extant cache of thumbnail state.
1106            //   * There is more than one item to check.
1107            //   * We're not starved of resources (eg power).
1108            final LRUMapAutoSizeForHitRate<Name.ExhibitFull, Byte> tnExistenceMap;
1109            if(TN_AVAIL_CACHE && (n > 1) &&
1110               /* (!GenUtils.mustConservePower()) && */
1111               (null != (tnExistenceMap = (LRUMapAutoSizeForHitRate<Name.ExhibitFull, Byte>) dataSource.getAEPLinkedValue(tnHTMLCacheKey))))
1112                {
1113                // Find all the exhibits whose thumbnail state is not cached at all,
1114                // and try to start one or more background thread(s) to check their status.
1115                // We expect this activity to be largely I/O bound.
1116    
1117                // Tasks to wait for the completion of...
1118                final List<Future<?>> tasks = new LinkedList<Future<?>>();
1119    
1120                for(final Name.ExhibitFull en : exhibitNames)
1121                    {
1122                    // If we have any tn state already cached then skip this entry...
1123                    if(null != tnExistenceMap.get(en)) { continue; }
1124    
1125                    // Try to fetch this missing entry into cache concurrently...
1126                    tasks.add(ThreadUtils.nonCPUThreadPool.submit(new Runnable(){
1127                        public final void run()
1128                            { exhibitHasThumbnail(dataSource, en, standard, forceCreation); }
1129                        }));
1130                    }
1131    
1132                // Wait for any tasks to complete.
1133                for(final Future<?> task : tasks)
1134                    {
1135                    try { task.get(); }
1136                    catch(final Exception e) { e.printStackTrace(); /* Absorb but log any error. */ }
1137                    }
1138                }
1139    
1140            // Fetch all values through the cache using the caller's thread.
1141            final BitSet result = new BitSet(n);
1142            for(int i = n; --i >= 0; )
1143                {
1144                if(exhibitHasThumbnail(dataSource, exhibitNames.get(i), standard, forceCreation))
1145                    { result.set(i); }
1146                }
1147    
1148            return(result);
1149            }
1150    
1151        /**Test if the given exhibit has a thumbnail (locally) available.
1152         * Does not try to force one to be made if one is not extant (or in cache)
1153         * unless the forceCreation argument is true.
1154         * <p>
1155         * Since this is likely to be testing what is in (local) cache,
1156         * its results can be considered at best a (good) hint.
1157         * <p>
1158         * We do not cache entirely negative answers
1159         * (ie that an exhibit has no thumbnails)
1160         * as this may change if we overcome (eg) a temporary resource issue.
1161         * But we assume the converse,
1162         * ie that once we have a thumbnail then it will always be available.
1163         * <p>
1164         * Always returns false for exhibit types that do not support thumbnails.
1165         *
1166         * @param dataSource  the data source; never null
1167         * @param exhibitName  the full exhibit name; must be valid (eg non-null)
1168         * @param standard  if true, tests for the presence of a standard-size
1169         * @param forceCreation  if true, we can try to force creation/fetch
1170         *     of the underlying thumbnail if not already available locally
1171         *
1172         * @return true if exhibit definitely has/had one/both thumbnails,
1173         *     false if unknown or thumbnails are not currently available
1174         */
1175        @SuppressWarnings("unchecked")
1176        public static boolean exhibitHasThumbnail(final DataSourceBean dataSource,
1177                                                  final Name.ExhibitFull exhibitName,
1178                                                  final boolean standard,
1179                                                  final boolean forceCreation)
1180            {
1181            if((dataSource == null) || (exhibitName == null))
1182                { throw new IllegalArgumentException(); }
1183    
1184            // If we are cacheing the availability of thumbnails
1185            // then ensure the cache map exists here.
1186            // This is a thread-safe size-limited LRU map.
1187            // Note that this is *not* linked to the AEP
1188            // so that we may retain a little stale information indefinitely,
1189            // especially though AEP changes when much has to be recomputed.
1190            MemoryTools.CacheMiniMap<Name.ExhibitFull, Byte> tnExistenceMap;
1191            if(TN_AVAIL_CACHE)
1192                {
1193                while((tnExistenceMap = (MemoryTools.CacheMiniMap<Name.ExhibitFull, Byte>) dataSource.getAEPLinkedValue(tnHTMLCacheKey)) == null)
1194                    {
1195                    // Size the table to suit the exhibit set and allow growth of the exhibit set...
1196                    final int nExhibits = dataSource.getAllExhibitProperties(-1).aeid.length;
1197                    dataSource.putIfAbsentAEPLinkedValue(tnHTMLCacheKey, LRUMapAutoSizeForHitRate.<Name.ExhibitFull, Byte>create(0, Math.max(2*TN_AVAIL_CACHE_SIZE_MIN_TARGET, 1+nExhibits), "exhibitHasThumbnail"));
1198                    }
1199                }
1200    
1201            // We store availability as a bit-mask
1202            //   * bit 0 is 1 iff the small thumbnail is available
1203            //   * bit 1 is 1 iff the std thumbnail is available
1204            byte availability = 0; // Assume none available by default.
1205    
1206            // Only use cached positive values.
1207            // A negative cached result has us check again...
1208            // We cache negative results mainly to maintain correct hit-rate stats.
1209            final Byte cachedAvailability;
1210            if(!TN_AVAIL_CACHE || (null == (cachedAvailability = tnExistenceMap.get(exhibitName))) || (cachedAvailability.byteValue() <= 0))
1211                {
1212                // Hauling the thumbnail pair into view can be very expensive,
1213                // eg may involve disc fetches or image decoding or worse,
1214                // and may simply displace other items better kept in cache/memory.
1215                final ExhibitThumbnails tns = dataSource.getThumbnails(exhibitName, forceCreation);
1216    
1217                if(tns != null)
1218                    {
1219                    if(tns.getSmall() != null)
1220                        { availability |= 1; }
1221                    if(tns.getStandard() != null)
1222                        { availability |= 2; }
1223                    }
1224    
1225                if(TN_AVAIL_CACHE)
1226                    {
1227                    // Byte.valueOf() avoids creating distinct instances.
1228                    tnExistenceMap.put(exhibitName, Byte.valueOf(availability));
1229    if(IsDebug.isDebug && ((tnExistenceMap.size() & 0x3ff) == 0)) { dataSource.log("    tnExistenceMap: " + tnExistenceMap.toString()); }
1230                    }
1231                }
1232            // Use the cached (positive) value...
1233            else
1234                { availability = cachedAvailability.byteValue(); }
1235    
1236            final boolean tnAvailable = (0 != (availability & (standard ? 2 : 1)));
1237            return(tnAvailable);
1238            }
1239    
1240        /**Get thumbnail image URL to embed in HTML page (usually JPEG/GIF/PNG); null if none available.
1241         * By preference uses purpose-built thumbnail, else tries to
1242         * use image itself if small enough.
1243         * <p>
1244         * Has to be passed a dataSource and a full exhibit name.
1245         * <p>
1246         * Returns null if no suitable thumbnail URL can be generated.
1247         * <p>
1248         * This may cache its results,
1249         * in particular assuming that once a particular thumbnail becomes available
1250         * that it does not go away again.
1251         *
1252         * @param tnDim  (output argument) if not null and result is not null,
1253         *     is filled in with the thumbnail dimensions
1254         * @param dontCreateTn  if true, don't force creation of a thumbnail if
1255         *     one is not already readily available
1256         * @param rrURLOnly  if true then only generate a root-relative URL,
1257         *     else an absolute URL at a CDN (or with sneaky concurrency) is allowed
1258         *     and either form may be returned
1259         */
1260        public static String makeHTMLInlineImageThumbnailURL(final DataSourceBean dataSource,
1261                                                             final Name.ExhibitFull exhibitName,
1262                                                             final boolean std,
1263                                                             final boolean rrURLOnly,
1264                                                             final java.awt.Dimension tnDim,
1265                                                             final boolean dontCreateTn)
1266            throws IOException
1267            {
1268            final AllExhibitProperties aep = dataSource.getAllExhibitProperties(-1);
1269    
1270            // Get full exhibit details...
1271            final ExhibitStaticAttr esa = aep.aeid.getStaticAttr(exhibitName);
1272            // Stop if exhibit has gone away or is invalid.
1273            if(esa == null) { return(null); }
1274    
1275            if(WebUtils.canInlineInHTMLPage((ExhibitMIME.getInputFileType(esa.getCharSequence()))))
1276                {
1277                // Get the exhibit immutable computable properties if possible.
1278                final ExhibitPropsComputable epc = aep.getExhibitPropsComputable(exhibitName);
1279                final java.awt.Dimension xyDim = (epc == null) ? null : epc.getXyDimensions();
1280                // Compute what thumbnail dimensions should be...
1281                final java.awt.Dimension thumbnailXyDim = (xyDim == null) ? null : ExhibitThumbnails.computeThumbnailDimensions(xyDim, std);
1282    
1283                // Is this in fact a 2D image?
1284                if(thumbnailXyDim != null)
1285                    {
1286                    // Pass dimensions back to caller.
1287                    if(tnDim != null)
1288                        {
1289                        tnDim.width  = thumbnailXyDim.width;
1290                        tnDim.height = thumbnailXyDim.height;
1291                        }
1292    
1293                    // Is this a small enough (in bytes) exhibit to be its own thumbnail?
1294                    final boolean smallExhibit = (esa.length <=
1295                        (std ? ExhibitThumbnails.STD_ABS_MAX_BYTES : ExhibitThumbnails.SML_ABS_MAX_BYTES));
1296                    // Can this be its own thumbnail?
1297                    // It must be small enough in bytes,
1298                    // and possibly small enough in XxY pixels too.
1299                    final boolean canBeOwnThumbnail =
1300                        (smallExhibit && (WebConsts.ALLOW_BROWSER_IMAGE_SCALE || thumbnailXyDim.equals(xyDim)));
1301    
1302                    // See if we have thumbnails available
1303                    // (if eager, force one to be made,
1304                    // else see if one already exists if we can't inline directly).
1305                    // If we force them to be created
1306                    // then we expect them to remain cached!
1307                    // Shall we force creation?
1308                    final boolean forceCreation = !dontCreateTn &&
1309                        (WebConsts.EAGER_TN_USE || !canBeOwnThumbnail);
1310    
1311                    // Check for availability of real thumbnail.
1312                    final boolean tnAvailable = exhibitHasThumbnail(dataSource,
1313                                                                    exhibitName,
1314                                                                    std,
1315                                                                    forceCreation);
1316    
1317                    final boolean canShowThumbnail = tnAvailable || canBeOwnThumbnail;
1318    
1319    //System.err.println("[sE/cBOT/cST: "+smallExhibit+"/"+canBeOwnThumbnail+"/"+canShowThumbnail+".]");
1320    
1321                    if(canShowThumbnail)
1322                        {
1323                        // Chose the (root-relative, or CDN/optimised full) URL to use
1324                        // (always serve from same host as this page).
1325                        // Show true thumbnail by preference...
1326                        if(rrURLOnly)
1327                            { return(tnAvailable ? WebUtils.makeThumbnailRRURL(exhibitName, std) : WebUtils.makeExhibitRRURL(exhibitName)); }
1328                        else
1329                            { return(tnAvailable ? CDNUtils.makeThumbnailOptCDNURL(dataSource, exhibitName, std, null) : CDNUtils.makeExhibitOptCDNURL(dataSource, exhibitName, null).toString()); }
1330                        }
1331                    }
1332                }
1333    
1334            return(null); // Can't make a thumbnail.
1335            }
1336    
1337        /**Find the set of exhibits with the same subject as the indicated one.
1338         * Given a List of String exhibit names sorted by
1339         * ExhibitPropsGlobalImmutable.SMART_ORDER
1340         * (or possibly just by ExhibitPropsGlobalImmutable.SUBJECT_ORDER),
1341         * the SUBJECT_ORDER comparator,
1342         * and the index to a given item in that List,
1343         * finds all the items surrounding that have the same SUBJECT_ORDER,
1344         * ie that differ only in attribute words and are variants on the same
1345         * exhibit.  (The result will always contain the input item.)
1346         * <p>
1347         * The List must not contain nulls, all entries must be valid exhibit
1348         * names, and the List must be sorted implicitly with the comparator
1349         * passed in.  The input index must be within bounds.
1350         * <p>
1351         * This does not alter the List passed in.  The return value is
1352         * an unmodifiable sublist of the input.
1353         * <p>
1354         * This returns the sublist of variants on the indicated exhibit;
1355         * this will degenerate to just containing the input exhibit name if there
1356         * are no other exhibit names for the same exhibit.
1357         * <p>
1358         * This assumes that the number of variants of any given exhibit is
1359         * small, and so a linear search is used.
1360         * <p>
1361         * If the List does not support efficient bidirectional movement and seeks
1362         * then this routine will be very inefficient.
1363         */
1364        public static List<Name.ExhibitFull> getExhibitVariantRange(final List<Name.ExhibitFull> allExhibitNames,
1365                                                          final Comparator<Name.ExhibitFull> comparator,
1366                                                          final int thisExhibitIndex)
1367            {
1368            final Name.ExhibitFull thisExhibit = allExhibitNames.get(thisExhibitIndex);
1369            final int listSize = allExhibitNames.size();
1370    
1371            int first, last;
1372    
1373            // Search backwards for first matching item.
1374            for(first = thisExhibitIndex; first > 0; --first)
1375                {
1376                if(comparator.compare(thisExhibit, allExhibitNames.get(first-1)) != 0)
1377                    { break; } // Stop when we hit an item with a different subject.
1378                }
1379    
1380            // Search forwards for last matching item.
1381            for(last = thisExhibitIndex; last < listSize-1; ++last)
1382                {
1383                if(comparator.compare(thisExhibit, allExhibitNames.get(last+1)) != 0)
1384                    { break; } // Stop when we hit an item with a different subject.
1385                }
1386    
1387            return(Collections.unmodifiableList(allExhibitNames.subList(first, last+1)));
1388            }
1389    
1390        /**Computes a minimal human-readable immutable unique prefix of an exhibit short name to distinguish given exhibit from most others.
1391         * Uniqueness is not guaranteed, but this is meant to give a reasonably
1392         * short result that a human can read in the title of a page, for example.
1393         * <p>
1394         * If inputs are bad then this will try to fail gently with an empty-string
1395         * result rather than an exception to make it robust if called directly from
1396         * JSP code, for example.
1397         * <p>
1398         * This is passed a List of exhibits sorted in a "smart" order that
1399         * sorts mainly on the file component of the name,
1400         * probably in a case-insensitive way.
1401         * <p>
1402         * This routine finds a short word prefix that (case-insensitively)
1403         * is different from neighbouring exhibits
1404         * and is thus (depending on the overall sort order)
1405         * probably the shortest unique prefix amongst the entire collection.
1406         * <p>
1407         * (If no unique value is possible, the entire prefix is returned.)
1408         * <p>
1409         * If the List of size 0 we return the empty string and do not use
1410         * the index parameter at all.  This simplifies use where the list
1411         * may transiently be empty during start-up.
1412         * <p>
1413         * This routine may run very slowly if the List argument does not support
1414         * efficient random seeking.
1415         * <p>
1416         * The result is designed to be used in headings and other display text
1417         * such as image alt/title attributes.
1418         * <p>
1419         * TODO: fix inefficient double-parsing of main words...
1420         *
1421         * @param exhibits  sorted (smart-ish sorted) list of exhibit names
1422         *     (String value); must not change while routine is running
1423         * @param ourIndex  index (strictly positive) of the exhibit whose
1424         *     abbreviated name we wish to produce; must be within the List
1425         * @return "" in case of invalid arguments,
1426         *     else some whole-word prefix of the name
1427         */
1428        public static CharSequence minimalUniqueENTitlePrefix(final List<Name.ExhibitFull> exhibits,
1429                                                              final int ourIndex)
1430            {
1431            // Treat bad arguments gently as the outputs are likely to
1432            // be displayed in HTML directly.
1433            if(exhibits.size() == 0) { return(""); }
1434            if((ourIndex < 0) || (ourIndex >= exhibits.size())) { return(""); }
1435    
1436            // Set of attribute words we use for parsing names.
1437            // In this case, deliberately empty.
1438            final Set<String> noAttrWords = Collections.emptySet();
1439    
1440            // Dummy "empty" enumeration we use for missing neighbours.
1441            final Enumeration<?> emptyEn = Collections.enumeration(Collections.emptyList());
1442    
1443            // Get our name and previous/next names
1444            // as word enumerations (or empty enumerations if not present).
1445            // for this purpose we pretend that there are no attribute words...
1446            final ListIterator<Name.ExhibitFull> liFwd = exhibits.listIterator(ourIndex);
1447            final CharSequence ourNameFull = (liFwd.next());
1448            // FIXME: inefficient via full name and tokenizer...
1449            final CharSequence ourNameMainWords = ExhibitName.getMainWordsComponent(
1450                                            ourNameFull, noAttrWords).toString();
1451            final Enumeration<?> ourNameWords = ExhibitName.getMainWords(
1452                                            ourNameFull, noAttrWords);
1453    
1454            // Default to empty enumerations.
1455            Enumeration<?> nextNameWords = emptyEn;
1456            Enumeration<?> prevNameWords = emptyEn;
1457    
1458            // Now search forwards for different name to compare against...
1459            // Note that we implicitly start from just after ourName above.
1460            while(liFwd.hasNext())
1461                {
1462                final CharSequence n = (liFwd.next());
1463                // FIXME: inefficient via full name and tokenizer...
1464                final CharSequence nextNameMainWords =
1465                    ExhibitName.getMainWordsComponent(n, noAttrWords);
1466                if(!TextUtils.contentEqualsIgnoreCase(nextNameMainWords, ourNameMainWords))
1467                    {
1468                    nextNameWords = ExhibitName.getMainWords(n, noAttrWords);
1469                    break;
1470                    }
1471                }
1472    
1473            // Now search backwards for different name to compare against...
1474            final ListIterator<Name.ExhibitFull> liBack = exhibits.listIterator(ourIndex);
1475            while(liBack.hasPrevious())
1476                {
1477                final CharSequence n = (liBack.previous());
1478                // FIXME: inefficient via full name and tokenizer...
1479                final CharSequence prevNameMainWords =
1480                    ExhibitName.getMainWordsComponent(n, noAttrWords);
1481                if(!TextUtils.contentEqualsIgnoreCase(prevNameMainWords, ourNameMainWords))
1482                    {
1483                    prevNameWords = ExhibitName.getMainWords(n, noAttrWords);
1484                    break;
1485                    }
1486                }
1487    
1488            // Result word prefix (preserves case, ends with word separator).
1489            final StringBuilder result = new StringBuilder();
1490    
1491            // Iterate until we run out of words or both neighbours have
1492            // (or have different words).
1493            while(ourNameWords.hasMoreElements())
1494                {
1495                final String ourNameWord = (String) ourNameWords.nextElement();
1496                result.append(ourNameWord).append(ExhibitName.WORD_SEP);
1497    
1498                // Take next word, if extant, on both prev and next sides.
1499                final String nextNameWord = nextNameWords.hasMoreElements() ?
1500                    (String) nextNameWords.nextElement() : "";
1501                final String prevNameWord = prevNameWords.hasMoreElements() ?
1502                    (String) prevNameWords.nextElement() : "";
1503    
1504                // If neighbour has another word but it is different to ours,
1505                // pretend neighbour was truncated at previous word.
1506                // We ignore case, since users probably will.
1507                if(nextNameWord.equalsIgnoreCase(ourNameWord))
1508                    { continue; } // Looks the same so cannot stop yet.
1509                else
1510                    { nextNameWords = emptyEn; } // No longer a contender.
1511    
1512                if(prevNameWord.equalsIgnoreCase(ourNameWord))
1513                    { continue; } // Looks the same so cannot stop yet.
1514    //            else
1515    //                { prevNameWords = emptyEn; } // No longer a contender.
1516    
1517                break; // Done!
1518                }
1519    
1520            // Attempt to return as space-saving Name, else a String, either being interned.
1521            return(Name.createOrStringFallback(result, null));
1522            }
1523    
1524       /**Get BufferedImage containing expanded image loaded as static resource from WAR; never null.
1525        * Loaded on demand and cached statically,
1526        * via a SoftReference to allow the system to reclaim memory rather than OOM.
1527        * <p>
1528        * There may be an adverse effect on system behaviour if many large images
1529        * are cached in memory; this may be mitigated by storing them via
1530        * a SoftReference so that the memory can be recycled automatically.
1531        * <p>
1532        * This method is internally synchronized to allow only one image load/decode
1533        * to happen at once to conserve CPU and memory (and other) resources.
1534        * <p>
1535        * If the image is indexed and forceToARGBTrueColour is true then
1536        * we expand it to true-colour to make it possible to add markings.
1537        * <p>
1538        * This uses our built-in mediahandler classes to decode the image,
1539        * so the image type must be one that we have a decoder for.
1540        * <p>
1541        * This may not handle alpha correctly in all cases.
1542        * <p>
1543        * Under memory pressure this may release cached resources
1544        * whether hard or soft cached.
1545        * <p>
1546        * <strong>Beware:</strong> since a reference is returned rather than
1547        * a copy, be careful not to adjust the returned image unintentionally.
1548     * @param copyResult  if true, we force the result to be a copy of
1549    *     the cached value to avoid accidentally changing the cached copy;
1550    *     this may force a change in colour model and/or discarding properties
1551     * @param resourceRRURL  root-relative URL (eg "/my/image.gif") of
1552    *     source image in WAR; must not be null and must be a type
1553    *     (and with a file extension) that we have a MIME mediahandler for
1554     * @param forceToARGBTrueColour  if true, and the source image is not
1555    *     ARGB true-colour, then we force conversion to ARGB true-colour
1556    *     before cacheing to make it easier to draw on the image
1557     * @param context  gives context for WAR from which to load the raw
1558    *     base clickable-map image
1559        *
1560        * @return image, possibly a copy, possibly with a converted colour model
1561        *
1562        * @throws java.lang.IllegalStateException if the image is not loadable
1563        */
1564        public static BufferedImage getAndCacheStaticImage(
1565                                                final boolean copyResult,
1566                                                final String resourceRRURL,
1567                                                final boolean forceToARGBTrueColour,
1568                                                final ServletContext context)
1569            throws IllegalStateException
1570            {
1571            if((context == null) ||
1572               (resourceRRURL == null))
1573                { throw new IllegalArgumentException(); }
1574    
1575            synchronized(_gACSI_cache)
1576                {
1577                // An item in the cache is one of:
1578                //   * null (ie completely absent)
1579                //   * SoftReference (possibly cleared) to BufferedImage
1580                final Object rawItem = _gACSI_cache.get(resourceRRURL);
1581                BufferedImage result = (null == rawItem) ? null :
1582                    (BufferedImage) (((SoftReference<?>) rawItem).get());
1583    
1584                if(result == null) // Needs fetching.
1585                    {
1586                    final InputStream is =
1587                        context.getResourceAsStream(resourceRRURL);
1588                    if(is == null)
1589                        { throw new IllegalStateException("cannot get InputSteam for image to cache from " + resourceRRURL); }
1590    
1591                    // Find correct handler given name of file.
1592                    final ExhibitMIME.ExhibitTypeParameters etp =
1593                        ExhibitMIME.getInputFileType(resourceRRURL);
1594                    if(etp == null)
1595                        { throw new IllegalStateException("cannot get MIME type for image to cache from " + resourceRRURL); }
1596                    if(etp.handler == null)
1597                        { throw new IllegalStateException("cannot get handler for image to cache from " + resourceRRURL); }
1598    
1599                    try {
1600                        // Decode the image.
1601                        result = etp.handler.decodeImage(is);
1602                        if(result == null)
1603                            { throw new IllegalStateException("cannot get decode image to cache from " + resourceRRURL); }
1604    
1605                        // Force to ARGB if required.
1606                        if(forceToARGBTrueColour)
1607                            { result = ImageUtils.convertToTrueColourARGB(result, false); }
1608    
1609                        // Cache the result.
1610                        _gACSI_cache.put(resourceRRURL, new SoftReference<BufferedImage>(result));
1611                        }
1612                    catch(final IOException e)
1613                        { throw new IllegalStateException("cannot decode image to cache from " + resourceRRURL + ": IOException: " + e.getMessage()); }
1614                    }
1615    
1616                // If forced to copy the result, do so.
1617                // TODO: Maybe try to preserve relevant image properties?
1618                if(copyResult)
1619                    {
1620                    // Coerce data into original colour model.
1621                    // Discard any properties of the original.
1622                    final ColorModel cm = ImageUtils.extractColorModelOrRGB(result);
1623                    final int width = result.getWidth();
1624                    final int height = result.getHeight();
1625                    final WritableRaster raster = cm.createCompatibleWritableRaster(
1626                        width, height);
1627                    final BufferedImage copiedResult =
1628                        new BufferedImage(cm, raster, false, null);
1629    
1630                    // Actually copy the pixels...
1631                    copiedResult.setRGB(0, 0, width, height,
1632                        result.getRGB(0, 0, width, height, null, 0, width),
1633                                        0, width);
1634    
1635                    return(copiedResult);
1636                    }
1637    
1638                return(result);
1639                }
1640            }
1641    
    /**Private cache for getAndCacheStaticImage(); never null.
     * Keys are Strings based on the image's root-relative URL,
     * and each value stored by getAndCacheStaticImage() is
     * a SoftReference (possibly since cleared) to a BufferedImage;
     * the value type is declared as Object.
     * <p>
     * Thread-safe LRU-managed limited-size memory-sensitive map.
     * <p>
     * We are prepared to discard everything if very short of memory.
     * <p>
     * All getAndCacheStaticImage() activity is synchronized on this instance.
     * <p>
     * NOTE(review): the create() arguments 0 and 1024 are presumably
     * the minimum and maximum entry counts; confirm against LRUMapAutoSizeForHitRate.
     */
    private static final LRUMapAutoSizeForHitRate<String,Object> _gACSI_cache =
        LRUMapAutoSizeForHitRate.<String,Object>create(0, 1024, "_gACSI_cache");
1654    
1655    
1656        /**Generate a unique key for the given HTTP request; returns null if not possible to generate.
1657         * For example, we use this to help limit the number of times that
1658         * a given user is asked to vote.
1659         * <p>
1660         * Generates a string starting with the given prefix and some
1661         * leading portion of the client's IP address...
1662         * <p>
1663         * It is not fatal if this conflates users,
1664         * but it is more useful that this never thinks one user is more than one
1665         * to avoid pestering them too much
1666         * (or letting a spider inject too much noise for example).
1667         *
1668         * @param prefix  unique prefix to the generated key; non-empty, non-null
1669         * @param request  the user's request
1670         */
1671        public static String generateUserDataPointID(final String prefix,
1672                                                     final HttpServletRequest request)
1673            {
1674            if((prefix == null) || (prefix.length() == 0)) { return(null); }
1675            if(request == null) { return(null); }
1676    
1677            final StringBuilder sb = new StringBuilder(prefix.length() + 21); // 21 for IPv4.
1678            sb.append(prefix);
1679    
1680            try
1681                {
1682                // Get the IP address
1683                final InetAddress ia = InetAddress.getByName(request.getRemoteAddr());
1684    
1685                // Use all but the final octet to construct the data-point ID for a non-IPv6 address (ie IPv4).
1686                // For IPv6 addresses use hex bytes of the network part for now.
1687                // Assume that this will distinguish most genuine users from one another
1688                // but also not completely identify an individual (for privacy/security reasons).
1689                final byte[] addr = ia.getAddress();
1690                if(ia instanceof Inet6Address) // IPv6
1691                    {
1692                    sb.append("6:");
1693                    for(int i = 0; i < 8; ++i) // Network part only...
1694                        { sb.append(Integer.toHexString(addr[i] & 0xff)); }
1695                    }
1696                else // IPv4
1697                    {
1698                    for(int i = 0; i < addr.length-1; ++i)
1699                        { sb.append(addr[i] & 0xff).append('.'); }
1700                    }
1701    
1702                return(sb.toString());
1703                }
1704            catch(final UnknownHostException e)
1705                {
1706                return(null); // Could not understand the client's address.
1707                }
1708            }
1709    
    /**Name of the additional parameter used to record that this is a vote "for" ("pro").
     * The value of the parameter is to be "true" or "false".
     */
    public static final String VOTE_PRO_PARAM_NAME = "pro";

    /**Name of the additional parameter used to record that this is a vote "against" ("con").
     * The value of the parameter is to be "true" or "false".
     */
    public static final String VOTE_CON_PARAM_NAME = "con";
1719    
1720        /**Create and post the handler for a vote if possible and returns the unique listenerID.
1721         * This replaces any existing listener for this voter.
1722         * <p>
1723         * This returns null if we could not create a listener.
1724         * <p>
1725         * If the handler is invoked,
1726         * then this registers a dummy (inactive) handler
1727         * to delay the next request to the user to vote.
1728         *
1729         * @param exhibitFullName  valid exhibit name; never null
1730         * @param request  client's HTTP request; never null
1731         * @param vars  where stats updates are posted; never null
1732         *
1733         * @return listenerID, or null if one could not be generated
1734         */
1735        public static String createAndFileVoteListener(final Name.ExhibitFull exhibitFullName,
1736                                                       final HttpServletRequest request,
1737                                                       final SimpleVariablePipelineIF vars)
1738            {
1739            if(exhibitFullName == null) { return(null); }
1740            if(vars == null) { return(null); }
1741    
1742            final String dpID = generateUserDataPointID(WebConsts.VOTER_DATA_POINT_PREFIX, request);
1743            if(dpID == null) { return(null); }
1744    
1745            // Compute expiry time (with a random element).
1746            final long expireBy = System.currentTimeMillis() + WebConsts.VOTE_MIN_REQUEST_GAP_MS +
1747                    Rnd.fastRnd.nextInt(WebConsts.VOTE_MIN_REQUEST_GAP_MS);
1748    
1749            // Capture the visitor's IP address for later (in case they vote).
1750            InetAddress va = null;
1751            try { va = InetAddress.getByName(request.getRemoteAddr()); }
1752            catch(final UnknownHostException e) { } // Ignore errors...
1753            final InetAddress voterIPAddr = va;
1754    
1755            // Create new listener.
1756            // Pass in a weak reference to the vars pipeline to avoid blocking GC...
1757            final StatsSink.AbstractStatsListener newAsl =
1758                    (new VoteHandler(dpID, expireBy, exhibitFullName, new WeakReference<SimpleVariablePipelineIF>(vars), voterIPAddr));
1759    
1760            // Get the listener ID...
1761            final String listenerID = newAsl.uniqueListenerID;
1762    
1763            // File the request!
1764            StatsSink.addListenerForDataPoint(newAsl);
1765    
1766            return(listenerID);
1767            }
1768    
1769    
1770        /**Get sorted, hyperlinked HTML i18n text list of exhibit categories; never null.
1771         * The list is sorted:
1772         * <ol>
1773         * <li>By dictionary-order i18n localised title.
1774         * </ol>
1775         * <p>
1776         * We may flag categories as good or bad if significantly so.
1777         *
1778         * @param asList  if true, entries are preceded by &lt;li&gt;
1779         *     else they are followed by &lt;br /&gt\r\n;
1780         */
1781        public static final String getCategoryListSortedAsHTML(final DataSourceBean dsb,
1782                                                               final HttpServletRequest request,
1783                                                               final LocaleBeanBase localeBean,
1784                                                               final boolean asList)
1785            throws IOException
1786            {
1787            return(getCategoryListSortedAsHTML(dsb, request, localeBean, asList ? null : "<br />\r\n"));
1788            }
1789    
1790        /**Get sorted, hyperlinked HTML i18n text list of exhibit categories; never null.
1791         * The list is sorted:
1792         * <ol>
1793         * <li>By dictionary-order i18n localised title.
1794         * </ol>
1795         * <p>
1796         * We may flag categories as good or bad if significantly so.
1797         *
1798         * @param entrySeparator  text (followed by CRLF) to terminate entries;
1799         *     null if entries are to be wrapped with li tags
1800         */
1801        public static final String getCategoryListSortedAsHTML(final DataSourceBean dsb,
1802                                                               final HttpServletRequest request,
1803                                                               final LocaleBeanBase localeBean,
1804                                                               final String entrySeparator)
1805            throws IOException
1806            {
1807            if((dsb == null) || (localeBean == null))
1808                { throw new IllegalArgumentException(); }
1809    
1810            final boolean asList = (null == entrySeparator);
1811    
1812            // Private data on each category, for sorting.
1813            final class PerCat implements Comparable<PerCat>
1814                {
1815                PerCat(final String cat,
1816                       final String i18nTitle,
1817                       final Boolean isGood,
1818                       final int entriesInCat)
1819                    {
1820                    category = cat;
1821                    title = i18nTitle;
1822                    good = isGood;
1823                    numEntries = entriesInCat;
1824                    }
1825    
1826                final String category;
1827                final String title;
1828                final Boolean good;
1829                final int numEntries;
1830    
1831                /**Compares this object with the specified object for order.
1832                 * Order is:
1833                 * <ol>
1834                 * <li>By dictionary-order i18n localised title.
1835                 * </ol>
1836                 */
1837                public int compareTo(final PerCat other)
1838                    {
1839                    // Sort by localised title, case insensitively.
1840                    final int tComp = String.CASE_INSENSITIVE_ORDER.compare(title, other.title);
1841                    if(tComp != 0) { return(tComp); }
1842    
1843                    // Officially break ties by underlying canonical category name.
1844                    // This should never really be needed.
1845                    return(category.compareTo(other.category));
1846                    }
1847                }
1848    
1849            final AllExhibitProperties aep = dsb.getAllExhibitProperties(-1);
1850    
1851            // Get the list of categories.
1852            final Map<String,Integer> categories = aep.getCategoryExhibitCounts();
1853    
1854            // Create sorted set of details.
1855            // We don't expend huge effort on getting the category good/bad status.
1856            final SortedSet<PerCat> cs = new TreeSet<PerCat>();
1857            for(final String cat : categories.keySet())
1858                {
1859                cs.add(new PerCat(
1860                    cat,
1861                    GenUtils.computeSectionTitle(aep, cat, localeBean),
1862                    aep.isCategoryGood(cat, dsb, false),
1863                    categories.get(cat)
1864                    ));
1865                }
1866    
1867            // Convert to nicely-formatted HTML.
1868            final StringBuilder sb = new StringBuilder(79 * categories.size());
1869            for(final PerCat pc : cs)
1870                {
1871                if(asList) { sb.append("<li>"); }
1872    
1873                // If category significantly good/bad then mark it so.
1874                if(pc.good != null)
1875                    {
1876                    final String proIcon = WebConsts.PRO_ICON_15x15_NAME;
1877                    final String conIcon = WebConsts.CON_ICON_15x15_NAME;
1878                    sb.append("<img src=\"").
1879                        append(WebUtils.getOptionalSneakyConcurrencyRRURLPrefix(request)).
1880                        append("/_static/icon/").
1881                        append(pc.good.booleanValue() ? proIcon : conIcon).
1882                        append("\" width=15 height=15> ");
1883                    }
1884    
1885                // Open link to category RRURL.
1886                sb.append("<a href=\"").
1887                    append(WebConsts.VIRTUAL_COLLECTIONS_BYCATEGORY_ROOT).
1888                    append(pc.category).
1889                    append("/\">");
1890                sb.append(pc.title);
1891                // Close link to category.
1892                sb.append("</a>");
1893    
1894                // Insert exhibit count.
1895                sb.append(" <i>(").append(pc.numEntries).append(")</i>");
1896    
1897                // Finish the point
1898                if(!asList) { sb.append(entrySeparator); }
1899                else { sb.append("</li>\r\n"); }
1900                }
1901    
1902            return(sb.toString());
1903            }
1904    
1905        /**Returns true if this seems to be a slave disconnected from the master.
1906         * This instance may, for example, not wish to collect votes from users
1907         * if the votes may get discarded without getting to the master.
1908         */
1909        public static final boolean isDisconnectedSlave(final DataSourceBean dsb)
1910            {
1911            if(dsb == null) { throw new IllegalArgumentException(); }
1912    
1913            // If definitely a master then this is not 'disconnected' by definition.
1914            if(Boolean.FALSE.equals(dsb.isSlave()))
1915                { return(false); }
1916    
1917            // This instance is treated as disconnected if it is a slave with no xfer key
1918            // since that implies that it won't be allowed to send updates (eg votes) home.
1919            if(!LocalProps.hasXferKey())
1920                { return(true); }
1921    
1922            // This instance is to be treated as NOT disconnected
1923            // if it can see at least one other system via the system variables,
1924            // ie at least two systems in total.
1925            // This uses the availability of client-count as a measure of connectivity.
1926            final SimpleVarStats stats = VarTools.generateSimpleStats(dsb,
1927                                    SystemVariables.ThroughputMonitorFilter_CLIENT_COUNT,
1928                                    0); /* Minimum possible life. */
1929            return((stats == null) || (stats.getSystemCount() < 2));
1930            }
1931    
1932        /**Returns true if this request is apparently a precacheing request, eg from a "Web accelerator".
1933         * This is true if a client (such as FireFox) is "reading ahead"
1934         * but it may be the case that no real human gets to see the content.
1935         * <p>
1936         * See https://developer.mozilla.org/en/Link_prefetching_FAQ
1937         */
1938        public static boolean isPrecacheRequest(final HttpServletRequest request)
1939            {
1940            // "X-Moz: prefetch" header covers FF1--3.5 and Google's Web Accelerator.
1941            return("prefetch".equalsIgnoreCase(request.getHeader("X-Moz")));
1942            }
1943    
1944    
    /**Private key used by getCatPageExhibitMetaDataHTML(); never null.
     * The map from full exhibit name to cached metadata HTML
     * is stored on, and fetched from, the DataSourceBean
     * as an AEP-linked value under this key.
     */
    private static final AEPLinkedKey metadataCacheKey = new AEPLinkedKey("metadataCacheKey");
1947    
    /**Static dictionary used by getCatPageExhibitMetaDataHTML() for compression of in-memory data; never null.
     * It is passed to Compact7BitString.convertToCompact7BitString()
     * when the generated metadata HTML is compacted for cacheing.
     * <p>
     * The static dictionary content should be reviewed after any major format changes,
     * though this is not a correctness issue, only a matter of compression efficiency.
     * <p>
     * The trailing comment on each entry records the statistics
     * (occurrence count, estimated saving, mean first position)
     * that presumably justified its inclusion when the list was generated;
     * entries marked MANUALLY ADDED were inserted by hand.
     */
    public static final Compact7BitString.StaticDictionary sDictMD = new Compact7BitString.StaticDictionary("getCatPageExhibitMetaDataHTML",
        Arrays.asList(new String[]{
            "JPEG",    /* MANUALLY ADDED: count=16817, saving=50451, meanFirstPos=126 */
            "javax_imageio_1", /* count=17761, saving=248654, meanFirstPos=37 */
            "ColorSpaceType", /* count=17752, saving=230776, meanFirstPos=67 */
            "Compression", /* count=17684, saving=176840, meanFirstPos=106 */
            "NumChannels", /* count=16942, saving=169420, meanFirstPos=86 */
            "metadata", /* count=17939, saving=125573, meanFirstPos=9 */
            "TypeName", /* count=16826, saving=117782, meanFirstPos=120 */
            "compact", /* count=18684, saving=112104, meanFirstPos=3 */
            "Chroma", /* count=17755, saving=88775, meanFirstPos=53 */
            "value", /* count=18684, saving=74736, meanFirstPos=87 */
            "image", /* count=16951, saving=67804, meanFirstPos=23 */
            "YCbCr", /* count=15240, saving=60960, meanFirstPos=73 */
            "name", /* count=17751, saving=53253, meanFirstPos=71 */
            "\"</", /* count=18684, saving=37368, meanFirstPos=72 */
            "></", /* count=17805, saving=35610, meanFirstPos=95 */
            "</", /* count=18684, saving=18684, meanFirstPos=10 */
            "=\"", /* count=18684, saving=18684, meanFirstPos=70 */
            "> ", /* count=18684, saving=18684, meanFirstPos=68 */
            "><", /* count=18684, saving=18684, meanFirstPos=4 */
            "dd", /* count=18684, saving=18684, meanFirstPos=13 */
            "dl", /* count=18684, saving=18684, meanFirstPos=1 */
            "dt", /* count=18684, saving=18684, meanFirstPos=5 */
            "BackgroundIndex", /* count=808, saving=11312, meanFirstPos=103 */
            "BlackIsZero", /* count=934, saving=9340, meanFirstPos=89 */
            "accessionData", /* count=745, saving=8940, meanFirstPos=9 */
            "stream", /* count=984, saving=4920, meanFirstPos=23 */
            "GRAY", /* count=1157, saving=3471, meanFirstPos=73 */
            "CRC32", /* count=745, saving=2980, meanFirstPos=61 */
            "RGB", /* count=1353, saving=2706, meanFirstPos=73 */
            "TRUE", /* count=837, saving=2511, meanFirstPos=91 */
            "date", /* count=745, saving=2235, meanFirstPos=23 */
            "hash", /* count=745, saving=2235, meanFirstPos=59 */
            "size", /* count=745, saving=2235, meanFirstPos=41 */
            "sampleRate", /* count=174, saving=1566, meanFirstPos=88 */
            "MD5", /* count=745, saving=1490, meanFirstPos=82 */
            "encoding", /* count=174, saving=1218, meanFirstPos=70 */
            "frames", /* count=162, saving=810, meanFirstPos=37 */
            "audio", /* count=178, saving=712, meanFirstPos=56 */
            "57024", /* count=173, saving=692, meanFirstPos=47 */
            "PCM_SIGNED", /* count=55, saving=495, meanFirstPos=78 */
            "ULAW", /* count=112, saving=336, meanFirstPos=75 */
            "PCM_UNSIGNED", /* count=7, saving=77, meanFirstPos=77 */
            "BI_RGB", /* count=6, saving=30, meanFirstPos=74 */
            "1136915", /* MANUALLY ADDED: count ~ 10 as prefix. */
            }));
1999    
2000        /**Generates HTML form of exhibit metadata, "" if no such metadata for the specified exhibit; never null.
2001         * The result is keyed to both the DataSourceBean and the exhibitName.
2002         * <p>
2003         * Cached values are discarded when the AEP changes.
2004         * <p>
2005         * The computed HTML is locale-invariant, which is why cacheing is viable.
2006         * <p>
2007         * The toString() method should be called on the result to get the String HTML text,
2008         * which may be a String or some other internal representation.
2009         *
2010         * @param dsb  valid non-null DataSourceBean
2011         * @param exhibitName  valid non-null exhibit full name
2012         */
2013        @SuppressWarnings("unchecked")
2014        public static Object getCatPageExhibitMetaDataHTML(final DataSourceBean dsb,
2015                                                           final Name.ExhibitFull exhibitName)
2016            {
2017            if(null == dsb) { throw new IllegalArgumentException(); }
2018            if(null == exhibitName) { throw new IllegalArgumentException(); }
2019    
2020            // Get existing cache map, or create new one.
2021            // The cache is a size-limited, thread-safe Map
2022            // from full exhibit name to HTML formatted metadata ("" if none).
2023            // The items can be large and possibly moderately expensive to (re)compute
2024            // though all in-memory (no disc access for example) so a miss is not that bad.
2025            // Races here may result in some wasted work but no errors.
2026            CacheMiniMap<Name.ExhibitFull,Object> cachedMetaDataMap;
2027            while((cachedMetaDataMap = (CacheMiniMap<Name.ExhibitFull,Object>) dsb.getAEPLinkedValue(metadataCacheKey)) == null)
2028                {
2029                // Limit size to ~10 per 1MB of heap, with a minimum of a few tens to cover popular pages.
2030                final int maxCacheSize = Math.max(32, (int) Math.min(16384, Runtime.getRuntime().totalMemory() >> 17));
2031                dsb.putIfAbsentAEPLinkedValue(metadataCacheKey, SimpleLRUMap.<Name.ExhibitFull, Object>create(maxCacheSize, metadataCacheKey.comment));
2032                }
2033            final Object cachedMetaData = cachedMetaDataMap.get(exhibitName);
2034            if(cachedMetaData != null) { return(cachedMetaData); }
2035    
2036            // Need to (re)compute metadata for this exhibit
2037            // eg for the first time and/or after an AEP load/change.
2038            final AllExhibitProperties aep = dsb.getAllExhibitProperties(-1);
2039    
2040            final String result = getCatPageExhibitMetaDataHTMLRaw(exhibitName, aep);
2041            assert(result != null);
2042    //if(IsDebug.isDebug) { dsb.log("INFO: getCatPageExhibitMetaDataHTML() result size of "+result.length()+" chars"); }
2043    
2044            // We do not intern() the result
2045            // since we expect each non-"" metadata value to be unique.
2046            // Providing that the system has plenty of free memory
2047            // then cache in an a compact form if possible else as a plain String.
2048            if(MemoryTools.lotsFree())
2049                {
2050                // Cache the text wrapped as an AutoExpirable with limited lifetime
2051                // to gradually reclaim unused entries automatically,
2052                // in part because these can be quite large.
2053                Object optionallyCompacted = result;
2054                try { optionallyCompacted = Compact7BitString.convertToCompact7BitString(result, sDictMD); }
2055                catch(final IllegalArgumentException e) { /* Not 7-bit text, so leave as full String. */ }
2056                final Object toCache = optionallyCompacted;
2057                // Put in cache, wrapped to expire automatically.
2058                cachedMetaDataMap.put(exhibitName, new MemoryTools.AutoExpirableFixedLifeBase(WebConsts.DEFAULT_PAGE_CACHE_MS)
2059                    { @Override public String toString() { return(toCache.toString()); } });
2060    //            try { cachedMetaDataMap.put(exhibitName, Compact7BitString.convertToCompact7BitString(result, sDictMD)); }
2061    //            catch(final IllegalArgumentException e) { cachedMetaDataMap.put(exhibitName, result); }
2062                }
2063    
2064            // Return the uncompressed result to save the caller a little time...
2065            return(result);
2066            }
2067    
2068        /**Computes the raw data for getCatPageExhibitMetaDataHTML() without cacheing; never null.
2069         * @return "" if there is no metadata for this exhibit
2070         */
2071        public static String getCatPageExhibitMetaDataHTMLRaw(final Name.ExhibitFull exhibitName, final AllExhibitProperties aep)
2072            {
2073            if(null == exhibitName) { throw new IllegalArgumentException(); }
2074            final ExhibitPropsLoadable epl = aep.getExhibitPropsLoadable(exhibitName);
2075            final ExhibitPropsComputable epc = aep.getExhibitPropsComputable(exhibitName);
2076    
2077            final Node metadata = (epc == null) ? null : epc.getMetadata();
2078            final AccessionData accessionMetadata = epl.getAccessionMetadata();
2079            if((metadata != null) || (accessionMetadata != null))
2080                {
2081                final StringBuilder sb = new StringBuilder(2048);
2082                if(metadata != null)
2083                    { sb.append(TextUtils.toXML(metadata, true, true)); }
2084                if(accessionMetadata != null)
2085                    { sb.append(TextUtils.toXML(accessionMetadata.getAsDOM(), true, true)); }
2086                return(sb.toString());
2087                }
2088    
2089            // No metadata at all.
2090            return("");
2091            }
2092    
2093        /**As for getCatPageExhibitMetaDataHTML(), but will compute a missing value asynchronously; never null.
2094         * If the value is already computed then it is available immediately,
2095         * else this attempts to spin off task compute the value,
2096         * and get() will block until the value is ready/computed.
2097         * <p>
2098         * If the target thread pool is full the computation will be done synchronously,
2099         * ie in this thread blocking this call until complete.
2100         * <p>
2101         * The toString() method should be called on the result to get the String text.
2102         */
2103        @SuppressWarnings("unchecked")
2104        public static Future<?> getCatPageExhibitMetaDataHTMLFuture(
2105                                                 final DataSourceBean dsb,
2106                                                 final Name.ExhibitFull exhibitName)
2107            {
2108            // Try first to return any extant cached value without blocking.
2109            // If present, we need not start any thread at all.
2110            final CacheMiniMap<Name.ExhibitFull,Object> cachedMetaDataMap = (CacheMiniMap<Name.ExhibitFull,Object>) dsb.getAEPLinkedValue(metadataCacheKey);
2111            if(null != cachedMetaDataMap)
2112                {
2113                final Object cachedMetaData = cachedMetaDataMap.get(exhibitName);
2114                // Return already-finished Future with value, if any.
2115                if(null != cachedMetaData)
2116                    { return(ThreadUtils.makeCompletedFuture(cachedMetaData)); }
2117                }
2118    
2119            // If we did not find a cached value,
2120            // then set up the task to compute the value asynchronously.
2121            final Callable<?> callable = new Callable<Object>(){
2122                public final Object call() throws Exception
2123                    { return(getCatPageExhibitMetaDataHTML(dsb, exhibitName)); }
2124                };
2125            // Start the (CPU-intensive) thread immediately.
2126            final Future<?> result = ThreadUtils.computeIntensiveThreadPool.submit(callable);
2127    
2128            // Return the handle for the caller to retrieve the value...
2129            return(result);
2130            }
2131    
2132        /**Hyphenate long HTML text (that contains zero or more `-' characters and little or no whitespace).
2133         * Replaces hyphens ('-') with spaces to allow a browser to wrap the text.
2134         * <p>
2135         * Usually used with exhibit names or fragments of such names.
2136         */
2137        public static final String hyphenateHTMLText(final String s)
2138            {
2139            return(s.replace('-', ' '));
2140            }
2141    
    /**LRU thread-safe private cache mapping from exhibit type and Accept header to acceptability of that MIME type for inlining in XHTML mobile text.
     * We assume that the Accept headers will be more or less constant for a given device,
     * and probably constant between instances of the device,
     * so we have enough entries to cover the likely different <em>types</em> of device
     * using the Gallery at any one time.
     * <p>
     * The capacity requested is one entry per 2^20 bytes of the JVM's current total memory,
     * with a minimum of 32 entries.
     * <p>
     * We take care to avoid using huge Accept values in keys to avoid DoS-style issues.
     * <p>
     * We're prepared to discard this entirely when short of memory.
     * <p>
     * NOTE(review): described as LRU but created via SimpleProbabilisticCache;
     * confirm the actual eviction policy.
     * <p>
     * TODO: consider some form of incremental auto-expiry even when not full since keys can be relatively large (eg something like SimpleLRUMap + AutoExpirable)
     */
    private static final CacheMiniMap<Tuple.Pair<ExhibitMIME.ExhibitTypeParameters,String>, Boolean> _inlineableInXHTML =
        SimpleProbabilisticCache.<Tuple.Pair<ExhibitMIME.ExhibitTypeParameters,String>, Boolean>create(Math.max(32, (int)(Runtime.getRuntime().totalMemory() >> 20)), "_inlineableInXHTML");
2156    
2157        /**Returns true if the given MIME-type can always be inlined in an XHTML (mobile) page.
2158         * If the type argument is null, this returns false.
2159         * <p>
2160         * This always allows JPEG and GIF,
2161         * but may also allow other (image) types listed in an incoming "Accept" header.
2162         */
2163        public static boolean canInlineInXHTMLPage(final ExhibitMIME.ExhibitTypeParameters exhibitType,
2164                                                   final HttpServletRequest request)
2165            {
2166            if(exhibitType == null) { return(false); }
2167    
2168            // Allow commonly-supported GIF (89a) and JPEG types always.
2169            switch(exhibitType.type)
2170                {
2171                case ExhibitMIME.ET_JPEG:
2172                case ExhibitMIME.ET_GIF: // Should really check GIF version.
2173                    return(true);
2174                }
2175            // No header to analyse, so cannot allow more than the basic types.
2176            if(request == null) { return(false); }
2177    
2178            // We are prepared to test for a limited selection of other inlineable types.
2179            switch(exhibitType.type)
2180                {
2181                case ExhibitMIME.ET_PNG:
2182                case ExhibitMIME.ET_SWF:
2183                case ExhibitMIME.ET_BMP:
2184                // TODO: WBMP, SVG?
2185                    {
2186                    // Look for any Accept headers.
2187                    final Enumeration<?> headers = request.getHeaders("Accept");
2188                    // No Accept headers to analyse, so cannot allow more than the basic types.
2189                    if(headers == null) { return(false); }
2190    
2191                    // Allow for possibility of multiple Accept headers (probably rare though).
2192                    while(headers.hasMoreElements())
2193                        {
2194                        final String h = (String) headers.nextElement();
2195                        // We refuse to process gigantic header values at all
2196                        // to preserve performance and to avoid DoS-style problems.
2197                        if(h.length() >= 1024) { continue; }
2198    
2199                        // Make cache lookup key and attempt cache lookup.
2200                        final Tuple.Pair<ExhibitMIME.ExhibitTypeParameters,String> key =
2201                            new Tuple.Pair<ExhibitMIME.ExhibitTypeParameters,String>(exhibitType, h);
2202                        final Boolean cachedResult = _inlineableInXHTML.get(key);
2203                        if(Boolean.TRUE.equals(cachedResult)) { return(true); }
2204                        if(Boolean.FALSE.equals(cachedResult)) { continue; /* Try other headers. */ }
2205    if(IsDebug.isDebug) { System.out.println("INFO: new XHTML Accept header '"+TextUtils.sanitiseForXML(h, 256, true)+"' from User-Agent: "+TextUtils.sanitiseForXML(request.getHeader("User-Agent"), 256, true)); }
2206    
2207                        // Do explicit search though header for specified MIME type.
2208                        for(final String t : h.split(","))
2209                            {
2210                            // Allow for direct MIME-type match,
2211                            // or with trailing q factor, eg "image/png;q=0.9",
2212                            // or just a generic catch-all */*.
2213                            final String trimmed = t.trim(); // Remove padding whitespace.
2214                            if(trimmed.equals("*/*") ||
2215                               trimmed.equals(exhibitType.mimeType) ||
2216                               (trimmed.startsWith(exhibitType.mimeType) && trimmed.substring(exhibitType.mimeType.length()).startsWith(";")))
2217                                {
2218                                _inlineableInXHTML.put(key, Boolean.TRUE); // Cache success.
2219                                return(true);
2220                                }
2221                            }
2222    
2223                        // Cache failure (against this header).
2224                        _inlineableInXHTML.put(key, Boolean.FALSE);
2225                        }
2226                    }
2227                }
2228            return(false);
2229            }
2230    
2231        /**Returns true if the given MIME-type can always be inlined in an HTML page.
2232         * If the argument is null, this returns false.
2233         */
2234        public static boolean canInlineInHTMLPage(final ExhibitMIME.ExhibitTypeParameters exhibitType)
2235            {
2236            if(exhibitType == null) { return(false); }
2237            switch(exhibitType.type)
2238                {
2239                case ExhibitMIME.ET_JPEG:
2240                case ExhibitMIME.ET_GIF:
2241                case ExhibitMIME.ET_PNG: // Most HTML browsers will accept PNG now.
2242                case ExhibitMIME.ET_SWF: // Most browsers have a Flash plug-in now.
2243                case ExhibitMIME.ET_BMP: // Embedded BMP should be widely supported.
2244                case ExhibitMIME.ET_HTMLFRAG:
2245                    return(true);
2246                }
2247            return(false);
2248            }
2249    
    /**Private key used by getCategoryTreeFilterBean(); never null.
     * The map from category name to its TreeFilterBean
     * is stored on, and fetched from, the DataSourceBean
     * as an AEP-linked value under this key.
     */
    private static final DataSourceBean.AEPLinkedKey _getCategoryTreeFilterBeanKey = new DataSourceBean.AEPLinkedKey("_getCategoryTreeFilterBeanKey");
2252    
2253        /**Get selected by-category TreeFilterBean from entire exhibit set; never null.
2254         * Used for the "by category" exhibit tree view and elsewhere.
2255         * <p>
2256         * This data is cached linked to the DSB
2257         * (which in passing ensures that it can be dropped automatically under extreme memory stress).
2258         * <p>
2259         * The category name is primarily checked for syntactic validity,
2260         * not for actual presence in the AEP.
2261         */
2262        @SuppressWarnings("unchecked")
2263        public static TreeFilterBean getCategoryTreeFilterBean(final DataSourceBean dsb,
2264                                                               final CharSequence category)
2265            {
2266            if((dsb == null) || !ExhibitName.validNameInitialComponentSyntax(category))
2267                { throw new IllegalArgumentException(); }
2268    
2269            // Map from category name to TreeFilerBean.
2270            // Created as necessary on first use (after AEP change).
2271            ConcurrentMap<String,TreeFilterBean> trees;
2272            while(null == (trees = (ConcurrentMap<String,TreeFilterBean>) dsb.getAEPLinkedValue(_getCategoryTreeFilterBeanKey)))
2273                { dsb.putIfAbsentAEPLinkedValue(_getCategoryTreeFilterBeanKey, new ConcurrentHashMap<String,TreeFilterBean>()); }
2274    
2275            // Atomically ensure that the right tree filter bean for category exists, else create it.
2276            TreeFilterBean tfb;
2277            final String categoryAsString = category.toString();
2278            while(null == (tfb = trees.get(categoryAsString)))
2279                {
2280                // Create the filter bean.
2281                tfb = new TreeFilterBean();
2282                // Give it a name for improved diagnostics.
2283                tfb.setName("category:"+categoryAsString);
2284                // Although this is assumed to be expensive to (re)compute,
2285                // allow it to be dumped unless there's loads of memory free right now
2286                // AND this is a multiprocessor (ie, assumed 'big') host.
2287                // In an unstressed system this may hang around indefinitely
2288                // providing quick access to the pages based on them.
2289                // Note that being DSB-linked allows this to be dumped under extreme memory stress anyway.
2290                tfb.setMemorySensitiveCache((ThreadUtils.AVAILABLE_PROCESSORS == 1) || !MemoryTools.lotsFree());
2291                // Set the filter for the right category.
2292                tfb.setExpr(new FilterExpr(null, new BuiltInFilters.filtByCategory(new String[]{categoryAsString})));
2293                // Store the new bean in the cache, iff no one else got there first.
2294                trees.putIfAbsent(MemoryTools.intern(categoryAsString), tfb);
2295                }
2296    
2297            return(tfb);
2298            }
2299    
    /**Flags for User-Agent pattern matching checking for mobile phones.
     * Matching is case-insensitive, with Unicode-aware case folding,
     * and with canonical equivalence enabled.
     */
    private static final int MOBILE_REGEX_FLAGS = Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE | Pattern.CANON_EQ;
2302    
2303    //    /**Mobile-browser detection regex 1 c/o detectmobilebrowser.com 2010/06/30. */
2304    //    private static final Pattern MOBILE_REGEX_DMB_1_20100630 = Pattern.compile(".*(android|avantgo|blackberry|blazer|compal|elaine|fennec|hiptop|iemobile|ip(hone|od)|iris|kindle|lge |maemo|midp|mmp|opera m(ob|in)i|palm( os)?|phone|p(ixi|re)\\/|plucker|pocket|psp|symbian|treo|up\\.(browser|link)|vodafone|wap|windows (ce|phone)|xda|xiino).*", MOBILE_REGEX_FLAGS);
2305    //
2306    //    /**Mobile-browser detection regex 2 c/o detectmobilebrowser.com 2010/06/30. */
2307    //    private static final Pattern MOBILE_REGEX_DMB_2_20100630 = Pattern.compile("1207|6310|6590|3gso|4thp|50[1-6]i|770s|802s|a wa|abac|ac(er|oo|s\\-)|ai(ko|rn)|al(av|ca|co)|amoi|an(ex|ny|yw)|aptu|ar(ch|go)|as(te|us)|attw|au(di|\\-m|r |s )|avan|be(ck|ll|nq)|bi(lb|rd)|bl(ac|az)|br(e|v)w|bumb|bw\\-(n|u)|c55\\/|capi|ccwa|cdm\\-|cell|chtm|cldc|cmd\\-|co(mp|nd)|craw|da(it|ll|ng)|dbte|dc\\-s|devi|dica|dmob|do(c|p)o|ds(12|\\-d)|el(49|ai)|em(l2|ul)|er(ic|k0)|esl8|ez([4-7]0|os|wa|ze)|fetc|fly(\\-|_)|g1 u|g560|gene|gf\\-5|g\\-mo|go(\\.w|od)|gr(ad|un)|haie|hcit|hd\\-(m|p|t)|hei\\-|hi(pt|ta)|hp( i|ip)|hs\\-c|ht(c(\\-| |_|a|g|p|s|t)|tp)|hu(aw|tc)|i\\-(20|go|ma)|i230|iac( |\\-|\\/)|ibro|idea|ig01|ikom|im1k|inno|ipaq|iris|ja(t|v)a|jbro|jemu|jigs|kddi|keji|kgt( |\\/)|klon|kpt |kwc\\-|kyo(c|k)|le(no|xi)|lg( g|\\/(k|l|u)|50|54|e\\-|e\\/|\\-[a-w])|libw|lynx|m1\\-w|m3ga|m50\\/|ma(te|ui|xo)|mc(01|21|ca)|m\\-cr|me(di|rc|ri)|mi(o8|oa|ts)|mmef|mo(01|02|bi|de|do|t(\\-| |o|v)|zz)|mt(50|p1|v )|mwbp|mywa|n10[0-2]|n20[2-3]|n30(0|2)|n50(0|2|5)|n7(0(0|1)|10)|ne((c|m)\\-|on|tf|wf|wg|wt)|nok(6|i)|nzph|o2im|op(ti|wv)|oran|owg1|p800|pan(a|d|t)|pdxg|pg(13|\\-([1-8]|c))|phil|pire|pl(ay|uc)|pn\\-2|po(ck|rt|se)|prox|psio|pt\\-g|qa\\-a|qc(07|12|21|32|60|\\-[2-7]|i\\-)|qtek|r380|r600|raks|rim9|ro(ve|zo)|s55\\/|sa(ge|ma|mm|ms|ny|va)|sc(01|h\\-|oo|p\\-)|sdk\\/|se(c(\\-|0|1)|47|mc|nd|ri)|sgh\\-|shar|sie(\\-|m)|sk\\-0|sl(45|id)|sm(al|ar|b3|it|t5)|so(ft|ny)|sp(01|h\\-|v\\-|v )|sy(01|mb)|t2(18|50)|t6(00|10|18)|ta(gt|lk)|tcl\\-|tdg\\-|tel(i|m)|tim\\-|t\\-mo|to(pl|sh)|ts(70|m\\-|m3|m5)|tx\\-9|up(\\.b|g1|si)|utst|v400|v750|veri|vi(rg|te)|vk(40|5[0-3]|\\-v)|vm40|voda|vulc|vx(52|53|60|61|70|80|81|83|85|98)|w3c(\\-| )|webc|whit|wi(g |nc|nw)|wmlb|wonu|x700|xda(\\-|2|g)|yas\\-|your|zeto|zte\\-", MOBILE_REGEX_FLAGS);
2308    
    /**Mobile-browser detection regex 1 c/o detectmobilebrowser.com 2011/08/12. */
2310        private static final Pattern MOBILE_REGEX_DMB_1_20110812 = Pattern.compile(".*(android.+mobile|avantgo|blackberry|blazer|compal|elaine|fennec|hiptop|iemobile|ip(hone|od)|iris|kindle|lge |maemo|midp|mmp|opera m(ob|in)i|palm( os)?|phone|p(ixi|re)\\/|plucker|pocket|psp|symbian|treo|up\\.(browser|link)|vodafone|wap|windows (ce|phone)|xda|xiino).*", MOBILE_REGEX_FLAGS);
2311    
    /**Mobile-browser detection regex 2 c/o detectmobilebrowser.com 2011/08/12. */
2313        private static final Pattern MOBILE_REGEX_DMB_2_20110812 = Pattern.compile("1207|6310|6590|3gso|4thp|50[1-6]i|770s|802s|a wa|abac|ac(er|oo|s\\-)|ai(ko|rn)|al(av|ca|co)|amoi|an(ex|ny|yw)|aptu|ar(ch|go)|as(te|us)|attw|au(di|\\-m|r |s )|avan|be(ck|ll|nq)|bi(lb|rd)|bl(ac|az)|br(e|v)w|bumb|bw\\-(n|u)|c55\\/|capi|ccwa|cdm\\-|cell|chtm|cldc|cmd\\-|co(mp|nd)|craw|da(it|ll|ng)|dbte|dc\\-s|devi|dica|dmob|do(c|p)o|ds(12|\\-d)|el(49|ai)|em(l2|ul)|er(ic|k0)|esl8|ez([4-7]0|os|wa|ze)|fetc|fly(\\-|_)|g1 u|g560|gene|gf\\-5|g\\-mo|go(\\.w|od)|gr(ad|un)|haie|hcit|hd\\-(m|p|t)|hei\\-|hi(pt|ta)|hp( i|ip)|hs\\-c|ht(c(\\-| |_|a|g|p|s|t)|tp)|hu(aw|tc)|i\\-(20|go|ma)|i230|iac( |\\-|\\/)|ibro|idea|ig01|ikom|im1k|inno|ipaq|iris|ja(t|v)a|jbro|jemu|jigs|kddi|keji|kgt( |\\/)|klon|kpt |kwc\\-|kyo(c|k)|le(no|xi)|lg( g|\\/(k|l|u)|50|54|e\\-|e\\/|\\-[a-w])|libw|lynx|m1\\-w|m3ga|m50\\/|ma(te|ui|xo)|mc(01|21|ca)|m\\-cr|me(di|rc|ri)|mi(o8|oa|ts)|mmef|mo(01|02|bi|de|do|t(\\-| |o|v)|zz)|mt(50|p1|v )|mwbp|mywa|n10[0-2]|n20[2-3]|n30(0|2)|n50(0|2|5)|n7(0(0|1)|10)|ne((c|m)\\-|on|tf|wf|wg|wt)|nok(6|i)|nzph|o2im|op(ti|wv)|oran|owg1|p800|pan(a|d|t)|pdxg|pg(13|\\-([1-8]|c))|phil|pire|pl(ay|uc)|pn\\-2|po(ck|rt|se)|prox|psio|pt\\-g|qa\\-a|qc(07|12|21|32|60|\\-[2-7]|i\\-)|qtek|r380|r600|raks|rim9|ro(ve|zo)|s55\\/|sa(ge|ma|mm|ms|ny|va)|sc(01|h\\-|oo|p\\-)|sdk\\/|se(c(\\-|0|1)|47|mc|nd|ri)|sgh\\-|shar|sie(\\-|m)|sk\\-0|sl(45|id)|sm(al|ar|b3|it|t5)|so(ft|ny)|sp(01|h\\-|v\\-|v )|sy(01|mb)|t2(18|50)|t6(00|10|18)|ta(gt|lk)|tcl\\-|tdg\\-|tel(i|m)|tim\\-|t\\-mo|to(pl|sh)|ts(70|m\\-|m3|m5)|tx\\-9|up(\\.b|g1|si)|utst|v400|v750|veri|vi(rg|te)|vk(40|5[0-3]|\\-v)|vm40|voda|vulc|vx(52|53|60|61|70|80|81|83|85|98)|w3c(\\-| )|webc|whit|wi(g |nc|nw)|wmlb|wonu|x700|xda(\\-|2|g)|yas\\-|your|zeto|zte\\-", MOBILE_REGEX_FLAGS);
2314    
2315        /**Return true if client appears likely to be a mobile device (browser sniffing).
2316         * This attempts to detect a small-display, CPU- and bandwidth- constrained device,
2317         * that might benefit from being sent small XHTML pages rather than standard HTML.
2318         * <p>
2319         * This examines the User-Agent and is unlikely to be completely reliable.
2320         * <p>
2321         * Thanks for the regex to http://detectmobilebrowser.com/
2322         */
2323        public static boolean isBrowserOnMobileDevice(final HttpServletRequest request)
2324            {
2325            final String ua = request.getHeader("User-Agent");
2326            if(null == ua) { return(false); } // No UA so assume not a mobile device.
2327            return(MOBILE_REGEX_DMB_1_20110812.matcher(ua).matches() ||
2328                   MOBILE_REGEX_DMB_2_20110812.matcher(ua.substring(0,4)).matches());
2329            }
2330    
2331    
    /**If true then allow some "sneaky" browser concurrency.
     * For browsers that do not always pipeline by default
     * but do allow fetches from different-named hosts in parallel
     * (eg the biggies such as IE6/IE7, FF1/1.5/2 as of 2006Q4),
     * then we can possibly help throughput by fetching some images (etc)
     * from a different name or the literal IP address for this same host.
     * (The literal IP address has the benefit of needing no DNS lookup.)
     * <p>
     * The trick is that where the user has arrived at the site
     * with a name other than the local mirror name (or literal IP),
     * and the URI that we were going to use was a rrURL (root-relative URL),
     * then we can prepend the local mirror name/IP to make a new absolute URL
     * that the browser may be prepared to fetch from concurrently.
     * <p>
     * This has to be done completely consistently for any given item,
     * eg a static "page-furniture" image, to avoid defeating caching.
     * <p>
     * When this is false getOptionalSneakyConcurrencyRRURLPrefix()
     * always returns "" (ie no prefix).
     */
    public static final boolean ALLOW_SNEAKY_HTTP_CONCURRENCY = true;
2350    
    /**Iff true, do sneaky concurrency with a literal IP address.
     * This is instead of using the local mirror name,
     * and avoids any extra DNS lookup by the client,
     * and can be used even when the client is visiting a mirror explicitly.
     * <p>
     * Consulted by getOptionalSneakyConcurrencyRRURLPrefix():
     * when true the prefix host is the first address returned
     * by an A-record lookup of the local mirror name;
     * when false it is the local mirror name itself.
     */
    private static final boolean LITERAL_IP_SNEAKY_HTTP_CONCURRENCY = true;
2357    
    /**Request attribute to cache getOptionalSneakyConcurrencyRRURLPrefix() response.
     * Useful if sneaky concurrency is attempted several times in one response.
     * If a non-null value is stored against the attribute, it's used.
     * <p>
     * The stored value is a String: either the computed prefix
     * or "" to record that no prefix applies for this request.
     */
    private static final String SNEAKY_HTTP_CONCURRENCY_REQ_ATTR_CACHE = "org.hd.pg2k.SNEAKY.CACHE";
2363    
2364        /**Get optional prefix for rrURL for extra "sneaky" browser concurrency.
2365         * This can only apply if:
2366         * <ul>
2367         * <li>ALLOW_SNEAKY_HTTP_CONCURRENCY is true.
2368         * <li>This host has a mirror prefix,
2369         *     ie so that there is an "alternate" name/IP for this host.
2370         * <li>The incoming request hostname is not this local mirror hostname.
2371         * <li>We do recognise the form (a vhost for) the URL in the request.
2372         * </ul>
2373         * else this routine always returns "".
2374         * <p>
2375         * This basically only works if the user is using a "generic" URL,
2376         * but that is more likely to be a machine far away (ie with large RTT),
2377         * so extra concurrency to try to overcome latency is especially helpful.
2378         * <p>
2379         * This slightly inflates the HTML that the client will see
2380         * but only if using a generic URL.
2381         * <p>
2382         * May inflate the number of concurrent connections back to this host,
2383         * but usually only by 1 or 2 at most.
2384         * <p>
2385         * Note that this scheme <em>does not</em> rely on any other host being up,
2386         * nor having exactly the same content as us.
2387         * <p>
2388         * This <em>is not</em> a technique for distributing load.
2389         *
2390         * @return "" or the http://mirror-... name (with no trailing slash) for this host
2391         */
2392        public static String getOptionalSneakyConcurrencyRRURLPrefix(final HttpServletRequest request)
2393            {
2394            if(!ALLOW_SNEAKY_HTTP_CONCURRENCY)
2395                { return(""); /* The default, ie no prefix. */}
2396    
2397            final String cached = (String) request.getAttribute(SNEAKY_HTTP_CONCURRENCY_REQ_ATTR_CACHE);
2398            if(null != cached) { return(cached); }
2399    
2400            final String mirrorTag = LocalProps.getMirrorTag();
2401            if(null == mirrorTag)
2402                {
2403                request.setAttribute(SNEAKY_HTTP_CONCURRENCY_REQ_ATTR_CACHE, "");
2404                return(""); /* No reliable client-visible alternate hostname for us. */
2405                }
2406    
2407            // If we don't understand the hostname in the query then don't add the prefix.
2408            // (This also covers the case where the hostname is already an IP literal.)
2409            final String serverName = request.getServerName();
2410            // Our preferred form of the hostname is all-lower-case.
2411            final String serverNameLC = serverName.toLowerCase();
2412            final VirtualHosts.VirtualHost vHost =
2413                VirtualHosts.getVirtualHostDetails(serverNameLC, null);
2414            if(null == vHost)
2415                {
2416                request.setAttribute(SNEAKY_HTTP_CONCURRENCY_REQ_ATTR_CACHE, "");
2417                return(""); /* Don't recognise the name in this request or already IP literal. */
2418                }
2419    
2420            final String localMirrorName = HostUtils.makeMirrorNameGeneric(mirrorTag);
2421    
2422            final String sneakyName;
2423            if(LITERAL_IP_SNEAKY_HTTP_CONCURRENCY)
2424                {
2425                try {
2426                    // TODO: allow IPv6 addresses too.
2427                    // Attempt non-blocking fast lookup at the cost of losing sneaky concurrency for this page.
2428                    final InetAddress[] v4Addresses = AddrTools.lookupARecords(localMirrorName, true);
2429                    // Always use first IP address returned, assuming that all are public/routable...
2430                    sneakyName = v4Addresses[0].getHostAddress();
2431                    }
2432                catch(final Exception e)
2433                    {
2434                    request.setAttribute(SNEAKY_HTTP_CONCURRENCY_REQ_ATTR_CACHE, "");
2435                    return(""); /* Cannot find an address for us. */
2436                    }
2437                }
2438            else
2439                {
2440                if(localMirrorName.equals(request.getServerName()))
2441                    {
2442                    request.setAttribute(SNEAKY_HTTP_CONCURRENCY_REQ_ATTR_CACHE, "");
2443                    return(""); /* Client is already using the mirror/alternate hostname for us. */
2444                    }
2445                sneakyName = localMirrorName;
2446                }
2447    
2448            // Return suitable prefix to make rrURL into absolute URL.
2449            final String sneakyPrefix = "http://" + sneakyName;
2450            request.setAttribute(SNEAKY_HTTP_CONCURRENCY_REQ_ATTR_CACHE, sneakyPrefix);
2451            return(sneakyPrefix);
2452            }
2453    
    /**Prefix of all user testimonials in the global properties.
     * getUserTestimonial() expects the rest of each such property name
     * to be two lower-case letters (the language), a dot, and then any trailing text.
     */
    private static final String TEST_PNAME_PREFIX = "org.hd.org.pg2k.testimonial.";
2456    
    /**Logically immutable cached lists of testimonial Strings by language (not locale); never null.
     * Private to getUserTestimonial().
     * <p>
     * No null keys, no null/empty values.
     * <p>
     * Small fixed size.
     * <p>
     * Thread-safe (and supporting reasonable concurrency if possible).
     * <p>
     * Keys are the two-letter language codes taken from the property names;
     * values are unmodifiable lists, filled in by getUserTestimonial()
     * whenever it finds this map empty.
     */
    private static final Map<String, List<String>> _gUT_cache = new ConcurrentHashMap<String, List<String>>();
2467    
2468        /**Get short user quote/testimonial at random from those available for the given locale; never null but may be "".
2469         * This finds a quote, if any, suitable for the current locale
2470         * (infact, currently just the language is matched),
2471         * picked randomly from those available, or "" if none is available.
2472         * <p>
2473         * The text is HTML/XML safe, and is pure 7-bit printable ASCII,
2474         * with any non-ASCII characters encoded as HTML/XML entities.
2475         *
2476         * @param l  the required locale; never null
2477         */
2478        public static String getUserTestimonial(final LocaleBeanBase l)
2479            {
2480            if(l == null) { throw new IllegalArgumentException(); }
2481    
2482            // If the cache is completely empty then try to populate it now.
2483            // Other calls while we are builing the cache are safe
2484            // but may see empty entries until we are done.
2485            if(_gUT_cache.isEmpty())
2486                {
2487                try
2488                    {
2489                    // We build a map of by-language lists,
2490                    // which we then wrap up as immutable and post in the cache map (atomically).
2491                    final Map<String, ArrayList<String>> m = new HashMap<String, ArrayList<String>>();
2492                    final ResourceBundle gp = ResourceBundle.getBundle(CoreConsts.GLOBAL_PROPS_NAME);
2493                    // Common prefix for all testimonial properties.
2494                    final int prefixLength = TEST_PNAME_PREFIX.length();
2495                    // Regex pattern to match the tail of each testimonial property.
2496                    final Pattern p = Pattern.compile("^[a-z][a-z][.].*");
2497                    // Search the global properties...
2498                    for(final String key : gp.keySet())
2499                        {
2500                        // Skip entries not of interest to us.
2501                        if(!key.startsWith(TEST_PNAME_PREFIX)) { continue; }
2502                        // We expect to find 2 lower-case letters and then another dot
2503                        // and then a unique number/ID (which we don't use).
2504                        final String tail = key.substring(prefixLength);
2505                        if(!p.matcher(tail).matches())
2506                            {
2507                            System.err.println("WARNING: bad testimonial property name prefix "+key+" in "+CoreConsts.GLOBAL_PROPS_NAME+" properties");
2508                            continue;
2509                            }
2510                        final String lang = tail.substring(0, 2); // Extract the language code...
2511                        // Add this to the end of the appropriate List, or create one if need be.
2512                        ArrayList<String> list = m.get(lang);
2513                        if(list == null)
2514                            {
2515                            list = new ArrayList<String>();
2516                            m.put(lang, list);
2517                            }
2518                        list.add(gp.getString(key));
2519                        }
2520                    // Copy immutable versions of the by-language lists into the cache.
2521                    for(final String lang : m.keySet())
2522                        {
2523                        final ArrayList<String> arrayList = m.get(lang);
2524                        arrayList.trimToSize(); // Save space since this won't ever be expanded...
2525                        _gUT_cache.put(lang, Collections.unmodifiableList(arrayList)); // Atomically update cache.
2526                        }
2527                    }
2528                catch(final Throwable t)
2529                    {
2530                    System.err.println("ERROR: problem retrieving testimonial texts.");
2531                    t.printStackTrace();
2532                    return(""); // Nothing will be ready yet...
2533                    }
2534                }
2535    
2536            // Retrieve all quotes for the specified language.
2537            final List<String> quotes = _gUT_cache.get(l.getLocale().getLanguage());
2538            if(quotes != null)
2539                {
2540                assert(!quotes.isEmpty()); // Should be no empty lists.
2541                final int qSize = quotes.size();
2542                if(qSize == 1) { return(quotes.get(0)); }
2543                return(quotes.get(Rnd.fastRnd.nextInt(qSize)));
2544                }
2545    
2546            return(""); // Nothing available.
2547            }
2548    
2549    
    /**Private key used by getTrailData(); never null.
     * Identifies the AEP-linked map from full exhibit name to TrailData held by the DataSourceBean.
     */
    private static final AEPLinkedKey trailDataCacheKey = new AEPLinkedKey("trailDataCacheKey");
2552    
2553        /**Get the TrailData for a given trail exhibit (by full name); null if none.
2554         * This caches the result in the DSB, linked to the AEP,
2555         * so never retaining data for expired trails.
2556         * <p>
2557         * (This may negatively cache failure to load TrailData (for a while) for efficiency.)
2558         * <p>
2559         * Safe to apply to an arbitrary/unvetted exhibit name, even an invalid/null value.
2560         */
2561        @SuppressWarnings("unchecked")
2562        public static TrailData getTrailData(final DataSourceBean dsb,
2563                                             final Name.ExhibitFull trailExhibitFullName)
2564            {
2565            if((dsb == null) || (trailExhibitFullName == null)) { return(null); }
2566    
2567            // Get cache (or atomically create on first access, eg after an AEP change).
2568            ConcurrentMap<Name.ExhibitFull,TrailData> cache;
2569            while(null == (cache = (ConcurrentMap<Name.ExhibitFull,TrailData>) dsb.getAEPLinkedValue(trailDataCacheKey)))
2570                { dsb.putIfAbsentAEPLinkedValue(trailDataCacheKey, new ConcurrentHashMap<Name.ExhibitFull,TrailData>()); }
2571    
2572            // Return value immediately if already cached...
2573            final TrailData cachedValue = cache.get(trailExhibitFullName);
2574            if(cachedValue != null) { return(cachedValue); }
2575    
2576    //        // If the exhibit name is invalid (eg from bogus Web/unsafe input)
2577    //        // then don't both even trying to compute a value to save time and risk.
2578    //        if(!ExhibitName.validNameFinalComponentSyntax(trailExhibitName)) { return(null); }
2579    
2580            try
2581                {
2582                // Compute and cache value if possible.
2583                final TrailData value = TrailData.readTrailFromExhibit(dsb,
2584                        trailExhibitFullName, dsb.getLogger());
2585                if(value != null) { cache.putIfAbsent(trailExhibitFullName, value); }
2586                return(value);
2587                }
2588            catch(final Exception e)
2589                {
2590                // Report but absorb any error to avoid it propagating upwards...
2591                e.printStackTrace();
2592                return(null);
2593                }
2594            }
2595    
2596    
    /**Private key used by approxWordCount(); never null.
     * Identifies the AEP-linked cache from full exhibit name to word count held by the DataSourceBean.
     */
    private static final AEPLinkedKey _awc_CacheKey = new AEPLinkedKey("_awc_CacheKey");
2599    
2600        /**Compute (crude) estimate of words in catalogue page for given exhibit; non-negative.
2601         * This is designed to be reasonably fast, though not necessarily amazingly accurate,
2602         * and is intended to help decide how many ad blocks a page may reasonably support.
2603         * <p>
2604         * This may cache its results against the AEP instance.
2605         * <p>
2606         * This counts 'non-furniture' words, ie those originating from the data itself,
2607         * including the exhibit name, exhibit description, tree AKA/description, etc,
2608         * with different constituents possibly weighted differently.
2609         * <p>
2610         * For simplicity, this does its computations based on the default site language,
2611         * even if there may be significant variation in apparent word count
2612         * for other localisations.
2613         *
2614         * @return zero in case of difficulty (eg exhibit does not exist),
2615         *     else approximate (positive) word count
2616         */
2617        public static final int approxWordCount(final DataSourceBean dsb,
2618                                                final Name.ExhibitFull fullExhibitName)
2619            {
2620            if((dsb == null) || (fullExhibitName == null))
2621                { throw new IllegalArgumentException(); }
2622    
2623            // Get existing (thread-safe) cache, or atomically create it if necessary.
2624            // The cache is a size-limited thread-safe Map with reasonably-fast get()
2625            // from full exhibit name to (positive) word count.
2626            // Races here may result in some wasted work but no errors.
2627            // Cap relative to heap size: about 8k (2^13) entries for 1GB (2^30) heap.
2628            MemoryTools.CacheMiniMap<Name.ExhibitFull,Integer> cachedCounts;
2629            while((cachedCounts = GenUtils.<MemoryTools.CacheMiniMap<Name.ExhibitFull,Integer>>cast(dsb.getAEPLinkedValue(_awc_CacheKey))) == null)
2630                { dsb.putIfAbsentAEPLinkedValue(_awc_CacheKey, SimpleProbabilisticCache.<Name.ExhibitFull,Integer>create(Math.max(128, (int) (Runtime.getRuntime().totalMemory() >>> 17)), _awc_CacheKey.comment)); }
2631            // Return the cached word count, if present.
2632            final Integer cachedCount = cachedCounts.get(fullExhibitName);
2633            if(cachedCount != null) { return(cachedCount.intValue()); }
2634    
2635            // Get a default-locale LocaleBean
2636            // to extract the descriptive/AKA text with.
2637            final LocaleBeanBase lb = new LocaleBean();
2638    
2639            // Count words in any extant per-exhibit description text.
2640            final AllExhibitProperties aep = dsb.getAllExhibitProperties(-1);
2641    
2642            // Return (uncached) zero for non-extant exhibit.
2643            if(null == aep.aeid.getStaticAttr(fullExhibitName))
2644                { return(0); }
2645    
2646            // Running word count for this exhibit.
2647            int wordCount = 0;
2648    
2649            // Count words in its name main component, including modifiers.
2650            wordCount += ExhibitName.getMainWordsCount(fullExhibitName, Collections.<String>emptySet());
2651            assert(wordCount > 0);
2652    
2653            final ExhibitPropsLoadable epl = aep
2654                    .getExhibitPropsLoadable(fullExhibitName);
2655            final String description = epl.getDescription();
2656            // Use the slow-but-sensible (eg for line-end handling) tokeniser.
2657            if(description != null)
2658                { wordCount += TextUtils.quickWordCount(description); }
2659    
2660            // Get AKA/treedesc text with as little markup as we can easily manage.
2661            final String akaText = GenUtils.getLocalisedTreeDesc(aep,
2662                    fullExhibitName, lb, true, true, false, false).toString();
2663            // Only count real spaces as word boundaries
2664            // and halve the raw word count to allow for markup, repetition, etc.
2665            // FIXME: use find() or somesuch to avoid redundant construction of substrings.
2666            if(!akaText.isEmpty())
2667                { wordCount += (TextUtils.quickWordCount(akaText) >>> 1); }
2668    
2669            // Add (at low weighting) word count of section text.
2670            final CharSequence sectionDesc = GenUtils.getLocalisedSectionDesc(aep,
2671                    ExhibitName.getCategoryComponent(fullExhibitName),
2672                    lb);
2673            // Only count real spaces as word boundaries
2674            // and quarter the raw word count to allow for markup and lack of uniqueness
2675            // ie the fact that descriptive text is shared between related exhibits.
2676            // FIXME: use find() or somesuch to avoid redundant construction of substrings.
2677            if(sectionDesc != null)
2678                { wordCount += (TextUtils.quickWordCount(sectionDesc) >>> 2); }
2679    
2680            // Cache the (complete) approx word count for next time...
2681            cachedCounts.put(fullExhibitName, Integer.valueOf(wordCount));
2682    
2683    if(IsDebug.isDebug) { System.out.println("[Approx word count "+wordCount+" on cat page for "+fullExhibitName+".]"); }
2684            return(wordCount);
2685            }
2686    
    /**Events to be examined by isPopularCatalogueEntry(); never null.
     * These must all have VLONG data stored,
     * since isPopularCatalogueEntry() queries each of them with EventPeriod.VLONG.
     * <p>
     * Examined in the order listed here.
     */
    private static final List<SimpleVariableDefinition> _iPCE_vars = Arrays.asList(new SimpleVariableDefinition[] {
        SystemVariables.ACCESSPATTERN_CAT_PAGE_VIEW,
        SystemVariables.ACCESSPATTERN_COMPLETED_DOWNLOAD,
        SystemVariables.ACCESSPATTERN_COMPLETED_DOWNLOAD_LOCAL,
        });
2695    
2696        /**Returns true iff the named exhibit and/or catalogue page is popular (well visited/downloaded).
2697         * Uses the history to decide if a catalogue page and its exhibit
2698         * are frequently visited/downloaded
2699         * (wrt other catalogue pages globally and locally).
2700         *
2701         * @param vars  source of event history; never null
2702         * @param exhibitFullName  full exhibit name; never null
2703         * @return true if popular, false otherwise
2704         */
2705        public static boolean isPopularCatalogueEntry(final SimpleVariablePipelineIF vars,
2706                                                      final CharSequence exhibitFullName)
2707            {
2708            if((vars == null) || (exhibitFullName == null)) { throw new IllegalArgumentException(); }
2709    
2710            final String shortName = ExhibitName.getFileComponent(exhibitFullName).toString();
2711            // TODO: allow for shortened unique-key form in future...
2712    
2713            for(final SimpleVariableDefinition var : _iPCE_vars)
2714                {
2715                // Look for activity yesterday and today.
2716                final EventVariableValue eventsToday = vars.getEventValue(var, EventPeriod.VLONG, true);
2717                if(eventsToday.getRank(shortName) < (eventsToday.getTotalDistinctValues()/2))
2718                    { return(true); }
2719                final EventVariableValue eventsYesterday = vars.getEventValue(var, EventPeriod.VLONG, false);
2720                if(eventsYesterday.getRank(shortName) < (eventsYesterday.getTotalDistinctValues()/3))
2721                    { return(true); }
2722    
2723                // Now we look into the full collected history for this value.
2724                final EventVariableValue[] all = vars.getEventValues(var, EventPeriod.VLONG, 0, null);
2725                // If no "all" history at all or not enough to be significant
2726                // then the item may just not be especially popular...
2727                if(all.length < 1) { continue; }
2728                final EventVariableValue allEVV = all[0];
2729                if(allEVV == null) { continue; }
2730                if(allEVV.getRank(shortName) < (allEVV.getTotalDistinctValues()/4))
2731                    { return(true); }
2732                }
2733    
2734            return(false); // Not popular apparently.
2735            }
2736    
2737    
    /**Private key used by findLatestCodeBundle(); never null.
     * Identifies the AEP-linked map from bundle prefix to full exhibit name held by the DataSourceBean.
     */
    private static final AEPLinkedKey findLatestCodeBundleKey = new AEPLinkedKey("findLatestCodeBundleKey");
2740    
    /**Name of the section/dir in which code/doc bundles are filed.
     * findLatestCodeBundle() appends '/' to this to form the full-name prefix it filters on.
     */
    private static final String CODE_SECTION_DIR = "code";
2743    
2744        /**Returns full exhibit name for latest version of a code bundle, or null if none.
2745         * This locates the latest (with a major-minor-micro versioning) bundle
2746         * in the 'code' section, for the given prefix, or null of none.
2747         * <p>
2748         * The author and extension are ignored for selection purposes.
2749         * <p>
2750         * For example, for the prefix/argument 'javadoc',
2751         * if the code section includes the files
2752         * 'javadoc-1-2-3-DHD.zip' and javadoc-1-10-1-ANON.zip'
2753         * this will return 'code/javadoc-1-10-1-ANON.zip'.
2754         * <p>
2755         * To be found a bundled archive name must be exactly of the form:<br />
2756         * <code>prefix-major-minor-micro-AUTH.XTN</code><br />
2757         * where the major, minor and micro components are (small, non-negative) integers.
2758         * There must be no attributes present.
2759         * <p>
2760         * (Note that a '-' is appended to the supplied prefix.)
2761         * <p>
2762         * This may cache the results against the AEP instance,
2763         * since the lookup may happen may times
2764         * and we may have to search through a fair amount of data for each lookup.
2765         * Note: this does not cache negative results
2766         * in part to bound the amount of space that can be consumed.
2767         *
2768         * @param dsb  current data source; never null
2769         * @param prefix  legitimate short-name as bundle name; never null nor empty
2770         */
2771        @SuppressWarnings("unchecked")
2772        public static Name.ExhibitFull findLatestCodeBundle(final DataSourceBean dsb, final String prefix)
2773            {
2774            if(dsb == null) { throw new IllegalArgumentException(); }
2775            if((prefix == null) || (prefix.length() < 1)) { throw new IllegalArgumentException(); }
2776    
2777            final ServletContext context = dsb.getServletContext();
2778            if(context == null) { throw new IllegalStateException(); }
2779    
2780            // Look up the prefix in our (thread-safe) cache from prefix to full exhibit name.
2781            // This is AEP-linked, so is automatically discarded when a new AEP appears.
2782            ConcurrentMap<String,Name.ExhibitFull> cache = (ConcurrentMap<String,Name.ExhibitFull>) (dsb.getAEPLinkedValue(findLatestCodeBundleKey));
2783            Name.ExhibitFull result = null;
2784            if(cache != null)
2785                {
2786                result = cache.get(prefix);
2787                if(result != null) { return(result); /* Return cached result! */ }
2788                }
2789    
2790            // Do the lookup.
2791            // First quickly filter for only plausible candidates
2792            // with the correct prefix and in the correct section.
2793            final String fullNamePrefix = CODE_SECTION_DIR + '/';
2794            final String shortNamePrefix = prefix + "-";
2795            final AllExhibitProperties aep= dsb.getAllExhibitProperties(-1);
2796            final Name.ExhibitFull candidates[] = aep.select(new AEPFilter() {
2797                /* (non-Javadoc)
2798                 * @see org.hd.d.pg2k.svrCore.AllExhibitProperties.AEPFilter#accept(org.hd.d.pg2k.svrCore.AllExhibitProperties, java.lang.String)
2799                 */
2800                //@Override
2801                public boolean accept(final AllExhibitProperties aep, final Name.ExhibitFull fullExhibitName)
2802                    {
2803                    // Quickly filter for the correct section.
2804    //                if(!TextUtils.contentEquals(ExhibitName.getCategoryComponent(fullExhibitName), CODE_SECTION_DIR)) { return(false); }
2805                    if(!TextUtils.startsWith(fullExhibitName, fullNamePrefix)) { return(false); }
2806                    final Name.ExhibitShort shortName = fullExhibitName.getShortName();
2807                    // Quickly filter for the correct bundle name.
2808                    if(!TextUtils.startsWith(shortName, shortNamePrefix)) { return(false); }
2809                    // OK, we can check more thoroughly later..
2810                    return(true);
2811                    }
2812                }, null, 0);
2813            if(candidates.length == 0)
2814                {
2815    context.log("WARNING: no candidate bundles with prefix "+prefix);
2816                return(null); /* No candidates... */
2817                }
2818    
2819            // Now find the highest-versioned syntactically-valid candidate, if any...
2820            int bestMajor = -1;
2821            int bestMinor = -1;
2822            int bestMicro = -1;
2823            for(final Name.ExhibitFull c : candidates)
2824                {
2825                final Name.ExhibitShort shortName = c.getShortName();
2826                assert(TextUtils.startsWith(shortName, shortNamePrefix)) : "should have the correct bundle name";
2827                // Rip off prefix and tokenise remainder (and ignore trailing AUTH.XTN part as last token).
2828                final int snpl = shortNamePrefix.length();
2829                final String[] tokens = shortName.subSequence(snpl, shortName.length()).toString().split(ExhibitName.WORD_SEPS);
2830                if(tokens.length != 4)
2831                    {
2832    context.log("WARNING: badly-named (wrong token count "+tokens.length+") candidate bundle for prefix "+prefix+": "+c);
2833                    continue; /* Invalid format. */
2834                    }
2835                try
2836                    {
2837                    final int maj = Integer.parseInt(tokens[0], 10);
2838                    assert(maj >= 0);
2839                    final int min = Integer.parseInt(tokens[1], 10);
2840                    assert(min >= 0);
2841                    final int mic = Integer.parseInt(tokens[2], 10);
2842                    assert(mic >= 0);
2843                    if(maj < bestMajor) { continue; /* Too old. */ }
2844                    if(maj > bestMajor)
2845                        { result = c; bestMajor = maj; bestMinor = min; bestMicro = mic; continue; /* Best so far! */ }
2846                    if(min < bestMinor) { continue; /* Too old. */ }
2847                    if(min > bestMinor)
2848                        { result = c; bestMajor = maj; bestMinor = min; bestMicro = mic; continue; /* Best so far! */ }
2849                    if(mic < bestMicro) { continue; /* Too old. */ }
2850                    if(mic > bestMicro)
2851                        { result = c; bestMajor = maj; bestMinor = min; bestMicro = mic; continue; /* Best so far! */ }
2852                    }
2853                catch(final NumberFormatException e)
2854                    {
2855    context.log("WARNING: badly-named (NumberFormatException + "+e.getMessage()+") candidate bundle for prefix "+prefix+": "+c);
2856                    continue; /* Invalid format. */
2857                    }
2858                }
2859    
2860            // If the result is positive then cache it,
2861            // creating a new (thread-safe) cache if necessary.
2862            // Negative results are not cached.
2863            if(result != null)
2864                {
2865                while(cache == null)
2866                    {
2867                    dsb.putIfAbsentAEPLinkedValue(findLatestCodeBundleKey, new ConcurrentHashMap<String, Name.ExhibitFull>());
2868                    // Retrieve whatever the current cache now is (there may have been a race).
2869                    cache = (ConcurrentMap<String,Name.ExhibitFull>) (dsb.getAEPLinkedValue(findLatestCodeBundleKey));
2870                    }
2871                // Update the cached value if not already done...
2872                cache.putIfAbsent(prefix, result);
2873    
2874    if(IsDebug.isDebug) { context.log("INFO: found bundle for prefix "+prefix+" as "+result); }
2875                }
2876    
2877    else { context.log("WARNING: found no bundle for prefix "+prefix); }
2878    
2879            // Return the result!
2880            return(result);
2881            }
2882    
2883        /**Returns true (and sets SC_NOT_MODIFIED status) iff the caller should avoid sending a GET response body.
2884         * Intended to be be called by a servlet handling a GET/HEAD operation
2885         * before most headers are set or any response body is sent/commited.
2886         *
2887         * @param lastModified  last time this entity changed, or -1 if not known / not applicable
2888         * @param request  never null (unless lastModified == -1)
2889         * @param response  never null (unless lastModified == -1)
2890         * @return true if SC_NOT_MODIFIED has been set and servlet should return immediately
2891         *             without sending a body,
2892         *     false if no status set and body may still have to be sent.
2893         */
2894        public static boolean abortIfNotModifiedSince(final long lastModified,
2895                                                      final HttpServletRequest request,
2896                                                      final HttpServletResponse response)
2897            {
2898            if(lastModified != -1)
2899                {
2900                if(null == request) { throw new IllegalArgumentException(); }
2901                // Handle any If-Modified-Since if not already done by the container.
2902                final long ifModifiedSince = request.getDateHeader("If-Modified-Since");
2903                if((ifModifiedSince > -1) && (lastModified/1000 <= ifModifiedSince/1000))
2904                    {
2905                    if(null == response) { throw new IllegalArgumentException(); }
2906                    // Don't send body because content has not been modified
2907                    // (since client last requested it).
2908                    response.setStatus(HttpServletResponse.SC_NOT_MODIFIED);
2909                    return(true);
2910                    }
2911                }
2912    
2913            // Content (may have) changed; do send response body.
2914            return(false);
2915            }
2916    
2917        /**Returns true (and sets SC_NOT_MODIFIED status) iff the caller should avoid sending a GET response body.
2918         * Intended to be be called by a servlet handling a GET/HEAD operation
2919         * before most headers are set or any response body is sent/commited.
2920         *
2921         * @param eTag  valid single ETag token, strong or weak, for the page; null if not known / not applicable
2922         * @param lastModified  last time this entity changed; -1 if not known / not applicable
2923         * @param request  never null (unless eTag == null and lastModified == -1)
2924         * @param response  never null (unless eTag == null and lastModified == -1)
2925         * @return true if SC_NOT_MODIFIED has been set and servlet should return immediately
2926         *             without sending a body,
2927         *     false if no status set and body may still have to be sent.
2928         */
2929        public static boolean abortIfETagMatchOrNotModifiedSince(final String eTag,
2930                                                               final long lastModified,
2931                                                               final HttpServletRequest request,
2932                                                               final HttpServletResponse response)
2933            {
2934            if(null != eTag)
2935                {
2936                if(!eTag.endsWith("\"")) { throw new IllegalArgumentException(); }
2937                if(null == request) { throw new IllegalArgumentException(); }
2938                if(null == response) { throw new IllegalArgumentException(); }
2939                final String inm = request.getHeader("If-None-Match");
2940                if(null != inm)
2941                    {
2942                    if("*".equals(inm))
2943                        {
2944                        // Since this entity exists (else this call wouldn't be being made),
2945                        // abort the body with 'NOT MODIFIED'.
2946                        response.setStatus(HttpServletResponse.SC_NOT_MODIFIED);
2947                        return(true);
2948                        }
2949    
2950                    // Parse the tokens from the header value
2951                    // and check for any matches, weak or strong.
2952                    final BasicHeader bh = new BasicHeader("If-None-Match", inm);
2953                    for(final HeaderElement he : bh.getElements())
2954                        {
2955                        final String token = he.toString();
2956                        if(eTag.equals(token))
2957                            {
2958                            // Since one of the tokens matches our ETag
2959                            // abort the body with 'NOT MODIFIED'.
2960                            response.setStatus(HttpServletResponse.SC_NOT_MODIFIED);
2961                            return(true);
2962                            }
2963                        }
2964    
2965                    // If INM header is present then must not fall back to LM/IMS.
2966                    // Content (may have) changed; do send response body.
2967                    return(false);
2968                    }
2969                }
2970    
2971            // Fall back to use of If-Modified-Since in absence of ETag/If-None-Match.
2972            return(abortIfNotModifiedSince(lastModified, request, response));
2973            }
2974    
2975        /**Compute a suitable cache expiry time for a usually slowly-changing object (ms); non-negative.
2976         * Treats the item as if almost static in terms of rate of change
2977         * (but constrains the result to be no longer than the minimum for static objects).
2978         * <p>
2979         * Makes the cache time usually a significant multiple of
2980         * the interval between rechecks of exhibit immutable data
2981         * as this is expected to change relatively slowly.
2982         * <p>
2983         * Extend to a reasonable fraction of the underlying item's time since last change
2984         * capped to the maximum allowed for static content,
2985         * essentially replicating a common heuristic from browsers.
2986         * <p>
2987         * Increase it if the system is conserving/busy so as to reduce future server load.
2988         */
2989        public static long computeCacheMaxAgeMSFromTimestamp(
2990                final long timestamp,
2991                final ServletContext ctxt, final org.hd.d.pg2k.svrCore.props.GenProps gp)
2992            {
2993            final boolean conserve = GenUtils.mustConservePower() || WebUtils.isOverloaded(ctxt);
2994            final long basicCacheLifetime = Math.max((System.currentTimeMillis() - timestamp) >> 2,
2995                (Math.max(CoreConsts.DEFAULT_TEMPORAL_SLACKNESS_S * 1000,
2996                    gp.getWEBSVR_MIN_EX_IMATTR_RECHECK_MS()) << (conserve ? 5 : 3)));
2997            // Constrain to be never more than the minimum lifetime of static/furniture items.
2998            return(Math.min(WebConsts.MIN_STATIC_WEBITEMS_CACHE_MS, basicCacheLifetime));
2999            }
3000    
3001        /**Get "newsflash" HTML for the main site front page, or "" if none; never null.
3002         * This is retrieved from the GenProps.
3003         */
3004        public static String getNewsflashHTML(final GenProps gp)
3005            {
3006            if(gp == null) { return(""); /* Be kind to the caller. */ }
3007            final String result = gp.getGen().get(GenPropsGenNames.GEN_NEWSFLASH_HTML);
3008            if(result == null) { return(""); }
3009            return(result);
3010            }
3011    
3012    
3013    
    /**Target traffic cycle time for recentTrafficLowForLatestCycle() in milliseconds; strictly positive.
     * This is typically a week or multiple thereof to allow for the usual major cycle in traffic flows,
     * and to detect weekends and holiday dips for example.
     * <p>
     * NOTE: this is computed and held as an int,
     * so a cycle of more than about 24 days (eg 4 weeks) would overflow.
     */
    private static final int WEB_TRAFFIC_CYCLE_MS = 7 * 24 * 3600 * 1000; // 1W

    /**Sample interval used to divide up WEB_TRAFFIC_CYCLE_MS; not null.
     * A smaller interval gives a finer-grained response at the cost of more work.
     * Typically MEDIUM or LONG would be used to have a response time around the hour mark.
     */
    private static final EventPeriod WEB_TRAFFIC_SAMPLE_PERIOD = EventPeriod.MEDIUM;

    /**Minimum number of samples to trust when deciding traffic levels; strictly positive.
     * A value between about 4 and whatever a whole day is (inclusive) probably makes sense.
     */
    private static final int WEB_TRAFFIC_MIN_SAMPLES = 4; // Math.max(4, (24*3600*1000)/WEB_TRAFFIC_SAMPLE_PERIOD.getIntervalMs());

    /**Integer number of WEB_TRAFFIC_SAMPLE_PERIOD sample periods used to cover WEB_TRAFFIC_CYCLE_MS; strictly positive.
     * Never fewer than WEB_TRAFFIC_MIN_SAMPLES,
     * except that it is capped at SystemVariables.EVENT_SAMPLES_RETAINED.
     */
    private static final int WEB_TRAFFIC_SAMPLE_PERIODS = Math.min(
            Math.max(WEB_TRAFFIC_CYCLE_MS / WEB_TRAFFIC_SAMPLE_PERIOD.getIntervalMs(), WEB_TRAFFIC_MIN_SAMPLES),
            SystemVariables.EVENT_SAMPLES_RETAINED);

    /**If true then ignore any leading run of (oldest) zero traffic counts to give initial faster response for new server. */
    private static final boolean WEB_TRAFFIC_TRIM_LEADING_ZEROS = true;

    /**Non-AEP-linked cache key for recentTrafficLowForLatestCycle() result; non-null.
     * Stored value against key is (immutable) pair of interval number and boolean 'traffic low' flag.
     * The interval number is usually that of the current WEB_TRAFFIC_SAMPLE_PERIOD interval,
     * but a more cautious shorter-term number with disjoint values can be used instead
     * to avoid repeated recomputation in the face of possible-transient problems such as with start-up and I/O.
     */
    private static final DataSourceBean.UnlinkedKey rTLFLC_resultKey = new DataSourceBean.UnlinkedKey("rTLFLC_resultKey");
3046    
    /**Returns true if the last full period or so had low traffic compared to the last larger cycle or so.
     * The metric is the count of local catalogue page hits per WEB_TRAFFIC_SAMPLE_PERIOD interval.
     * By default the result is true if the count for the interval just before the current one
     * ranks in the bottom quartile of the counts retrieved for the cycle;
     * a valid (0 to 100) GPGEN_LOW_TRAFFIC_THRESHOLD_PERCENT GenProps value replaces the quartile.
     * Can be used to stabilise ad revenue for example by switching in extra ads
     * when traffic is low, for example at weekends.
     * <p>
     * The calculation result is cached (against the dsb) to reduce CPU effort:
     * normally for the remainder of the current sample interval,
     * but only briefly (under a short-term key) for 'cannot compute now' results
     * and while the GenProps appear not to have been loaded yet.
     * <p>
     * If GenUtils.mustConservePowerExtreme() is true
     * and there is no cached result keyed to the current sample interval
     * then false is returned immediately without computing or cacheing anything.
     * <p>
     * When IsDebug.isDebug is set, a non-null LocalProps GPGEN_AD_FORCE_LOWTRAFFIC_MODE value
     * is parsed as a boolean and returned directly, bypassing all of the above.
     *
     * @param dsb  data source bean from which to retrieve stats; never null
     * @return false unless recent traffic was clearly below normal over a typical cycle;
     *     false is also returned when the answer cannot currently be computed
     * @throws IllegalArgumentException  if dsb is null
     */
    public static boolean recentTrafficLowForLatestCycle(final DataSourceBean dsb)
        {
        if(null == dsb) { throw new IllegalArgumentException(); }

        // DEBUG MODE ONLY: additional ability to wiggle status on or off from LocalProps.
        if(IsDebug.isDebug)
            {
            final String f = LocalProps.getGen().get(GenPropsGenNames.GPGEN_AD_FORCE_LOWTRAFFIC_MODE);
            if(f != null) { return(Boolean.parseBoolean(f)); }
            }

        // Compute current period for which result is good (and usually cached).
        // Data will be sampled to up to and including the previous period.
        final long now = System.currentTimeMillis();
        final long currentInterval = WEB_TRAFFIC_SAMPLE_PERIOD.getIntervalNumber(now);

        // Return cached value if present and valid for the current period.
        final Tuple.Pair<Long, Boolean> cached = (Tuple.Pair<Long, Boolean>)dsb.getUnlinkedValue(rTLFLC_resultKey);
        if((null != cached) && (currentInterval == cached.first.longValue()))
            { return(cached.second); }

        // Don't spend time (re)computing this if short of energy; indicate 'don't know' immediately.
        if(GenUtils.mustConservePowerExtreme()) { return(false); }

        // Try cache with short-term key (lasting a minute or so), disjoint with the main key.
        // We only use this for presumed-transient results only,
        // and if no value or an old one is present only
        // so as to avoid overwriting a concurrently-computed value.
        final long shortTermKey = now >> 17; // ~2 minute lifetime (2^17ms is about 131s).
        if((null != cached) && (shortTermKey == cached.first.longValue()))
            { return(cached.second); }
        // Create result for caching short-term transient/holding result if necessary.
        final Tuple.Pair<Long, Boolean> shortTermFalse = new Tuple.Pair<Long, Boolean>(shortTermKey, false);

        // Need to (re)compute value.
        // Try to minimise time from here to caching a result
        // to minimise probability of races where two or more threads redundantly compute the result concurrently.
        // Note that only Exceptions are caught below: an AssertionError from a failed assert will propagate.
        try
            {
if(IsDebug.isDebug) { dsb.log("recentTrafficLowForLatestCycle(): computing for periods: " + WEB_TRAFFIC_SAMPLE_PERIODS); }

            // The metric used is the number of local catalogue page hits.
            // This local stat may work best for tuning ad behaviour to local needs and traffic
            // and may be more robust than any global estimate in the face of comms problems, etc.
            // Will generally require a full cycle to pass for any new mirror/instance to participate.
            final SimpleVariableDefinition stat = SystemVariables.GENSTATS_STRING_LOCAL_EVENT;
            // Request every one of the WEB_TRAFFIC_SAMPLE_PERIODS slots ending at the previous interval.
            final BitSet whichValues = new BitSet(WEB_TRAFFIC_SAMPLE_PERIODS);
            whichValues.set(0, WEB_TRAFFIC_SAMPLE_PERIODS);
            final EventVariableValue[] evvs = dsb.getEventValues(stat,
                    WEB_TRAFFIC_SAMPLE_PERIOD,
                    currentInterval-1,
                    whichValues);
            assert(null != evvs);
            // Get values for slot just before the current one.
            final EventVariableValue evvPrev;
            if((0 == evvs.length) || (null == (evvPrev = evvs[0])))
                {
                // Cache a brief 'false', replacing only the stale entry read above (if any)
                // so that a value stored concurrently by another thread is not overwritten.
                if(null != cached) { dsb.replaceUnlinkedValue(rTLFLC_resultKey, cached, shortTermFalse); } else { dsb.putIfAbsentUnlinkedValue(rTLFLC_resultKey, shortTermFalse); }
                dsb.log("recentTrafficLowForLatestCycle(): no evv data for previous slot, cannot compute now, will retry");
                return(false); // May be transient problem so result cached only briefly.
                }
            final int totalPrevCount = evvPrev.getCount(ThroughputMonitorFilterPG2K.THRFNAME_HIT_CAT_PAGE);
if(IsDebug.isDebug) { dsb.log("recentTrafficLowForLatestCycle(): prev interval count: " + totalPrevCount); }
            // Collect and filter, and sort, all interval counts.
            final ArrayList<Integer> counts = new ArrayList<Integer>(evvs.length);
            // Examine evvs, oldest first (ie from the highest index down to the previous slot at index 0).
            for(int i = evvs.length; --i >= 0; )
                {
                final EventVariableValue evv = evvs[i];
                if(null == evv) { continue; } // Exclude outages.
                final int count = evv.getCount(ThroughputMonitorFilterPG2K.THRFNAME_HIT_CAT_PAGE);
                // Optionally drop zero counts until the first non-zero (oldest) count has been seen.
                if(WEB_TRAFFIC_TRIM_LEADING_ZEROS && (0 == count) && counts.isEmpty()) { continue; }
                counts.add(count);
                }
            final int intervalCount = counts.size();
            if(intervalCount < WEB_TRAFFIC_MIN_SAMPLES)
                {
                // Cache a brief 'false' as above, taking care not to overwrite a concurrently-stored value.
                if(null != cached) { dsb.replaceUnlinkedValue(rTLFLC_resultKey, cached, shortTermFalse); } else { dsb.putIfAbsentUnlinkedValue(rTLFLC_resultKey, shortTermFalse); }
if(IsDebug.isDebug) { dsb.log("recentTrafficLowForLatestCycle(): too few samples to be reliable now, will retry; got " + intervalCount + "/" + WEB_TRAFFIC_MIN_SAMPLES + " " + counts); }
                return(false); // Too few samples to be reliable; may be transient problem so result cached only briefly.
                }
if(IsDebug.isDebug) { dsb.log("recentTrafficLowForLatestCycle(): counts per slot, oldest first (omitting null entries): " + counts); }
            // Rank the previous slot's count amongst all of the retained counts.
            // NOTE(review): where several slots share the same count
            // Collections.binarySearch() does not guarantee which of the equal entries is found,
            // so the rank used below is somewhat arbitrary amongst ties; confirm that this is acceptable.
            Collections.sort(counts);
            final int index = Collections.binarySearch(counts, totalPrevCount);
            // We know the totalPrevCount must be present in the list:
            // the previous slot is examined last above so can only be trimmed as a leading zero
            // if every count was trimmed, and that empty case has already returned.
            assert(index >= 0);

            // DEFAULT: result is true if cat page views are in the bottom quartile...
            int threshold = (intervalCount >> 2);

            // If a specific (valid) percentage has been supplied then use it.
            // But if GenProps still 'empty' then don't cache result for long in hope real GP value along soon.
            final GenProps gp = dsb.getGenProps(-1);
            final boolean gpNotLoadedYet = (0 == gp.timestamp);
            final String tpcS = gp.getGen().get(GenPropsGenNames.GPGEN_LOW_TRAFFIC_THRESHOLD_PERCENT);
            if(null != tpcS)
                {
                try
                    {
                    final int percent = Integer.parseInt(tpcS, 10);
                    // An out-of-range percentage is silently ignored, leaving the default threshold in place.
                    if((percent >= 0) && (percent <= 100))
                        {
if(IsDebug.isDebug) { dsb.log("recentTrafficLowForLatestCycle(): threshold percentage from GP: " + percent); }
                        // Measure the index against the specified percentage threshold.
                        threshold = (intervalCount * percent) / 100;
                        }
                    }
                catch(final Exception e)
                    {
                    // Absorb/log error and continue (with the default threshold).
                    dsb.log("Could not parse GP gen value "+GenPropsGenNames.GPGEN_LOW_TRAFFIC_THRESHOLD_PERCENT, e);
                    }
                }

            // Compute if traffic is below threshold...
            final boolean result = (index < threshold);

            // Cache the result... (Only briefly if GenProps value not loaded yet.)
            // This is an unconditional store, unlike the holding values cached elsewhere in this method.
            dsb.putUnlinkedValue(rTLFLC_resultKey, new Tuple.Pair<Long, Boolean>(gpNotLoadedYet ? shortTermKey : currentInterval, result));
            // Logged unconditionally (the debug-only guard is commented out).
/*if(IsDebug.isDebug)*/ { dsb.log("recentTrafficLowForLatestCycle(): (temp="+gpNotLoadedYet+" cached, "+(System.currentTimeMillis()-now)+"ms) result="+result+", prev interval count="+totalPrevCount+", index=" + index + "/"+(intervalCount-1)+", threshold="+threshold+", from " + counts); }
            return(result); // Result has already been cached above.
            }
        catch(final Exception e)
            {
            // Cache a brief 'false' as above, taking care not to overwrite a concurrently-stored value.
            if(null != cached) { dsb.replaceUnlinkedValue(rTLFLC_resultKey, cached, shortTermFalse); } else { dsb.putIfAbsentUnlinkedValue(rTLFLC_resultKey, shortTermFalse); }
            dsb.log("recentTrafficLowForLatestCycle(): unexpected error trying to compute if previous period was low-traffic", e);
            return(false); // May be transient result cached only briefly.
            }
        }
3186        }