001    /*
002    Copyright (c) 1996-2011, Damon Hart-Davis
003    All rights reserved.
004    
005    Redistribution and use in source and binary forms, with or without
006    modification, are permitted provided that the following conditions are
007    met:
008    
009      * Redistributions of source code must retain the above copyright
010        notice, this list of conditions and the following disclaimer.
011    
012      * Redistributions in binary form must reproduce the above copyright
013        notice, this list of conditions and the following disclaimer in the
014        documentation and/or other materials provided with the
015        distribution.
016    
017    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
018    IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
019    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
020    PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
021    OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
022    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
023    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
024    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
025    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
026    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
027    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
028    */
029    
030    package org.hd.d.pg2k.webSvr.util;
031    
032    import java.awt.image.BufferedImage;
033    import java.awt.image.ColorModel;
034    import java.awt.image.WritableRaster;
035    import java.io.IOException;
036    import java.io.InputStream;
037    import java.lang.ref.SoftReference;
038    import java.lang.ref.WeakReference;
039    import java.net.InetAddress;
040    import java.net.MalformedURLException;
041    import java.net.URL;
042    import java.net.UnknownHostException;
043    import java.util.ArrayList;
044    import java.util.Arrays;
045    import java.util.BitSet;
046    import java.util.Collection;
047    import java.util.Collections;
048    import java.util.Comparator;
049    import java.util.Enumeration;
050    import java.util.HashMap;
051    import java.util.HashSet;
052    import java.util.Iterator;
053    import java.util.LinkedList;
054    import java.util.List;
055    import java.util.ListIterator;
056    import java.util.Map;
057    import java.util.ResourceBundle;
058    import java.util.Set;
059    import java.util.SortedSet;
060    import java.util.StringTokenizer;
061    import java.util.TreeSet;
062    import java.util.concurrent.Callable;
063    import java.util.concurrent.ConcurrentHashMap;
064    import java.util.concurrent.ConcurrentMap;
065    import java.util.concurrent.Future;
066    import java.util.regex.Pattern;
067    
068    import javax.servlet.ServletContext;
069    import javax.servlet.http.HttpServletRequest;
070    
071    import org.hd.d.pg2k.svrCore.AccessionData;
072    import org.hd.d.pg2k.svrCore.AllExhibitProperties;
073    import org.hd.d.pg2k.svrCore.AllExhibitProperties.AEPFilter;
074    import org.hd.d.pg2k.svrCore.Compact7BitString;
075    import org.hd.d.pg2k.svrCore.CoreConsts;
076    import org.hd.d.pg2k.svrCore.ExhibitName;
077    import org.hd.d.pg2k.svrCore.ExhibitPropsComputable;
078    import org.hd.d.pg2k.svrCore.ExhibitPropsComputableMutable;
079    import org.hd.d.pg2k.svrCore.ExhibitPropsLoadable;
080    import org.hd.d.pg2k.svrCore.ExhibitStaticAttr;
081    import org.hd.d.pg2k.svrCore.ExhibitThumbnails;
082    import org.hd.d.pg2k.svrCore.GenUtils;
083    import org.hd.d.pg2k.svrCore.HostUtils;
084    import org.hd.d.pg2k.svrCore.ImageUtils;
085    import org.hd.d.pg2k.svrCore.LocaleBeanBase;
086    import org.hd.d.pg2k.svrCore.MemoryTools;
087    import org.hd.d.pg2k.svrCore.Name;
088    import org.hd.d.pg2k.svrCore.Name.ExhibitFull;
089    import org.hd.d.pg2k.svrCore.Rnd;
090    import org.hd.d.pg2k.svrCore.SimpleLoggerIF;
091    import org.hd.d.pg2k.svrCore.TextUtils;
092    import org.hd.d.pg2k.svrCore.ThreadUtils;
093    import org.hd.d.pg2k.svrCore.Tuple;
094    import org.hd.d.pg2k.svrCore.VarTools;
095    import org.hd.d.pg2k.svrCore.MIME.ExhibitMIME;
096    import org.hd.d.pg2k.svrCore.MIME.ExhibitMIME.ExhibitTypeParameters;
097    import org.hd.d.pg2k.svrCore.location.GeoProximity;
098    import org.hd.d.pg2k.svrCore.location.GeoUtils;
099    import org.hd.d.pg2k.svrCore.location.LoadBalancingUtils;
100    import org.hd.d.pg2k.svrCore.props.GenProps;
101    import org.hd.d.pg2k.svrCore.props.LocalProps;
102    import org.hd.d.pg2k.svrCore.vars.EventPeriod;
103    import org.hd.d.pg2k.svrCore.vars.EventVariableValue;
104    import org.hd.d.pg2k.svrCore.vars.InstanceID;
105    import org.hd.d.pg2k.svrCore.vars.SimpleVarStats;
106    import org.hd.d.pg2k.svrCore.vars.SimpleVariableDefinition;
107    import org.hd.d.pg2k.svrCore.vars.SimpleVariablePipelineIF;
108    import org.hd.d.pg2k.svrCore.vars.SimpleVariableValue;
109    import org.hd.d.pg2k.svrCore.vars.SystemVariables;
110    import org.hd.d.pg2k.webSvr.catalogue.TrailData;
111    import org.hd.d.pg2k.webSvr.exhibit.BuiltInFilters;
112    import org.hd.d.pg2k.webSvr.exhibit.DataSourceBean;
113    import org.hd.d.pg2k.webSvr.exhibit.DataSourceBean.AEPLinkedKey;
114    import org.hd.d.pg2k.webSvr.exhibit.FilterExpr;
115    import org.hd.d.pg2k.webSvr.exhibit.TreeFilterBean;
116    import org.hd.d.pg2k.webSvr.virtualHosts.VirtualHosts;
117    import org.hd.d.tmf.ThroughputMonitorFilterBase;
118    import org.w3c.dom.Node;
119    
120    import ORG.hd.d.IsDebug;
121    
122    /**Web-server-related utility functions.
123     * This is for algorithms only of interest to Web apps, often JSPs.
124     * <p>
125     * One advantage of having code here rather than in-line in a JSP
126     * is that it is pre-compiled off-line for speed and robustness;
127     * code here is also easier to test.
128     */
129    public final class WebUtils
130        {
131        /**Prevent construction of an instance: the only constructor is private and does nothing. */
132        private WebUtils() { }
133    
134    
135        /**Definition of the event/variable to which we post a voter's approximate geo location; an alias of the generic global string stats event. */
136        public static final SimpleVariableDefinition VOTER_LOCATION_STATS_EVENT_DEF = SystemVariables.GENSTATS_STRING_GLOBAL_EVENT;
137    
138        /**Prefix of event value for voter's approximate geo location; presumably the location text follows the '=' (confirm where the event is posted). */
139        public static final String VOTER_LOCATION_STATS_EVENT_PREFIX = "voterLocation=";
140    
141    
142        /**Private limited-size cache for chooseMirrorHostToBalanceLoad() of the best mirror for a given client IP.
143         * Map from client IP address to a pair of the chosen mirror host name and the time (ms) after which the entry is stale.
144         * <p>
145         * A lock can be held on this object to make multiple operations atomic.
146         * NOTE(review): the lookup/stale-removal in chooseMirrorHostToBalanceLoad() locks this object but its put of a new result does not; confirm the map is thread-safe.
147         * <p>
148         * Entries in this map go stale very quickly (in a matter of tens of seconds at most)
149         * and are mainly here to avoid repeated recalculations while preparing
150         * one HTML page to serve to the client for example.
151         * <p>
152         * This has a fixed maximum size to limit memory consumption
153         * and to avoid the need for explicit cleaning of old entries
154         * (they can just hang around until displaced).
155         * We vary this maximum size a little (a random component is added once at class load) to try to avoid deadly embraces.
156         * <p>
157         * We allow this to be entirely discarded when memory is very stressed.
158         * <p>
159         * A size limit of a few thousand to allow for a few mega-proxies,
160         * and a time limit of some tens of seconds,
161         * is probably about right.
162         */
163        private static final MemoryTools.SimpleLRUMapAutoSizeForHitRate<InetAddress,Tuple.Pair<String,Long>> _cache_cMHTBL_byClient =
164                MemoryTools.SimpleLRUMapAutoSizeForHitRate.<InetAddress, Tuple.Pair<String,Long>>create(0, 1001 + Rnd.fastRnd.nextInt(601), "_cache_cMHTBL_byClient");
165    
166        /**Maximum time an entry in _cache_cMHTBL_byClient is valid in ms; strictly positive.
167         * Should be no more than the maximum staleness of the mirror liveness variables,
168         * and is mainly intended to avoid repeated recalculations during the preparation
169         * of a single HTML page for a user giving multiple different values on that one page...
170         * Set to just over one second plus one third of VarTools.MIN_AGE_MS (integer division). */
171        private static final int MAX_cache_cMHTBL_byClient_AGE_MS = 1001 +
172                VarTools.MIN_AGE_MS/3;
173    
174        /**Lock to prevent multiple simultaneous polls of the main or "loopback" URLs at once.
175         * Otherwise multiple incoming requests may trigger multiple outgoing polls
176         * which actually overwhelm (or trigger throttling by) the remote machine(s).
177         * Held by chooseMirrorHostToBalanceLoad() around its liveness tests of the main host and of the client-named host. */
178        private static final Object _lock_fallback_URL_poll = new Object();
179    
180        /**Fraction of time we redistribute load to possibly non-optimal mirrors; strictly positive.
181         * A higher value means that we redistribute traffic
182         * to possibly-non-optimal mirrors less often (so presumably a reciprocal of the probability; confirm at the use site).
183         * <p>
184         * A little bit of redistributed traffic makes the system more robust
185         * and primed for action, but may give some unlucky users poor performance.
186         * <p>
187         * We make this probability lower (this value higher)
188         * as the potential (maximum) cost of a wrong choice goes up.
189         * The biggest error we can make for most users is to send them to
190         * the furthest corner of the planet when there is a same-COUNTRY server available.
191         * Fixed once at class load: the COUNTRY closeness plus an offset from Rnd.fastRnd.nextInt(COUNTRYGROUP closeness). */
192        private static final int REDIST_FRAC = GeoProximity.COUNTRY.getCloseness() + Rnd.fastRnd.nextInt(GeoProximity.COUNTRYGROUP.getCloseness());
193    
194        /**If true, always select the fastest available mirror; don't use any stochastic factor. The fastest-first path is also taken whenever only one candidate remains. */
195        private static final boolean ALWAYS_SELECT_FASTEST_MIRROR = false;
196    
197        /**If true, then ignore mirrors not close to the user unless we're desperate to shed load.
198         * This can take precedence over ALWAYS_SELECT_FASTEST_MIRROR if the fastest mirrors are remote.
199         * Checked by ignoreRemoteMirrorsNow() and again by chooseMirrorHostToBalanceLoad() before its proximity filter. */
200        private static final boolean IGNORE_REMOTE_MIRRORS_UNLESS_OVERLOADED = true;
201    
202        /**Returns true if we should ignore remote (proximity 'NONE') mirrors as candidates right now.
203         * We will generally ignore 'NONE' proximity mirrors
204         * unless we're busy or trying to conserve power or otherwise needing to reduce workload.
205         */
206        private static final boolean ignoreRemoteMirrorsNow(final ServletContext ctxt)
207            {
208            if(!IGNORE_REMOTE_MIRRORS_UNLESS_OVERLOADED) { return(false); } /* Never ignore. */
209    
210            // Desperate to shed load, thus don't ignore any otherwise-good mirrors.
211            if(GenUtils.mustConservePower()) { return(false); }
212            if(WebUtils.isOverloaded(ctxt)) { return(false); }
213    
214            return(true); // Not desperate to shed load, so can be picky about mirrors...
215            }
216    
217        /**Get name (and optional port) of server for best user experience; never null/empty.
218         * This is a hostname[:port] suitable to insert into the URL
219         * for retrieving an exhibit or another page, etc.
220         * <p>
221         * This will attempt to pick one based on a number of factors
222         * such as current load, client proximity, which mirrors are up,
223         * which mirrors are on the same AEP version as us, etc.
224         * <p>
225         * If this cannot choose a suitable server/mirror name
226         * it returns the generic "main" name, CoreConsts.MAIN_DATA_HOST.
227         * <p>
228         * Though either or both arguments can be null,
229         * this is most likely to give useful non-default answers
230         * when both values are non-null.
231         * <p>
232         * A couple of items of hardwired policy need to be pointed out:
233         * <ul>
234         * <li>If the client for this request looks as if it is from a spider
235         *     (and therefore may cache the results for a long time)
236         *     then we always return the generic main hostname for speed
237         *     and for long-term robustness.
238         * <li>If there is no request/IP address for the client
239         *     (so we could do no more than statistical load balancing at best)
240         *     then we always return the generic main hostname for speed
241         *     and in the hope that we will be doing
242         *     DNS-based load/proximity stuff sometime.
243         * </ul>
244         * <p>
245         * Though we may sometimes randomise the selected mirror to spread load around,
246         * we try to avoid egregious selections, eg on the other side of the planet.
247         *
248         * @param request  the incoming request or null if none available
249         * @param vars  access to the system variables or null if not available
250         *
251         * @return a "mirror" subdomain of CoreConsts.MAIN_DATA_HOST,
252         *     or by default CoreConsts.MAIN_DATA_HOST
253         *     thus relying on DNS-based balancing
254         */
255        public static String chooseMirrorHostToBalanceLoad(final HttpServletRequest request,
256                                                           final DataSourceBean vars)
257            {
258            // The default host name to fall back to
259            // in the absence of any available/suitable mirrors.
260            // We must not cache the fallbackHost by IP address
261            // because multiple clients may share a single IP address (eg at a large proxy)
262            // and the fallbackHost may be computed depending on client-specific features.
263            String fallbackHost = CoreConsts.MAIN_DATA_HOST;
264    
265            // Get the client's IP address if possible.
266            // If the IP address is available
267            // then see if we have a valid cached result for this exact client IP and return it immediately!
268            // This is the fastest-possible path through the routine.
269            InetAddress clientIP = null;
270            if(request != null)
271                {
272                try
273                    {
274                    clientIP = InetAddress.getByName(request.getRemoteAddr());
275                    assert(clientIP != null);
276    
277                    // Look in cache for non-stale entry for this exact client IP...
278                    // (Fall through to do normal computation if not present.)
279                    synchronized(_cache_cMHTBL_byClient)
280                        {
281                        final Tuple.Pair<String,Long> entry = _cache_cMHTBL_byClient.get(clientIP);
282                        if(entry != null)
283                            {
284                            if(entry.second.longValue() >= System.currentTimeMillis())
285                                { return(entry.first); }
286                            // Remove the useless stale entry that will otherwise keep being held onto LRU!
287                            _cache_cMHTBL_byClient.remove(clientIP);
288                            }
289                        }
290                    }
291                catch(final UnknownHostException e) { /* Cannot parse client address... */ }
292                }
293            // No request and thus no client IP address,
294            // so always return the generic/fallback hostname.
295            else
296                { return(fallbackHost); }
297    
298    
299            // If the client looks like it might cache the result for a long time
300            // then we give it the generic domain for robustness (and speed).
301            // The most obvious case is spidering of pages to go in a search engine
302            // for recall years later when specific mirrors may have come and gone.
303            if((request != null) && requestProbablyFromSpider(request))
304                { return(fallbackHost); }
305    
306    
307            // If we can extract the host from the original request URL,
308            // and it is a legitimate name/alias for the site,
309            // then make it (and any non-standard port) the fallback.
310            if(request != null)
311                {
312                try
313                    {
314                    final URL requestURL = new URL(request.getRequestURL().toString());
315                    final String ourName = requestURL.getHost();
316                    final int ourPort = requestURL.getPort();
317                    final String fullName = ((ourPort == 80) || (ourPort == -1)) ? ourName :
318                                             (ourName + ':' + ourPort);
319                    if(!fullName.equals(fallbackHost) &&
320                            (null != VirtualHosts.getVirtualHostDetails(ourName, null)))
321                        { fallbackHost = fullName; }
322                    }
323                catch(final MalformedURLException e)
324                    {
325                    e.printStackTrace(); // Shouldn't really happen...
326                    }
327                }
328    
329    //System.out.println("chooseMirrorHostToBalanceLoad(): *** START CALC *** : clientIP = " + clientIP);
330    
331    
332            // In the case that we have the client's request available
333            // and the default/canonical domain/host seems to be unavailable,
334            // then we'll use the hostname the client specified for us
335            // in its request URL as the fallback.
336            // (This also allows off-line testing of the Gallery!)
337            if((request != null) && (fallbackHost.equals(CoreConsts.MAIN_DATA_HOST)))
338                {
339                // Prevent multiple redundant polls on the main/own URLs from occurring at once,
340                // which may have the effect of overwhelming the remote site.
341                synchronized(_lock_fallback_URL_poll)
342                    {
343                    final Boolean mainDomainIsUp =
344                        LoadBalancingUtils.testIfHTTPServerIsUp(CoreConsts.MAIN_DATA_HOST, true);
345    
346                    // Try the client's suggested hostname
347                    // if the main host is definitely down
348                    // (because otherwise we are stuck!).
349                    // Security note: we must not trust this client-supplied
350                    // information nor cache it.
351                    if(Boolean.FALSE.equals(mainDomainIsUp))
352                        {
353                        try
354                            {
355                            // If the main host is not available,
356                            // but the name under which the client arrived at this server *is*,
357                            // then make the client's name for us the fallback hostname.
358                            // We don't cache these client-supplied names and their liveness results,
359                            // since we cannot necessarily trust them and they may be unbounded in number.
360                            final URL requestURL = new URL(request.getRequestURL().toString());
361                            final String ourName = requestURL.getHost();
362                            final int ourPort = requestURL.getPort();
363                            final String fullName = ((ourPort == 80) || (ourPort == -1)) ? ourName :
364                                                     (ourName + ':' + ourPort);
365                            // Don't bother testing liveness
366                            // if the client's request hostname is already the fallback host...
367                            // Nor if this does not look like one of our valid virtual names
368                            // (to avoid pestering third-party sites by mistake/proxy).
369                            if(!fullName.equals(fallbackHost) &&
370                                    (null != VirtualHosts.getVirtualHostDetails(ourName, null)))
371                                {
372                                // Do not look these up asynchronously
373                                // nor cache the results.
374                                if(Boolean.TRUE.equals(LoadBalancingUtils.testIfHTTPServerIsUp(fullName, false)))
375                                    {
376    System.err.println("WARNING: chooseMirrorHostToBalanceLoad(): having to fall back to our host as " + fullName);
377                                    fallbackHost = fullName;
378                                    }
379                                }
380                            }
381                        catch(final IOException e) { } // Ignore/absorb.
382                        }
383                    }
384                }
385    
386    
387            // If we don't have access to variables to check for live mirrors
388            // then return the fallback host immediately...
389            // Don't cache this result,
390            // as we didn't have all information to hand that we should have had.
391            if(vars == null)
392                { return(fallbackHost); /* Cannot find a working mirror, and can't cache this result. */ }
393    
394    
395            // See what mirrors *claim* to be available...
396            // We will check in a moment if they actually are...
397            final Map<String,Long> liveMirrors = LoadBalancingUtils.getActiveMirrors(vars, true);
398            // If there are no mirrors claiming to be live
399            // then return immediately with the generic name.
400            if(liveMirrors.size() < 1)
401                { return(fallbackHost); /* Cannot find a working mirror, and can't cache this result. */ }
402    
403    if(LoadBalancingUtils.LOG_BALANCING_DECISIONS) { System.out.println("chooseMirrorHostToBalanceLoad(): mirror bandwidths: " + liveMirrors); }
404    
405    
406            // Filter out any mirrors not using the same AEP version as us.
407            // Else the user may, for example, get a 404 on a new exhibit!
408            // TODO: consider ignoring this filter if we're desperate to reduce load.
409            try
410                {
411                final AllExhibitProperties aep = vars.getAllExhibitProperties(-1);
412                final Long ourHash = new Long(aep.longHash);
413    
414                // First collect all those mirrors whose hash is correct...
415                // Usually it will be all of them.
416                final Set<InstanceID> goodMirrorIDs = new HashSet<InstanceID>(liveMirrors.size());
417                final SimpleVariableValue hashes =
418                        vars.getVariable(SystemVariables.ThroughputMonitorFilter_AEP_LONGHASH);
419                if(hashes != null)
420                    {
421                    // Zap any stale values...
422                    final SimpleVariableValue h2 = hashes.removeAllKeysOlder(System.currentTimeMillis() - VarTools.MIN_AGE_MS);
423                    final Map<InstanceID,SimpleVariableValue> globalMap = h2.getGlobalMap();
424                    if(globalMap != null)
425                        {
426                        for(final InstanceID id : globalMap.keySet())
427                            {
428                            if(ourHash.equals(globalMap.get(id).getValue()))
429                                { goodMirrorIDs.add(id); /* Found a good mirror... */ }
430                            }
431                        }
432                    }
433    
434                // If none of the candidate mirrors has the right AEP hash
435                // then return immediately but don't cache this result.
436                if(goodMirrorIDs.size() < 1)
437                    {
438    if(IsDebug.isDebug || LoadBalancingUtils.LOG_BALANCING_DECISIONS) { System.out.println("chooseMirrorHostToBalanceLoad(): no other mirror on same AEP hash as us"); }
439                    return(fallbackHost); /* Cannot find a suitable mirror, and can't cache this result. */
440                    }
441    
442    if(IsDebug.isDebug || LoadBalancingUtils.LOG_BALANCING_DECISIONS) { System.out.println("chooseMirrorHostToBalanceLoad(): mirrors on same AEP hash as us: " + new ArrayList<InstanceID>(goodMirrorIDs)); }
443    
444                // Convert list of good IDs to list of good mirrors...
445                final SimpleVariableValue activeMirrors =
446                        vars.getVariable(SystemVariables.ThroughputMonitorFilter_ACTIVE_MIRROR_NAME);
447                final Set<String> goodMirrorTags = new HashSet<String>(goodMirrorIDs.size());
448                if(activeMirrors != null)
449                    {
450                    final Map<InstanceID,SimpleVariableValue> globalMap = activeMirrors.getGlobalMap();
451                    if(globalMap != null)
452                        {
453                        for(final InstanceID id : goodMirrorIDs)
454                            {
455                            final SimpleVariableValue svv = globalMap.get(id);
456                            if(svv != null)
457                                {
458                                final Object value = svv.getValue();
459                                if(value instanceof String)
460                                    { goodMirrorTags.add((String) value); }
461                                }
462                            }
463                        }
464                    }
465    
466                // Iterate over live mirrors, zapping any unsuitable ones.
467                for(final Iterator<String> it = liveMirrors.keySet().iterator(); it.hasNext(); )
468                    {
469                    final String mirror = it.next();
470                    if(!goodMirrorTags.contains(mirror)) { it.remove(); }
471                    }
472    
473                // If none of the candidate mirrors has the right AEP hash,
474                // then return immediately but don't cache this result.
475                if(liveMirrors.size() < 1)
476                    {
477    if(IsDebug.isDebug || LoadBalancingUtils.LOG_BALANCING_DECISIONS) { System.out.println("chooseMirrorHostToBalanceLoad(): no mirror on same AEP hash as us from candidate set"); }
478                    return(fallbackHost); /* Cannot find a suitable mirror, and can't cache this result. */
479                    }
480                }
481            catch(final Exception e)
482                {
483                // Could not filter by hash for some reason...
484                // Whinge, but continue, as this is not essential...
485                e.printStackTrace();
486                }
487    
488    
489            // Sort the mirrors into bandwidth order and drop any known to be dead
490            // (or unreachable by us).
491            final List<String> orderedList = LoadBalancingUtils.orderMirrorTagsBestBandwidthFirst(liveMirrors);
492            // If they've all been filtered out
493            // then return immediately with the fallback name.
494            if(orderedList.size() < 1)
495                { return(fallbackHost); /* Cannot find a working mirror, and can't cache this result. */ }
496    
497    
498            // If this host is a mirror
499            // and this mirror appears at all in the list of surviving candidates
500            // and the user has explicitly named this mirror alias as the hostname
501            // then return this mirror name as the preferred mirror
502            // so that the user's explicit mirror choice is respected.
503            if(request != null)
504                {
505                final String mirrorTag = LocalProps.getMirrorTag();
506                if((mirrorTag != null) && orderedList.contains(mirrorTag))
507                    {
508                    try
509                        {
510                        final URL requestURL = new URL(request.getRequestURL().toString());
511                        final String hostName = requestURL.getHost();
512                        final String mirrorName = LoadBalancingUtils.makeMirrorNameFromTag(mirrorTag);
513                        if(mirrorName.equalsIgnoreCase(hostName))
514                            { return(mirrorName); }
515                        }
516                    catch(final MalformedURLException e)
517                        { /* Ignore/absorb this error... */ }
518                    }
519                }
520    
521    
522            // Filter out any mirrors with no proximity to the user
523            // unless we are desperate to shed load
524            // in which case we will consider all legitimate/running mirrors.
525            // Note that if removing non-proximal mirrors empties the list
526            // then we use it as-was.
527            String clientLocationQuick = null; // Quick attempt to locate user.
528            if(IGNORE_REMOTE_MIRRORS_UNLESS_OVERLOADED &&
529                    (clientIP != null) &&
530                    ignoreRemoteMirrorsNow((vars == null) ? null : vars.getServletContext()))
531                {
532                clientLocationQuick = GeoUtils.getRegionByAddress(clientIP, true); // Quick lookup, minimum resources.
533    if(IsDebug.isDebug || LoadBalancingUtils.LOG_BALANCING_DECISIONS) { System.out.println("chooseMirrorHostToBalanceLoad(): considering ignoring mirrors with client proximity 'NONE' for client@"+clientLocationQuick); }
534                // Only filter out mirrors if we have extracted a fairly definite client location.
535                if(GeoUtils.CCTLD.isSyntaticallyValidCcTLD(clientLocationQuick) ||
536                   GeoUtils.isSyntaticallyValidRegistryName(clientLocationQuick))
537                    {
538                    final List<String> toRemove = new ArrayList<String>(orderedList.size());
539    if(IsDebug.isDebug || LoadBalancingUtils.LOG_BALANCING_DECISIONS) { System.out.println("chooseMirrorHostToBalanceLoad(): ignoring mirrors with client proximity 'NONE'"); }
540                    for(final String tag : orderedList)
541                        {
542                        final GeoUtils.CCTLD mirrorCC;
543                        try { mirrorCC = new GeoUtils.CCTLD(tag.substring(0, 2)); }
544                        catch(final IllegalArgumentException e) { e.printStackTrace(); continue; /* FIXME: shouldn't happen... */ }
545                        // Kill this tag if no proximity (or unknown proximity) to client.
546                        if(GeoUtils.computeProximity(clientLocationQuick, mirrorCC) == GeoProximity.NONE)
547                            { toRemove.add(tag); }
548                        }
549                    // If we've selected to remove nothing or everything then leave the list intact.
550                    // In part this helps ensure that the answer may be cacheable.
551                    if(!toRemove.isEmpty() && (toRemove.size() < orderedList.size()))
552                        { orderedList.removeAll(toRemove); }
553                    }
554                }
555    
556    
557            // If we always select the fastest mirror,
558            // or if we only have one candidate,
559            // then work through the list fastest-first for speed/simplicity...
560            if(ALWAYS_SELECT_FASTEST_MIRROR || (orderedList.size() == 1))
561                {
562    //System.out.println("chooseMirrorHostToBalanceLoad(): mirrors to try in order: " + orderedList);
563    
564                // One at a time,
565                // from first (best) to worst (last)
566                // check if we know the mirror to be alive or dead,
567                // of if we have an expired cache entry,
568                // then check it explicitly and update the cache accordingly.
569                // Return the first (fastest) live mirror found.
570                final String result = LoadBalancingUtils.findFirstWorkingMirror(orderedList);
571                if(result != null)
572                    {
573                    // File the result and return it...
574                    if(clientIP != null)
575                        { _cache_cMHTBL_byClient.put(clientIP, new Tuple.Pair<String, Long>(result, new Long(System.currentTimeMillis() + MAX_cache_cMHTBL_byClient_AGE_MS))); }
576                    return(result);
577                    }
578    
579                // Cannot choose a specific mirror so return the fallback host name.
580                // If nothing else, and if this is the generic name,
581                // then this may achieve some DNS-based load balancing.
582                return(fallbackHost); // Cannot find a working mirror.
583                }
584    
585            // Compute the client's location as a country code or
586            // (less good) a region/registry
587            // if there is actually a choice of mirrors...
588            // Compute a rough-and-ready quick value,
589            // and sometimes additionally try to compute a more accurate value,
590            // but more slowly.
591            String clientLocationSlow = null;
592            if(clientIP != null)
593                {
594                // Get approximate location first with a quick lookup (if not already done)...
595                // We'll be happy if this gives a country or a region/registry name.
596                // We don't need complete accuracy on this,
597                // just a rough indication for routing.
598                if(null == clientLocationQuick)
599                    { clientLocationQuick = GeoUtils.getRegionByAddress(clientIP, true); }
600    
601                // If we haven't got a country OR region code
602                // then do a full (slow, expensive) lookup.
603                // We could skip this if all available mirrors are in one country/region.
604                if(!GeoUtils.CCTLD.isSyntaticallyValidCcTLD(clientLocationQuick) &&
605                   !GeoUtils.isSyntaticallyValidRegistryName(clientLocationQuick))
606                    { clientLocationSlow = GeoUtils.getRegionByAddress(clientIP, false); }
607    
608    //System.out.println("chooseMirrorHostToBalanceLoad(): client location: " + clientLocation);
609                }
610    
611    
612            // If we appear not to have any sort of valid country code or region
613            // then we're not going to be able to do better
614            // than just basing our choice on the available bandwidth.
615            final boolean bandwidthOnly =
616                    (!GeoUtils.isSyntaticallyValidRegistryName(clientLocationQuick) &&
617                     !GeoUtils.CCTLD.isSyntaticallyValidCcTLD(clientLocationQuick)) &&
618                    (!GeoUtils.isSyntaticallyValidRegistryName(clientLocationSlow) &&
619                     !GeoUtils.CCTLD.isSyntaticallyValidCcTLD(clientLocationSlow));
620    
621            // Choose from available mirrors based on more factors than just bandwidth if possible.
622            // We include a notion of proximity (ie RTT, packet loss, reliability, etc),
623            // and stochastically spread load amongst servers in proportion to their capacity,
624            // though having discarded the worst entries so as never to give too poor a performance.
625            final Map<String,Long> weightings = new HashMap<String, Long>(1 + 2*orderedList.size());
626            // By default, retain just the entries for items still in the ordered list of candidiates.
627            for(final String tag : orderedList)
628                { weightings.put(tag, liveMirrors.get(tag)); }
629    
630            if(!bandwidthOnly)
631                {
632                // Create a new map of available bandwidth multiplied by proximity
633                // for all the remaining candidates
634                // TODO (possibly filtering out those with least-good proximity to the user)
635                // and use it to re-sort our ordered list.
636                // Compute two versions: one with our quick result and one with our slow result,
637                // and use whichever gives the highest "top" weighting.
638    
639                // The best "top" weighting that we found so far.
640                // Intially this is the bandwidth of the fastest mirror.
641                final long bestWeight = liveMirrors.get(orderedList.get(0)).longValue();
642    
643                // Compute weightings with quick lookup...
644                if(clientLocationQuick != null)
645                    {
646                    final Map<String,Long> weightingsQuick = new HashMap<String, Long>(1 + 2*orderedList.size());
647                    for(final String tag : orderedList)
648                        {
649                        final GeoUtils.CCTLD mirrorCC = new GeoUtils.CCTLD(tag.substring(0, 2)); // Extract country of mirror.
650                        final int proximity = GeoUtils.computeProximity(clientLocationQuick, mirrorCC).getCloseness();
651                        weightingsQuick.put(tag, new Long(liveMirrors.get(tag).longValue() * proximity));
652                        }
653                    final ArrayList<String> tmp = new ArrayList<String>(orderedList);
654                    Collections.sort(tmp, new LoadBalancingUtils.BWOrder(weightingsQuick));
655                    final long topWeight = weightingsQuick.get(tmp.get(0)).longValue();
656                    // If this is better than before, reorder the main list and save the weightings.
657                    if(topWeight > bestWeight)
658                        {
659                        Collections.sort(orderedList, new LoadBalancingUtils.BWOrder(weightingsQuick));
660                        weightings.clear();
661                        weightings.putAll(weightingsQuick);
662                        }
663                    }
664    
665                // Compute weightings with slow/accurate lookup...
666                if(clientLocationSlow != null)
667                    {
668                    final Map<String,Long> weightingsSlow = new HashMap<String, Long>(1 + 2*orderedList.size());
669                    for(final String tag : orderedList)
670                        {
671                        final GeoUtils.CCTLD mirrorCC = new GeoUtils.CCTLD(tag.substring(0, 2)); // Extract country of mirror.
672                        final int proximity = GeoUtils.computeProximity(clientLocationSlow, mirrorCC).getCloseness();
673                        weightingsSlow.put(tag, new Long(liveMirrors.get(tag).longValue() * proximity));
674                        }
675                    final ArrayList<String> tmp = new ArrayList<String>(orderedList);
676                    Collections.sort(tmp, new LoadBalancingUtils.BWOrder(weightingsSlow));
677                    final long topWeight = weightingsSlow.get(tmp.get(0)).longValue();
678                    // If this is better than before, reorder the main list and save the weightings.
679                    if(topWeight > bestWeight)
680                        {
681                        Collections.sort(orderedList, new LoadBalancingUtils.BWOrder(weightingsSlow));
682                        weightings.clear();
683                        weightings.putAll(weightingsSlow);
684                        }
685                    }
686    
687    //System.out.println("chooseMirrorHostToBalanceLoad(): mirror weightings: " + weightings);
688                }
689    
690    
691            // Sometimes, perturb our list so that the first item in the list
692            // will be picked in proportion to its weighting
693            // (it will be swapped from its usual rank with the top item)
694            // and all other entries will be left as they are.
695            // This should distribute some load to all mirrors.
696            // We don't do this all the time because generally
697            // we want to direct requests to the best (closest*fastest) mirror.
698            // We don't promote/select a host with no proximity to the user.
699            long totalWeight = 0;
700            for(final Long w : weightings.values())
701                { totalWeight += w.longValue(); }
702            if((totalWeight > Integer.MAX_VALUE) || (totalWeight < weightings.size()))
703                { System.err.println("WARNING: weightings overflowed/underflowed in chooseMirrorHostToBalanceLoad(): " + totalWeight); }
704            else if(Rnd.fastRnd.nextInt(REDIST_FRAC) == 0) // Sometimes perturb...
705                {
706                // Pick the mirror to promote...
707                int r = Rnd.fastRnd.nextInt((int) totalWeight);
708                for(int i = 0; i < orderedList.size(); ++i)
709                    {
710                    final String tag = orderedList.get(i);
711                    final int w = weightings.get(tag).intValue();
712                    if(r < w)
713                        {
714                        if(i != 0)
715                            {
716                            final GeoUtils.CCTLD mirrorCC = new GeoUtils.CCTLD(tag.substring(0, 2)); // Extract country of mirror.
717                            final GeoProximity qProx = (clientLocationQuick == null) ? GeoProximity.NONE :
718                                GeoUtils.computeProximity(clientLocationQuick, mirrorCC);
719                            final GeoProximity sProx = (clientLocationSlow == null) ? GeoProximity.NONE :
720                                GeoUtils.computeProximity(clientLocationSlow, mirrorCC);
721                            // If we can't be fairly sure that this mirror isn't a long way from the user
722                            // then skip over this entry hoping that a later one might be OK.
723                            if((qProx == GeoProximity.NONE) && (sProx == GeoProximity.NONE))
724                                { continue; }
725    
726                            // This is the one we have selected to promote,
727                            // and it isn't the top slot anyway,
728                            // so swap it into position 0,
729                            // and stop.
730                            final String tmp = orderedList.get(0);
731                            orderedList.set(0, tag);
732                            orderedList.set(i, tmp);
733    if(LoadBalancingUtils.LOG_BALANCING_DECISIONS) { System.out.println("chooseMirrorHostToBalanceLoad(): perturbed mirror list to: " + orderedList + " at probability " + (w / (float) totalWeight)); }
734                            }
735                        break;
736                        }
737    
738                    r -= w;
739                    }
740                }
741    
742    
743            // Go through the (possibly-reordered) mirror tag list in order,
744            // using the first available working mirror we find
745            // else drop back to the fallback host.
746            final String result = LoadBalancingUtils.findFirstWorkingMirror(orderedList);
747            if(result != null)
748                {
749    if(LoadBalancingUtils.LOG_BALANCING_DECISIONS) { System.out.println("chooseMirrorHostToBalanceLoad(): *** BALANCING CHOICE MADE *** |clientIP|locationQuick|locationSlow|mirror| = |" + clientIP + "|" + clientLocationQuick + "|" + clientLocationSlow + "|" + result + "|"); }
750    
751                // File the result and return it...
752                if(clientIP != null)
753                    { _cache_cMHTBL_byClient.put(clientIP, new Tuple.Pair<String, Long>(result, new Long(System.currentTimeMillis() + MAX_cache_cMHTBL_byClient_AGE_MS))); }
754                return(result);
755                }
756    
757            // Cannot choose a specific mirror so return the fallback host name.
758            // If nothing else, and if this is the generic name,
759            // then this may achieve some DNS-based load balancing.
760            return(fallbackHost);
761            }
762    
763        /**Handler for exhibit voting; holds no strong references to anything important. */
764        private static final class VoteHandler extends StatsSink.AbstractStatsListener
765            {
766            private final ExhibitFull exhibitFullName;
767            private final WeakReference<SimpleVariablePipelineIF> varsWR;
768            private final String dpID;
769            private final InetAddress voterIPAddr;
770            private final long expireBy;
771    
772            private VoteHandler(
773                    final String uniqueDataPointID,
774                    final long expireBy,
775                    final ExhibitFull exhibitFullName,
776                    final WeakReference<SimpleVariablePipelineIF> varsWR,
777                    final InetAddress voterIPAddr)
778                {
779                super(uniqueDataPointID, expireBy);
780                this.exhibitFullName = exhibitFullName;
781                this.varsWR = varsWR;
782                dpID = uniqueDataPointID;
783                this.voterIPAddr = voterIPAddr;
784                this.expireBy = expireBy;
785                }
786    
787            @Override public final String handle(final Map<String, String[]> parameters)
788                {
789                // When we've done, redirect back to the exhibit page...
790                // Stick a random parameter value on the end to ensure that
791                // all common/broken browsers (eg IE6, FF1) reload the page.
792                final String result = WebUtils.makeCatPageRRURL(exhibitFullName, WebConsts.F_secondary_generated_HTML_suffix) + "?rnd="+(Rnd.fastRnd.nextLong() >>> 1);
793    
794                // If the pipeline has gone away then return immediately...
795                final SimpleVariablePipelineIF vars = varsWR.get();
796                if(null == vars) { return(result); }
797    
798                if(parameters != null)
799                    {
800                    // Allow for GET or POST plain/image style (.x, .y) values.
801                    final boolean votePro =
802                        (null != parameters.get(VOTE_PRO_PARAM_NAME)) ||
803                        (null != parameters.get(VOTE_PRO_PARAM_NAME + ".x"));
804                    final boolean voteCon =
805                        (null != parameters.get(VOTE_CON_PARAM_NAME)) ||
806                        (null != parameters.get(VOTE_CON_PARAM_NAME + ".x"));
807                    if(votePro != voteCon) // Exactly one selected...
808                        {
809                        try
810                            {
811                            // Post vote event value to correct event stream...
812    /*if(IsDebug.isDebug)*/ { System.out.println("***VOTE registered (for="+votePro+"): " + exhibitFullName + " by " + dpID); }
813                            vars.setVariable(new SimpleVariableValue(
814                                    (votePro ? SystemVariables.VOTE_PRO : SystemVariables.VOTE_CON),
815                                    exhibitFullName.getShortName().toString()));
816    
817                            if(voterIPAddr != null)
818                                {
819                                // Note location of voter (as ccTLD or region).
820                                // We are prepared to spend some time on this
821                                // as voting is rare and significant
822                                // and we've probably already collected/cached
823                                // any required data.
824                                final String location = GeoUtils.getRegionByAddress(voterIPAddr, false);
825                                vars.setVariable(new SimpleVariableValue(
826                                    VOTER_LOCATION_STATS_EVENT_DEF,
827                                        VOTER_LOCATION_STATS_EVENT_PREFIX + location));
828                                }
829                            }
830                        catch(final IOException e)
831                            { e.printStackTrace();  /* Just absorb errors. */ }
832                        }
833                    }
834    
835                // Compute time before next vote will be requested
836                // if a user does take this opportunity to vote.
837                // A relatively long time after we expect the original voting opportunity to expire
838                // to act as a second-level screen against spiders
839                // and to avoid pestering a human voter too often.
840                // (We also make this time more unpredictable with a good random source.)
841                final long nextVote = expireBy + 11*WebConsts.VOTE_MIN_REQUEST_GAP_MS +
842                        Rnd.goodRnd.nextInt(7*WebConsts.VOTE_MIN_REQUEST_GAP_MS);
843    
844                // Install a dummy handler to postpone the next time that this user gets asked to vote.
845                // Assumes that the extant listener is removed *before* a call to handle()
846                // so that this new listener will not be removed on return.
847                StatsSink.addListenerForDataPoint(new StatsSink.AbstractStatsListener(dpID, nextVote){
848                    /**Dummy handler that should never in fact be invoked. */
849                    @Override public final String handle(final Map<String, String[]> parameters) { return(result); }
850                    });
851    
852                return(result);
853                }
854            }
855    
856    
857        /**Simple class to allow logging to the given servlet's log().
858         * This holds only a WeakReference to the ServletContext
859         * so as not to obstruct GC when all strong refs go away.
860         * <p>
861         * Stops logging when the referent becomes null.
862         */
863        public static final class ServletLogger implements SimpleLoggerIF
864            {
865            public ServletLogger(final ServletContext ctxt)
866                {
867                if(ctxt == null) { throw new IllegalArgumentException(); }
868                ctxtWR = new WeakReference<ServletContext>(ctxt);
869                }
870    
871            /**Weak ref to servlet context; never null but the referent may be. */
872            private volatile WeakReference<ServletContext> ctxtWR;
873    
874            /**Log the given message.
875             * If the weak reference to the context has died
876             * the log output is silently discarded.
877             */
878            public void log(final String message)
879                {
880                final ServletContext context = ctxtWR.get();
881                if(null == context) { return; }
882                context.log(message);
883                }
884            }
885    
886    
887        /**Simple class to allow logging to a given servlet's log() or System.out if none available.
888         * This allows a logger to be created at instance scope
889         * for (say) a Filter, and set with a context when the config is set
890         * and cleared when one is not available,
891         * all the while remaining a valid logger.
892         * <p>
893         * This holds only a WeakReference to the ServletContext
894         * so as not to obstruct GC when all strong refs go away.
895         */
896        public static final class ServletLoggerWithFallback implements SimpleLoggerIF
897            {
898            /**Weak ref to servlet context; may be null or the referent may be. */
899            private volatile WeakReference<ServletContext> ctxtWR;
900    
901            /**Set context, or clear/remove it if null. */
902            public void setContext(final ServletContext context)
903                {
904                if(null == context) { ctxtWR = null; }
905                else { ctxtWR = new WeakReference<ServletContext>(context); }
906                }
907    
908            /**Log the given message.
909             * Logs to the servlet context logger if available,
910             * else logs to System.out.
911             */
912            public void log(final String message)
913                {
914                final  WeakReference<ServletContext> wr = ctxtWR;
915                final ServletContext context = (null == wr) ? null : wr.get();
916                if(context != null) { context.log(message); }
917                else { System.out.println(message); }
918                }
919            }
920    
921    
922        /**System variables tried, in order, for a "popular" exhibit; private to getPopularExhibit().
923         * We put the download var first to get a decent rate of update
924         * since this value changes quite frequently.
925         * <p />
926         * We don't put the vote var first so as to reduce the temptation
927         * to "throw" a vote to get an exhibit shown on the front page.
928         */
929        private static final SimpleVariableDefinition _gPE_vars[] = {
930            SystemVariables.ACCESSPATTERN_COMPLETED_DOWNLOAD_LOCAL,
931            SystemVariables.ACCESSPATTERN_COMPLETED_DOWNLOAD,
932            SystemVariables.VOTE_PRO,
933            SystemVariables.ACCESSPATTERN_CLICKTHROUGH,
934            SystemVariables.ACCESSPATTERN_CAT_PAGE_VIEW,
935            };
936    
937    //    /**Gets "popular" exhibit, possibly filtered by type; null if none available.
938    //     * Tries to pick a "popular" exhibit by looking at one recently voted for,
939    //     * or downloaded, etc, in the system variables,
940    //     * and that has both thumbnails available where they are possible.
941    //     * <p>
942    //     * This rejects exhibits with a below-par (negative) rating.
943    //     * <p>
944    //     * Using the system variables should mean that this can pick up
945    //     * values set from any mirror, etc, fairly quickly.
946    //     * <p>
947    //     * This cannot guarantee to return a non-null value,
948    //     * but any value that it does return is a current, valid exhibit.
949    //     *
950    //     * @param dsb  handle on the system variables and data; never null
951    //     * @param type  if not null only exhibits of this type are candidates
952    //     * @deprecated Use {@link #getPopularExhibit(DataSourceBean,ExhibitMIME.ExhibitTypeParameters,Collection)} instead
953    //     */
954    //    @Deprecated
955    //    public static String getPopularExhibit(final DataSourceBean dsb,
956    //                                           final ExhibitMIME.ExhibitTypeParameters type)
957    //        { return getPopularExhibit(dsb, type, null); }
958    
959        /**Gets name of "popular" exhibit, possibly filtered by type; null if none available.
960         * Tries to pick a "popular" exhibit by looking at one recently voted for,
961         * or downloaded, etc, in the system variables,
962         * and that has both thumbnails available where they are possible.
963         * <p>
964         * This rejects exhibits with a below-par (negative) rating.
965         * <p>
966         * Using the system variables should mean that this can pick up
967         * values set from any mirror, etc, fairly quickly.
968         * <p>
969         * This cannot guarantee to return a non-null value,
970         * but any value that it does return is a current, valid exhibit.
971         *
972         * @param dsb  handle on the system variables and data; never null
973         * @param type  if not null only exhibits of this type are candidates
974         * @param excludeFullNames  if non-null, any exhibits included by full name
975         *     are not candidates to be returned
976         * @param beQuick  if true then don't spend too long trying to calculate this
977         *     but instead give up quickly if need be
978         *     (so as not to block page generation for example)
979         */
980        public static Name.ExhibitFull getPopularExhibit(final DataSourceBean dsb,
981                                                         final ExhibitMIME.ExhibitTypeParameters type,
982                                                         final Collection<String> excludeFullNames,
983                                                         final boolean beQuick)
984            {
985            if(dsb == null) { throw new IllegalArgumentException(); }
986    
987            final long start = System.currentTimeMillis();
988    
989            for(final SimpleVariableDefinition def : _gPE_vars)
990                {
991                assert(def != null);
992    
993                // Half the time skip a "local" definition
994                // so as to get to see global popular items in the mix.
995                if(def.isLocal() && Rnd.fastRnd.nextBoolean())
996                    { continue; }
997    
998                try
999                    {
1000                    final SimpleVariableValue svv = dsb.getVariable(def);
1001                    if(svv == null) { continue; }
1002    
1003                    // Specified variable must be of String type.
1004                    assert(def.getType() == SimpleVariableDefinition.TYPE_STRING);
1005    
1006                    final String s = (String) svv.getValue();
1007                    if(s == null) { continue; }
1008    
1009                    // Skip any null values.
1010                    if(null == s) { continue; }
1011    
1012                    final AllExhibitProperties aep = dsb.getAllExhibitProperties(-1);
1013                    final Name.ExhibitFull fullName = aep.aeid.getFullName(s);
1014    
1015                    // If this is in the exclusion list then skip it...
1016                    if((excludeFullNames != null) && excludeFullNames.contains(fullName)) { continue; }
1017    
1018                    // Seems not to be a valid/extant exhibit, so give up...
1019                    if(fullName == null) { continue; }
1020                    final ExhibitStaticAttr esa = aep.aeid.getStaticAttr(fullName);
1021                    if(esa == null) { continue; }
1022    
1023                    // If the type was specified and does not match, give up!
1024                    final ExhibitTypeParameters actualType = (ExhibitMIME.getInputFileType(esa.getCharSequence()));
1025                    if(actualType == null)
1026                        { continue; /* Reject untyped exhibit. */ }
1027                    if((type != null) && !type.equals(actualType))
1028                        { continue; /* Reject wrong-type exhibit. */ }
1029    
1030                    // Skip if this exhibit may be "sensitive" somehow.
1031                    final GenProps gp = dsb.getGenProps(-1);
1032                    if(GenUtils.isSensitive(fullName, gp)) { continue; }
1033    
1034                    // If this exhibit type supports thumbnails
1035                    // then reject anything without both immediately available.
1036                    if(actualType.canPossiblyCreateThumbnailOfSameMIMEType())
1037                        {
1038                        final ExhibitThumbnails thumbnails = dsb.getThumbnails(fullName, false);
1039                        if((thumbnails == null) ||
1040                           (thumbnails.getSmall() == null) ||
1041                           (thumbnails.getStandard() == null))
1042                            { continue; /* Reject this. */ }
1043                        }
1044    
1045                    // We allow use of a stale (and ignore a not-yet-computed) rating so as to be quick.
1046                    final ExhibitPropsComputableMutable ePCM =
1047                        aep.getExhibitPropsComputableMutable(fullName);
1048                    // If we don't actually know (absent/stale rating) how good this exhibit is
1049                    // then attempt to find out for next time in the background
1050                    // unless the system is overloaded or (temporarily) conserving energy.
1051                    if((ePCM == null) || ePCM.isStale())
1052                        {
1053                        if(!GenUtils.mustConservePower() && !WebUtils.isOverloaded(dsb.getServletContext()))
1054                            {
1055                            // Use 'discardable' task pool to ensure that we don't block.
1056                            ThreadUtils.lowPriorityThreadPoolDiscardable.submit(new Runnable() {
1057                                /**Force full non-stale recomputation of EPCM of not-apparently-unpopular exhibit. */
1058                                public void run() { aep.getExhibitPropsComputableMutable(fullName, false, gp, dsb, dsb.getScorerCache()); }
1059                                });
1060                            }
1061                        }
1062                    // Reject/skip anything with a definite below-par (non-positive) rating.
1063                    if((ePCM != null) && (ePCM.getGoodness() <= 0)) { continue; }
1064    
1065                    return(fullName); // Got one!
1066                    }
1067                catch(final IOException e)
1068                    {
1069                    // Silently ignore a probably-transient problem...
1070                    }
1071    
1072                // If urged to be quick by our caller
1073                // then abort if we've already taken too long trying
1074                // (a significant fraction of allowed page-generation time).
1075                if(beQuick && ((System.currentTimeMillis() - start) > WebConsts.MAX_PG_DOWNLOAD_MS/2))
1076                    { break; }
1077                }
1078    
1079            return(null); // Nothing found...
1080            }
1081    
1082        /**If true then check for bots/spiders by UA (User-Agent); note that clients can forge their UA. */
1083        private static final boolean CHECK_FOR_SPIDERS_BY_UA = true;
1084    
1085        /**Immutable Set of known spider/bot UA strings; should probably be moved to a text/properties file.
1086         * This is a set of lower-cased first (space/tab/bracket-delimited) words
1087         * from the UA strings.
1088         * <p>
1089         * The names consist only of non-regex-metacharacters in the set [a-z'_-],
1090         * so are safe to embed in a regex.
1091         */
1092    //    @SuppressWarnings("unchecked")
1093        private static final Set<String> spiderUAName1stWordsLC = (!CHECK_FOR_SPIDERS_BY_UA) ? Collections.<String>emptySet() :
1094            Collections.unmodifiableSet(new HashSet<String>(Arrays.asList(new String[]{
1095            "", /* Empty UA string... */
1096            "-",
1097            "alexibot",
1098            "appie",
1099            "aqua_products",
1100            "asterias",
1101            "b2w",
1102            "baiduspider",
1103            "backdoorbot",
1104            "becomebot",
1105            "blowfish",
1106            "bookmark",
1107            "botalot",
1108            "builtbottough",
1109            "bullseye",
1110            "bunnyslippers",
1111            "cheesebot",
1112            "cherrypicker",
1113            "cherrypickerelite",
1114            "cherrypickerse",
1115            "copernic",
1116            "copyrightcheck",
1117            "cosmos",
1118            "crescent",
1119            "curl",
1120            "dittospyder",
1121            "dumbot",
1122            "emailcollector",
1123            "emailsiphon",
1124            "emailwolf",
1125            "enterprise_search",
1126            "erocrawler",
1127            "extractorpro",
1128            "fairad",
1129            "faxobot",
1130            "findlinks",
1131            "flaming",
1132            "foobot",
1133            "freefind",
1134            "gaisbot",
1135            "getright",
1136            "gigabot",
1137            "googlebot-image",
1138            "grub",
1139            "grub-client",
1140            "harvest",
1141            "hatena",
1142            "hloader",
1143            "http",
1144            "httplib",
1145            "humanlinks",
1146            "ia_archiver",
1147            "indy",
1148            "infonavirobot",
1149            "iron33",
1150            "jennybot",
1151            "jetbot",
1152            "kalooga",
1153            "kenjin",
1154            "keyword",
1155            "larbin",
1156            "lexibot",
1157            "libweb",
1158            "libwww-perl",
1159            "linkextractorpro",
1160            "linkscan",
1161            "linkwalker",
1162            "lnspiderguy",
1163            "looksmart",
1164            "lwp-trivial",
1165            "lynx",
1166            "mata",
1167            "miixpc",
1168            "mister",
1169            "moget",
1170            "msiecrawler",
1171            "msnbot",
1172            "naver",
1173            "netants",
1174            "netmechanic",
1175            "nicerspro",
1176            "nutch",
1177            "offline",
1178            "omniexplorer_bot",
1179            "openbot",
1180            "openfind",
1181            "oracle",
1182            "perman",
1183            "port",
1184            "propowerbot",
1185            "prowebwalker",
1186            "psbot",
1187            "python-urllib",
1188            "queryn",
1189            "radiation",
1190            "repomonkey",
1191            "rma",
1192            "searchpreview",
1193            "sitesnagger",
1194            "sootle",
1195            "spankbot",
1196            "spanner",
1197            "stanford",
1198            "suzuran",
1199            "szukacz",
1200            "teleport",
1201            "teleportpro",
1202            "telesoft",
1203            "thenomad",
1204            "tocrawl",
1205            "true_robot",
1206            "turingos",
1207            "url",
1208            "url_spider_pro",
1209            "urly",
1210            "vci",
1211            "wbdbot",
1212            "webauto",
1213            "webbandit",
1214            "webcopier",
1215            "webenhancer",
1216            "websauger",
1217            "website",
1218            "webster",
1219            "webstripper",
1220            "webvac",
1221            "webzip",
1222            "wget",
1223            "www-collector-e",
1224            "xenu's",
1225            "yahooseeker",
1226            "zeus",
1227            })));
1228    
1229        /**Set of characters taken as a main-part terminator in a User-Agent header, including whitespace.
1230         * This should be usable as the separator arg to StringTokenizer,
1231         * and in a regex when wrapped in "[]" square brackets.
1232         */
1233        private static final String UA_TERMINATOR_CHARS = " \t/(:";
1234    
1235        /**Extra case-insensitive patterns matched in UA names, "|"-separated, or null if none.
1236         * Essentially, anything alphanumeric-ish ending in "bot", or
1237         * anything alphanumeric-ish containing "spider".
1238         */
1239        private static final String UA_BOT_PATTERNS = "([a-z0-9._-]*bot)|([a-z0-9._-]*spider[a-z0-9._-]*)";
1240    
1241        /**Case-insensitive regex match for all non-empty UA names from spiderUAName1stWordsLC; null if not checking UA names.
1242         * Made public to enable some unit testing.
1243         */
1244        public static final Pattern UA_REGEX;
1245        /**Initialise UA_REGEX. */
1246        static
1247            {
1248            if(!CHECK_FOR_SPIDERS_BY_UA) { UA_REGEX = null; }
1249            else
1250                {
1251                final StringBuilder sb = new StringBuilder(32 + (spiderUAName1stWordsLC.size()<<4));
1252                // First build the simple compound regex name1|name2|...
1253                // possibly with some additional common patterns.
1254                if(null != UA_BOT_PATTERNS) { sb.append(UA_BOT_PATTERNS); }
1255                for(final String s : spiderUAName1stWordsLC)
1256                    {
1257                    if((s == null) || (s.length() == 0)) { continue; }
1258                    if(sb.length() > 0) { sb.append('|'); }
1259                    sb.append(s);
1260                    }
1261                // Now wrap and add terminator...
1262                sb.insert(0, "^(");
1263                sb.append(")[").append(UA_TERMINATOR_CHARS).append("]?.*$");
1264                // Now compile the regex...
1265                UA_REGEX = Pattern.compile(sb.toString(), Pattern.CASE_INSENSITIVE);
1266                }
1267            }
1268    
    /**Name of the Boolean request attribute under which requestProbablyFromSpider() stores its verdict.
     * A later call for the same request returns the stored value without repeating the checks.
     */
    private static final String _rPFS_CACHE_PNAME = "org.hd.pg2k._rPFS_CACHE";

    /**Cache from (common) whole UA strings to "bot"ness to save repeated/slow regex matching; null only when not checking for bots by UA.
     * Created with a size argument of 512 and the name "_isBot_cache".
     * <p>
     * We're prepared to discard all of this under memory stress,
     * as an entry only saves re-running the UA match for a request.
     * <p>
     * NOTE(review): described historically as an LRU cache; the actual eviction policy
     * is whatever MemoryTools.SimpleProbabilisticCache implements -- confirm there.
     */
    private static final MemoryTools.SimpleProbabilisticCache<String,Boolean> _isBot_cache = !CHECK_FOR_SPIDERS_BY_UA ? null :
        MemoryTools.SimpleProbabilisticCache.<String,Boolean>create(512, "_isBot_cache");
1278    
1279        /**Attempt to determine quickly if the current request is probably from a spider/bot (ie not a human).
1280         * NB: This does not attempt to distinguish between good spiders (ie bona fide SEs)
1281         * and bad/broken/rude bots/scrapers/spiders.
1282         * <p>
1283         * This tries to distinguish human from non-human,
1284         * at least in part to conserve (prime interactive) resources for humans,
1285         * and does not claim to be perfect.
1286         * <p>
1287         * This returns true if there is no (valid) referring page
1288         * (and this visitor is not new to the site, ie has recently pulled another page),
1289         * though some browsers/firewalls may routinely block this info,
1290         * and some referrals, eg to target="_blank", may show no Referer value.
1291         * <p>
1292         * This should be quick as most or all requests may be tested with this.
1293         * <p>
1294         * TODO: Should expand this to test against well-known IP addresses.
1295         * <p>
1296         * TODO: Should include a behavioural element, eg mindless, rapid, pauseless browsing.
1297         *
1298         * @param request  the incoming request; never null
1299         *
1300         * @return true if the requesting client is probably a bot, false if probably a human
1301         */
1302        public static boolean requestProbablyFromSpider(final HttpServletRequest request)
1303            {
1304            // If we already worked this out, return result already computed/cached!
1305            final Object cachedResult = request.getAttribute(_rPFS_CACHE_PNAME);
1306            if(cachedResult instanceof Boolean)
1307                { return(((Boolean) cachedResult).booleanValue()); }
1308    
1309            // First check for lack of a valid "Referer" header
1310            // AND the the client not being 'new' to this site.
1311            if((null == request.getAttribute(ThroughputMonitorFilterBase.REQ_ATTR_NAME_NEW_CLIENT_FLAG)) &&
1312               "".equals(requestProbablyReferredFromExternalSite(request)))
1313                {
1314    if(IsDebug.isDebug) { System.out.println("[Client has no/unparsable Referer and is not new: probably a spider/robot.]"); }
1315                request.setAttribute(_rPFS_CACHE_PNAME, Boolean.TRUE);
1316                return(true);
1317                }
1318    
1319            // Now check for a known bot User-Agent...
1320            if(CHECK_FOR_SPIDERS_BY_UA)
1321                {
1322                final Enumeration<?> aeEn = request.getHeaders("User-Agent");
1323                final boolean hasNoUA = (aeEn == null) || !aeEn.hasMoreElements();
1324                if(hasNoUA)
1325                    {
1326    if(IsDebug.isDebug) { System.out.println("[Client has no UA: assumed to be a spider/robot.]"); }
1327                    request.setAttribute(_rPFS_CACHE_PNAME, Boolean.TRUE);
1328                    return(true); /* Rude/suspicious. */
1329                    }
1330                else
1331                    {
1332                    // Check all such UA headers if more than one for some reason...
1333                    while(aeEn.hasMoreElements())
1334                        {
1335                        final String wholeUA = (String) aeEn.nextElement();
1336    
1337                        final int lenUA = wholeUA.length();
1338                        if((lenUA < 2) || (lenUA > 512))
1339                            {
1340                            // Strange-sized UA is suspicious...
1341                            // And we don't want our cache/memory full of huge UA strings.
1342    if(IsDebug.isDebug) { System.out.println("[Client has empty/tiny/huge (ie rude/broken/silly/dangerous) UA: assumed to be a spider/robot.]"); }
1343                            request.setAttribute(_rPFS_CACHE_PNAME, Boolean.TRUE);
1344                            return(true);
1345                            }
1346    
1347    //final long t1 = System.nanoTime();
1348                        final boolean isBot;
1349                        final Boolean b = _isBot_cache.get(wholeUA);
1350                        if(b != null) { isBot = b.booleanValue(); }
1351                        else
1352                            {
1353                            isBot = UA_REGEX.matcher(wholeUA).matches();
1354                            // Cache match result for this whole User-Agent value.
1355                            _isBot_cache.put(wholeUA, isBot ? Boolean.TRUE : Boolean.FALSE);
1356    
1357                            // When we see a (new-ish) bot UA then log it and where it came from...
1358                            if(isBot) { System.out.println("[INFO: Probable bot/spider UA seen (full UA='"+TextUtils.sanitiseForXML(wholeUA, 256, false)+"'); client IP: "+request.getRemoteAddr()+".]"); }
1359                            }
1360    //final long t2 = System.nanoTime(); System.out.println("[UA match time: "+(t2-t1)+"ns]");
1361    
1362                        if(isBot)
1363                            {
1364                            request.setAttribute(_rPFS_CACHE_PNAME, Boolean.TRUE);
1365                            return(true);
1366                            }
1367                        }
1368                    }
1369                }
1370    
1371            // TODO: check known spider/bot IP addresses...
1372    
1373            request.setAttribute(_rPFS_CACHE_PNAME, Boolean.FALSE);
1374            return(false); // Probably human...
1375            }
1376    
1377        /**Attempt to detect if the current request has been referred from an external Web site.
1378         * This simply checks if the "Referer" is apparently from any of our
1379         * sites or their aliases.
1380         * <p>
1381         * Note that since this info can be forged,
1382         * or disabled/knobbled for security reasons,
1383         * this is not completely reliable.
1384         * <p>
1385         * A missing "Referer" will be taken as indicating an "external" reference,
1386         * and will catch most well-behaved spiders as a result.
1387         * <p>
1388         * Since we may have to do some string parsing this may not be very fast,
1389         * but it should not be very slow either.
1390         * <p>
1391         * Even if there is more than one "Referer" header we will only look at one.
1392         *
1393         * @param request  the incoming request; never null
1394         *
1395         * @return  null if referred from one of our sites/hosts,
1396         *          "" if unparsable or no referring URL,
1397         *          else normalised (lowercase, stripped of common prefixes, etc)
1398         *          referring host's name
1399         */
1400        public static String requestProbablyReferredFromExternalSite(final HttpServletRequest request)
1401            {
1402            final Enumeration<?> rEn = request.getHeaders("Referer");
1403            final boolean noRef = ((rEn == null) || !(rEn.hasMoreElements())); // No Referer...
1404            if(noRef) { return(""); }
1405    
1406            // Get the referring URL...
1407            final String ref = (String) rEn.nextElement();
1408    
1409            try
1410                {
1411                // Attempt to parse it for the hostname.
1412                // If we fail, ie ref is ill-formed, then count this as "external".
1413                final URL url = new URL(ref);
1414    
1415                // If we can't find virtual host details for the host
1416                // then assume that this is an external host.
1417                final String rawHostName = url.getHost();
1418                final String normalisedHostName = HostUtils.normaliseVirtualHostName(rawHostName);
1419                if(null == VirtualHosts.getVirtualHostDetails(normalisedHostName, null))
1420                    {
1421                    final String ourName = request.getServerName();
1422                    if((null != ourName) &&
1423                            normalisedHostName.equals(HostUtils.normaliseVirtualHostName(ourName)))
1424                        {
1425                        return(null); // Our host name, even if not expected one, so treat as OK.
1426                        }
1427                    return(normalisedHostName); // OK, got the referring host, and it's not us.
1428                    }
1429                return(null); // This was apparently referred by us, so is OK.
1430                }
1431            catch(final Exception e)
1432                {
1433                return(""); /* Cannot parse referrer, so treat as if external. */
1434                }
1435            }
1436    
1437        /**Generate full URL for exhibit given exhibit name; never null.
1438         * This may take account of such factors as which servers
1439         * are up, how heavily loaded, etc,
1440         * in order to perform automatic load balancing
1441         * and give the best possible user experience.
1442         * <p>
1443         * In order to do this well we may need the request details,
1444         * so they should be supplied if possible.
1445         * These should be the client's request to a Gallery site,
1446         * else null.
1447         */
1448        public static URL makeExhibitURL(final CharSequence exhibitName,
1449                                         final HttpServletRequest request,
1450                                         final DataSourceBean vars)
1451            throws MalformedURLException
1452            {
1453            return(new URL("http://" +
1454                           chooseMirrorHostToBalanceLoad(request, vars) +
1455                           makeExhibitRRURL(exhibitName)));
1456            }
1457    
1458        /**Generate root-relative URL for exhibit given exhibit name; never null.
1459         */
1460        public static String makeExhibitRRURL(final CharSequence exhibitName)
1461            //throws MalformedURLException
1462            {
1463            return("/" + WebConsts.BASE_PATH_EXHIBITS + "/" + exhibitName);
1464            }
1465    
1466        /**Generate full URL for thumbnail/sample given exhibit name and standard/small selector; never null.
1467         */
1468        public static URL makeThumbnailURL(final CharSequence exhibitName, final boolean std)
1469            throws MalformedURLException
1470            {
1471            return(new URL("http", CoreConsts.MAIN_DATA_HOST, makeThumbnailRRURL(exhibitName, std)));
1472            }
1473    
1474        /**Generate root-relative URL for thumbnail/sample given exhibit name and standard/small selector; never null.
1475         */
1476        public static String makeThumbnailRRURL(final CharSequence exhibitName, final boolean std)
1477            //throws MalformedURLException
1478            {
1479            return("/" + WebConsts.BASE_PATH_TN + "/" + (std ? WebConsts.PATH_TN_STD : WebConsts.PATH_TN_SML) + "/" + exhibitName);
1480            }
1481    
1482        /**Generate full URL for catalogue page given exhibit name; never null.
1483         * This should always generate the canonical/"official" form of the URL,
1484         * eg not including mirrors or aliases.
1485         * <p>
1486         * The media-type suffix (eg ".html" or ".wml") must be supplied.
1487         */
1488        public static URL makeCatPageURL(final CharSequence exhibitName, final String mediaTypeSuffix)
1489            throws MalformedURLException
1490            {
1491            return(new URL("http", CoreConsts.MAIN_DATA_HOST, makeCatPageRRURL(exhibitName, mediaTypeSuffix)));
1492            }
1493    
1494        /**Generate root-relative URL for catalogue page given exhibit name; never null.
1495         * This should take account of such factors as which servers
1496         * are up, how heavily loaded, etc.
1497         * <p>
1498         * The media-type suffix (eg ".html" or ".wml") must be supplied.
1499         */
1500        public static String makeCatPageRRURL(final CharSequence exhibitName, final String mediaTypeSuffix)
1501            {
1502            return("/" + WebConsts.BASE_PATH_CATPAGE + "/" + exhibitName + mediaTypeSuffix);
1503            }
1504    
1505        /**Returns true if this Web server is overloaded (eg for bandwidth or CPU).
1506         * If this server is positively overloaded then this routine returns true.
1507         * <p>
1508         * If not overloaded or not possible to tell, this returns false.
1509         * <p>
1510         * (If the context is null then this routine may have to return false.)
1511         * <p>
1512         * This may gather information from a number of sources,
1513         * but in the main the ThroughputFilter's data will be used.
1514         * We may explicitly set the overload flag here
1515         * if we detect the system to be overloaded
1516         * to make it quicker for us and others on a subsequent call;
1517         * this will get overwritten by TMF when it next gets to assess load.
1518         * <p>
1519         * If true then the UI and application should attempt to use less bandwidth
1520         * and CPU time than normal, perhaps by using cheaper algorithms than
1521         * usual (eg less comprehensive searches) or a less-graphics-intensive UI.
1522         * <p>
1523         * This may default to true while the server is starting up to ensure that
1524         * as little extra CPU (for example) as possible is consumed while
1525         * the server is compiling JSPs, loading classes, etc.
1526         * <p>
1527         * This is intended to be relatively cheap to call,
1528         * since it may get called frequently,
1529         * for example especially when the system is busy,
1530         * and/or at start-up before JIT compilation (eg HotSpot) has kicked in.
1531         */
1532        public static boolean isOverloaded(final ServletContext ctxt)
1533            {
1534            if(ctxt != null)
1535                {
1536                // If the ThroughputFilter thinks that we are overloaded,
1537                // then we report the system as overloaded.
1538                final Object overloadFlag =
1539                        ctxt.getAttribute(WebConsts.BANDWIDTH_OVERLOAD_ATTR_NAME);
1540                // We take anything but the presence of the value Boolean.FALSE
1541                // (eg the absence of any value at all) as indication of a problem,
1542                // eg that the system may be too busy even to clear the flag,
1543                // as well as a short-term cache of overload status computed in this routine
1544                // until the TF computes/sets a new value.
1545                if((!(overloadFlag instanceof Boolean)) || ((Boolean) overloadFlag).booleanValue())
1546                    { return(true); }
1547                }
1548    
1549            // If the system is known to be at or over our CPU load limit then we're overloaded.
1550            if(ThreadUtils.cpuHeavilyLoaded())
1551                {
1552                // Effectively cache overload status for a while (until the next TF update).
1553                if(ctxt != null) { ctxt.removeAttribute(WebConsts.BANDWIDTH_OVERLOAD_ATTR_NAME); }
1554                return(true);
1555                }
1556    
1557            // We can't see any problems, given the available data.
1558            return(false);
1559            }
1560    
    /**Private timestamp (ms) for isLightlyLoaded() of when we last found ourselves not lightly loaded.
     * Once we discover that we are not lightly loaded we set this to the current time,
     * and isLightlyLoaded() then returns false without re-testing for about a second
     * in case the testing itself adds to the load.
     * <p>
     * Initially zero.
     * <p>
     * Marked volatile for thread-safe lock-free access.
     */
    private static volatile long _lastNotLightlyLoaded;

    /**Time (ms) that we last restarted our load-limit ramp-up from zero.
     * Initially 'now' to encourage a gentle start.
     * isLightlyLoaded() resets this to the current time when power must be conserved
     * or when the load is at least twice the basic light-load threshold.
     * <p>
     * Marked volatile for thread-safe lock-free access.
     */
    private static volatile long _lastResetLA = System.currentTimeMillis();

    /**Time over which to ramp up load limit in isLightlyLoaded(), ms; strictly positive.
     * Should be at least the 60s time claimed in the documentation for
     * OperatingSystemMXBean.getSystemLoadAverage()
     * to dampen CPU-load flapping,
     * especially when emerging from power-conservation mode.
     * <p>
     * Fixed once at class initialisation as 180000ms plus a random extra amount
     * (presumably in the range [0,60000), as for java.util.Random.nextInt(int) -- confirm against Rnd.fastRnd).
     */
    private static final int LOAD_AVERAGE_RAMP_UP_TIME = 180000 + Rnd.fastRnd.nextInt(60000); // 3+ minutes.
1585    
1586        /**Returns true if the site seems to be lightly loaded (CPU, bandwidth, etc).
1587         * In case of difficulties/confusion this reports false.
1588         * <p>
1589         * This routine is quite careful and conservative,
1590         * and will only return true if this server and the host system
1591         * both seem to be lightly loaded and stable by all the appropriate metrics.
1592         * <p>
1593         * This never reports the system to be lightly loaded if it is low on power
1594         * (eg on a nearly-expired battery) so as to avoid unnecessary background work.
1595         * <p>
1596         * Where this can check 'uptime' then it tries to enforce a modest CPU ramp-up
1597         * over approximately the 60s-or-so of OperatingSystemMXBean.getSystemLoadAverage()
1598         * to limit rapid fluctuations in CPU load (and, for example, power consumption).
1599         * <p>
1600         * This routine is designed to be called frequently,
1601         * ie is reasonably fast and efficient.
1602         */
1603        public static boolean isLightlyLoaded(final ServletContext ctxt)
1604            {
1605            final long now = System.currentTimeMillis();
1606            // Don't re-test within ~1s of finding the system to be significantly loaded.
1607            if(now - _lastNotLightlyLoaded < 1013) { return(false); }
1608    
1609            // Treat the system as stressed and thus NOT lightly loaded if (temporarily) short of power
1610            // so as to discourage unnecessary background/speculative work.
1611            // Also forces load-limit ramp-up to restart from scratch.
1612            if(GenUtils.mustConservePower())
1613                { _lastNotLightlyLoaded = now; _lastResetLA = now; return(false); }
1614    
1615            // We check for memory stress; we are not lightly loaded if memory is hurting.
1616            if(MemoryTools.isMemoryStressed())
1617                { _lastNotLightlyLoaded = now; return(false); }
1618    
1619            final double loadFrac = ThreadUtils.loadFraction();
1620            if(loadFrac >= 0) // -ve indicates load average not available so we don't test.
1621                {
1622                // Now we check the whole-host-operating-system (time-averaged) load.
1623                // This should be well under full capacity to be considered lightly loaded,
1624                // ie the run-queue length must be much less than the number of available CPUs.
1625                // Note that where this value is not available getSystemLoadAverage() returns a -ve value
1626                // and we then regard the system as not busy by this metric, as a reasonable fallback.
1627                // This does not take into account any stress on I/O nor other subsystems.
1628                final float targetBasicThreshold = LocalProps.getLightLoadMax();
1629                // If the system is well over over our lower load limit, our fault or not,
1630                // then restart our load ramp-up to give the system a chance to recover.
1631                if(loadFrac >= 2 * targetBasicThreshold)
1632                    { _lastNotLightlyLoaded = now; _lastResetLA = now; return(false); }
1633                final long timeSinceLoadRampUpStart = (now - _lastResetLA);
1634                // If still in load ramp-up phase then reduce 'uptime' threshold accordingly.
1635                final float targetThreshold = (timeSinceLoadRampUpStart >= LOAD_AVERAGE_RAMP_UP_TIME) ?
1636                        targetBasicThreshold :
1637                        (targetBasicThreshold * Math.max(0, Math.min(1, timeSinceLoadRampUpStart / (float) LOAD_AVERAGE_RAMP_UP_TIME)));
1638                final boolean aboveThreshold = loadFrac >= targetThreshold;
1639                if(aboveThreshold)
1640                    { _lastNotLightlyLoaded = now; return(false); }
1641                }
1642    
1643            if(ctxt != null)
1644                {
1645                // If the ThroughputFilter doesn't think that we are lightly loaded,
1646                // then we immediately report the system as not lightly loaded.
1647                final Object underloadFlag =
1648                        ctxt.getAttribute(WebConsts.BANDWIDTH_LIGHTLOAD_ATTR_NAME);
1649                // We take the absence of any value as indication of a problem,
1650                // eg that the system may be too busy even to set the flag.
1651                if((!(underloadFlag instanceof Boolean)) ||
1652                       (!((Boolean) underloadFlag).booleanValue()))
1653                    { _lastNotLightlyLoaded = now; return(false); }
1654    
1655                // We double-check that some problem is not making us seem overloaded at the same time...
1656                if(isOverloaded(ctxt)) { _lastNotLightlyLoaded = now; return(false); }
1657    
1658                return(true); // Seems lightly loaded...
1659                }
1660    
1661            // Site status is unclear, so assume not quiet for now.
1662            // However, this 'false' is not cacheable, ie is not a real result.
1663            return(false);
1664            }
1665    
    /**If true then exhibitHasThumbnail() will try to cache its results.
     * This should avoid us having to load thumbnails into memory
     * just to see if they exist or not.
     * <p>
     * exhibitsHaveThumbnail() only attempts its parallel pre-fetch when this is true.
     */
    public static final boolean TN_AVAIL_CACHE = true;

    /**The target thumbnail availability cache minimum size, strictly positive.
     * Should be large enough that most thumbnail availability requests
     * will be caught by it regardless of exhibit set size,
     * but small enough to limit memory requirements to something reasonable.
     * Should be *much* larger than the full set of thumbnails ever seen
     * on one page and/or that might be in our "popular" page set
     * so as to avoid thrashing fruitlessly.
     * <p>
     * Computed as a small base, plus five single-page contact sheets' worth of thumbnails,
     * plus the maximum number of different exhibit name values.
     * <p>
     * We are prepared to give up all cache entries rather than cause an OOME.
     */
    private static final int TN_AVAIL_CACHE_SIZE_MIN_TARGET = 131 +
        5*WebConsts.SINGLE_PAGE_CONTACT_SHEET_TN_COUNT +
        SystemVariables.MAX_DIFF_EXHIBIT_NAME_VALUES;

    /**Private key under which the thumbnail-existence cache is held in the data source; never null.
     * Used by exhibitHasThumbnail(), and by exhibitsHaveThumbnail() to look up the same cache
     * via DataSourceBean.getUnlinkedValue().
     */
    private static final DataSourceBean.UnlinkedKey tnHTMLCacheKey = new DataSourceBean.UnlinkedKey("tnHTMLCacheKey");
1688    
1689        /**Test if the given exhibits have thumbnails (locally) available; never null.
1690         * This is suitable for a bulk check, eg before rendering a table,
1691         * and may be internally parallelised to overcome I/O latency, etc.
1692         *
1693         * @param exhibitNames  non-null list of full exhibit names;
1694         *     not altered by this routine and must not be changed by the caller
1695         *     while this routine is running
1696         *
1697         * @return  a bit in the result set for each thumbnail that definitely
1698         *     exists in the requested size; an unset bit may represent "not known"
1699         */
1700        @SuppressWarnings("unchecked")
1701        public static BitSet exhibitsHaveThumbnail(final DataSourceBean dataSource,
1702                        final List<Name.ExhibitFull> exhibitNames,
1703                        final boolean standard,
1704                        final boolean forceCreation)
1705            {
1706            final int n = exhibitNames.size();
1707    
1708            // Only try the parallel fetch if:
1709            //   * We have an extant cache of thumbnail state.
1710            //   * There is more than one item to check.
1711            //   * We're not starved of resources (eg power).
1712            final MemoryTools.SimpleLRUMapAutoSizeForHitRate<Name.ExhibitFull, Byte> tnExistenceMap;
1713            if(TN_AVAIL_CACHE && (n > 1) &&
1714               /* (!GenUtils.mustConservePower()) && */
1715               (null != (tnExistenceMap = (MemoryTools.SimpleLRUMapAutoSizeForHitRate<Name.ExhibitFull, Byte>) dataSource.getUnlinkedValue(tnHTMLCacheKey))))
1716                {
1717                // Find all the exhibits whose thumbnail state is not cached at all,
1718                // and try to start one or more background thread(s) to check their status.
1719                // We expect this activity to be largely I/O bound.
1720    
1721                // Tasks to wait for the completion of...
1722                final List<Future<?>> tasks = new LinkedList<Future<?>>();
1723    
1724                for(final Name.ExhibitFull en : exhibitNames)
1725                    {
1726                    // If we have any tn state already cached then skip this entry...
1727                    if(null != tnExistenceMap.get(en)) { continue; }
1728    
1729                    // Try to fetch this missing entry into cache concurrently...
1730                    tasks.add(ThreadUtils.nonCPUThreadPool.submit(new Runnable(){
1731                        public final void run()
1732                            { exhibitHasThumbnail(dataSource, en, standard, forceCreation); }
1733                        }));
1734                    }
1735    
1736                // Wait for any tasks to complete.
1737                for(final Future<?> task : tasks)
1738                    {
1739                    try { task.get(); }
1740                    catch(final Exception e) { e.printStackTrace(); /* Absorb but log any error. */ }
1741                    }
1742                }
1743    
1744            // Fetch all values through the cache using the caller's thread.
1745            final BitSet result = new BitSet(n);
1746            for(int i = n; --i >= 0; )
1747                {
1748                if(exhibitHasThumbnail(dataSource, exhibitNames.get(i), standard, forceCreation))
1749                    { result.set(i); }
1750                }
1751    
1752            return(result);
1753            }
1754    
1755        /**Test if the given exhibit has a thumbnail (locally) available.
1756         * Does not try to force one to be made if one is not extant (or in cache)
1757         * unless the forceCreation argument is true.
1758         * <p>
1759         * Since this is likely to be testing what is in (local) cache,
1760         * its results can be considered at best a (good) hint.
1761         * <p>
1762         * We do not cache entirely negative answers
1763         * (ie that an exhibit has no thumbnails)
1764         * as this may change if we overcome (eg) a temporary resource issue.
1765         * But we assume the converse,
1766         * ie that once we have a thumbnail then it will always be available.
1767         * <p>
1768         * Always returns false for exhibit types that do not support thumbnails.
1769         *
1770         * @param dataSource  the data source; never null
1771         * @param exhibitName  the full exhibit name; must be valid (eg non-null)
1772         * @param standard  if true, tests for the presence of a standard-size
1773         * @param forceCreation  if true, we can try to force creation/fetch
1774         *     of the underlying thumbnail if not already available locally
1775         *
1776         * @return true if exhibit definitely has/had one/both thumbnails,
1777         *     false if unknown or thumbnails are not currently available
1778         */
1779        @SuppressWarnings("unchecked")
1780        public static boolean exhibitHasThumbnail(final DataSourceBean dataSource,
1781                                                  final Name.ExhibitFull exhibitName,
1782                                                  final boolean standard,
1783                                                  final boolean forceCreation)
1784            {
1785            if((dataSource == null) || (exhibitName == null))
1786                { throw new IllegalArgumentException(); }
1787    
1788            // If we are cacheing the availability of thumbnails
1789            // then ensure the cache map exists here.
1790            // This is a thread-safe size-limited LRU map.
1791            // Note that this is *not* linked to the AEP
1792            // so that we may retain a little stale information indefinitely,
1793            // especially though AEP changes when much has to be recomputed.
1794            MemoryTools.CacheMiniMap<Name.ExhibitFull, Byte> tnExistenceMap;
1795            if(TN_AVAIL_CACHE)
1796                {
1797                while((tnExistenceMap = (MemoryTools.CacheMiniMap<Name.ExhibitFull, Byte>) dataSource.getUnlinkedValue(tnHTMLCacheKey)) == null)
1798                    {
1799                    // Size the table to suit the exhibit set and allow growth of the exhibit set...
1800                    int nExhibits = 0;
1801                    try { nExhibits = dataSource.getAllExhibitProperties(-1).aeid.length; } catch(final IOException e) { /* Ignore. */ }
1802                    dataSource.putIfAbsentUnlinkedValue(tnHTMLCacheKey, MemoryTools.SimpleLRUMapAutoSizeForHitRate.<Name.ExhibitFull, Byte>create(0, Math.max(2*TN_AVAIL_CACHE_SIZE_MIN_TARGET, 1001+nExhibits), "exhibitHasThumbnail"));
1803                    }
1804                }
1805    
1806            // We store availability as a bit-mask
1807            //   * bit 0 is 1 iff the small thumbnail is available
1808            //   * bit 1 is 1 iff the std thumbnail is available
1809            byte availability = 0; // Assume none available by default.
1810    
1811            // Only use cached positive values.
1812            // A negative cached result has us check again...
1813            // We cache negative results mainly to maintain correct hit-rate stats.
1814            final Byte cachedAvailability;
1815            if(!TN_AVAIL_CACHE || (null == (cachedAvailability = tnExistenceMap.get(exhibitName))) || (cachedAvailability.byteValue() <= 0))
1816                {
1817                // Hauling the thumbnail pair into view can be very expensive,
1818                // eg may involve disc fetches or image decoding or worse,
1819                // and may simply displace other items better kept in cache/memory.
1820                final ExhibitThumbnails tns = dataSource.getThumbnails(exhibitName, forceCreation);
1821    
1822                if(tns != null)
1823                    {
1824                    if(tns.getSmall() != null)
1825                        { availability |= 1; }
1826                    if(tns.getStandard() != null)
1827                        { availability |= 2; }
1828                    }
1829    
1830                if(TN_AVAIL_CACHE)
1831                    {
1832                    // Byte.valueOf() avoids creating distinct instances.
1833                    tnExistenceMap.put(exhibitName, Byte.valueOf(availability));
1834    if(IsDebug.isDebug && ((tnExistenceMap.size() & 0x3ff) == 0)) { dataSource.log("    tnExistenceMap: " + tnExistenceMap.toString()); }
1835                    }
1836                }
1837            // Use the cached (positive) value...
1838            else
1839                { availability = cachedAvailability.byteValue(); }
1840    
1841            final boolean tnAvailable = (0 != (availability & (standard ? 2 : 1)));
1842            return(tnAvailable);
1843            }
1844    
1845        /**Get thumbnail image URL to embed in HTML page (usually JPEG/GIF/PNG); null if none available.
1846         * By preference uses purpose-built thumbnail, else tries to
1847         * use image itself if small enough.
1848         * <p>
1849         * Has to be passed a dataSource and a full exhibit name.
1850         * <p>
1851         * Returns null if no suitable thumbnail URL can be generated.
1852         * <p>
1853         * This may cache its results,
1854         * in particular assuming that once a particular thumbnail becomes available
1855         * that it does not go away again.
1856         *
1857         * @param tnDim  (output argument) if not null and result is not null,
1858         *     is filled in with the thumbnail dimensions
1859         * @param dontCreateTn  if true, don't force creation of a thumbnail if
1860         *     one is not already readily available
1861         */
1862        public static String makeHTMLInlineImageThumbnailURL(final DataSourceBean dataSource,
1863                                                             final Name.ExhibitFull exhibitName,
1864                                                             final boolean std,
1865                                                             final boolean relativeURL,
1866                                                             final java.awt.Dimension tnDim,
1867                                                             final boolean dontCreateTn)
1868            throws IOException
1869            {
1870            final AllExhibitProperties aep = dataSource.getAllExhibitProperties(-1);
1871    
1872            // Get full exhibit details...
1873            final ExhibitStaticAttr esa = aep.aeid.getStaticAttr(exhibitName);
1874            // Stop if exhibit has gone away or is invalid.
1875            if(esa == null) { return(null); }
1876    
1877            if(WebUtils.canInlineInHTMLPage((ExhibitMIME.getInputFileType(esa.getCharSequence()))))
1878                {
1879                // Get the exhibit immutable computable properties if possible.
1880                final ExhibitPropsComputable epc = aep.getExhibitPropsComputable(exhibitName);
1881                final java.awt.Dimension xyDim = (epc == null) ? null : epc.getXyDimensions();
1882                // Compute what thumbnail dimensions should be...
1883                final java.awt.Dimension thumbnailXyDim = (xyDim == null) ? null : ExhibitThumbnails.computeThumbnailDimensions(xyDim, std);
1884    
1885                // Is this in fact a 2D image?
1886                if(thumbnailXyDim != null)
1887                    {
1888                    // Pass dimensions back to caller.
1889                    if(tnDim != null)
1890                        {
1891                        tnDim.width  = thumbnailXyDim.width;
1892                        tnDim.height = thumbnailXyDim.height;
1893                        }
1894    
1895                    // Is this a small enough (in bytes) exhibit to be its own thumbnail?
1896                    final boolean smallExhibit = (esa.length <=
1897                        (std ? ExhibitThumbnails.STD_ABS_MAX_BYTES : ExhibitThumbnails.SML_ABS_MAX_BYTES));
1898                    // Can this be its own thumbnail?
1899                    // It must be small enough in bytes,
1900                    // and possibly small enough in XxY pixels too.
1901                    final boolean canBeOwnThumbnail =
1902                        (smallExhibit && (WebConsts.ALLOW_BROWSER_IMAGE_SCALE || thumbnailXyDim.equals(xyDim)));
1903    
1904                    // See if we have thumbnails available
1905                    // (if eager, force one to be made,
1906                    // else see if one already exists if we can't inline directly).
1907                    // If we force them to be created
1908                    // then we expect them to remain cached!
1909                    // Shall we force creation?
1910                    final boolean forceCreation = !dontCreateTn &&
1911                        (WebConsts.EAGER_TN_USE || !canBeOwnThumbnail);
1912    
1913                    // Check for availability of real thumbnail.
1914                    final boolean tnAvailable = exhibitHasThumbnail(dataSource,
1915                                                                    exhibitName,
1916                                                                    std,
1917                                                                    forceCreation);
1918    
1919                    final boolean canShowThumbnail = tnAvailable || canBeOwnThumbnail;
1920    
1921    //System.err.println("[sE/cBOT/cST: "+smallExhibit+"/"+canBeOwnThumbnail+"/"+canShowThumbnail+".]");
1922    
1923                    if(canShowThumbnail)
1924                        {
1925                        // Chose the (relative) URL to use
1926                        // (always serve from same host as this page).
1927                        // Show true thumbnail by preference...
1928                        if(relativeURL)
1929                            { return(tnAvailable ? WebUtils.makeThumbnailRRURL(exhibitName, std) : WebUtils.makeExhibitRRURL(exhibitName)); }
1930                        else
1931                            { return((tnAvailable ? WebUtils.makeThumbnailURL(exhibitName, std) : WebUtils.makeExhibitURL(exhibitName, null, dataSource)).toString()); }
1932                        }
1933                    }
1934                }
1935    
1936            return(null); // Can't make a thumbnail.
1937            }
1938    
1939        /**Find the set of exhibits with the same subject as the indicated one.
1940         * Given a List of String exhibit names sorted by
1941         * ExhibitPropsGlobalImmutable.SMART_ORDER
1942         * (or possibly just by ExhibitPropsGlobalImmutable.SUBJECT_ORDER),
1943         * the SUBJECT_ORDER comparator,
1944         * and the index to a given item in that List,
1945         * finds all the items surrounding that have the same SUBJECT_ORDER,
1946         * ie that differ only in attribute words and are variants on the same
1947         * exhibit.  (The result will always contain the input item.)
1948         * <p>
1949         * The List must not contain nulls, all entries must be valid exhibit
1950         * names, and the List must be sorted implicitly with the comparator
1951         * passed in.  The input index must be within bounds.
1952         * <p>
1953         * This does not alter the List passed in.  The return value is
1954         * an unmodifiable sublist of the input.
1955         * <p>
1956         * This returns the sublist of variants on the indicated exhibit;
1957         * this will degenerate to just containing the input exhibit name if there
1958         * are no other exhibit names for the same exhibit.
1959         * <p>
1960         * This assumes that the number of variants of any given exhibit is
1961         * small, and so a linear search is used.
1962         * <p>
1963         * If the List does not support efficient bidirectional movement and seeks
1964         * then this routine will be very inefficient.
1965         */
1966        public static List<Name.ExhibitFull> getExhibitVariantRange(final List<Name.ExhibitFull> allExhibitNames,
1967                                                          final Comparator<Name.ExhibitFull> comparator,
1968                                                          final int thisExhibitIndex)
1969            {
1970            final Name.ExhibitFull thisExhibit = allExhibitNames.get(thisExhibitIndex);
1971            final int listSize = allExhibitNames.size();
1972    
1973            int first, last;
1974    
1975            // Search backwards for first matching item.
1976            for(first = thisExhibitIndex; first > 0; --first)
1977                {
1978                if(comparator.compare(thisExhibit, allExhibitNames.get(first-1)) != 0)
1979                    { break; } // Stop when we hit an item with a different subject.
1980                }
1981    
1982            // Search forwards for last matching item.
1983            for(last = thisExhibitIndex; last < listSize-1; ++last)
1984                {
1985                if(comparator.compare(thisExhibit, allExhibitNames.get(last+1)) != 0)
1986                    { break; } // Stop when we hit an item with a different subject.
1987                }
1988    
1989            return(Collections.unmodifiableList(allExhibitNames.subList(first, last+1)));
1990            }
1991    
1992        /**Computes a minimal human-readable immutable unique prefix of an exhibit short name to distinguish given exhibit from most others.
1993         * Uniqueness is not guaranteed, but this is meant to give a reasonably
1994         * short result that a human can read in the title of a page, for example.
1995         * <p>
1996         * If inputs are bad then this will try to fail gently with an empty-string
1997         * result rather than an exception to make it robust if called directly from
1998         * JSP code, for example.
1999         * <p>
2000         * This is passed a List of exhibits sorted in a "smart" order that
2001         * sorts mainly on the file component of the name,
2002         * probably in a case-insensitive way.
2003         * <p>
2004         * This routine finds a short word prefix that (case-insensitively)
2005         * is different from neighbouring exhibits
2006         * and is thus (depending on the overall sort order)
2007         * probably the shortest unique prefix amongst the entire collection.
2008         * <p>
2009         * (If no unique value is possible, the entire prefix is returned.)
2010         * <p>
2011         * If the List of size 0 we return the empty string and do not use
2012         * the index parameter at all.  This simplifies use where the list
2013         * may transiently be empty during start-up.
2014         * <p>
2015         * This routine may run very slowly if the List argument does not support
2016         * efficient random seeking.
2017         * <p>
2018         * The result is designed to be used in headings and other display text
2019         * such as image alt/title attributes.
2020         * <p>
2021         * TODO: fix inefficient double-parsing of main words...
2022         *
2023         * @param exhibits  sorted (smart-ish sorted) list of exhibit names
2024         *     (String value); must not change while routine is running
2025         * @param ourIndex  index (strictly positive) of the exhibit whose
2026         *     abbreviated name we wish to produce; must be within the List
2027         * @return "" in case of invalid arguments,
2028         *     else some whole-word prefix of the name
2029         */
2030        public static CharSequence minimalUniqueENTitlePrefix(final List<Name.ExhibitFull> exhibits,
2031                                                              final int ourIndex)
2032            {
2033            // Treat bad arguments gently as the outputs are likely to
2034            // be displayed in HTML directly.
2035            if(exhibits.size() == 0) { return(""); }
2036            if((ourIndex < 0) || (ourIndex >= exhibits.size())) { return(""); }
2037    
2038            // Set of attribute words we use for parsing names.
2039            // In this case, deliberately empty.
2040            final Set<String> noAttrWords = Collections.emptySet();
2041    
2042            // Dummy "empty" enumeration we use for missing neighbours.
2043            final Enumeration<?> emptyEn = Collections.enumeration(Collections.emptyList());
2044    
2045            // Get our name and previous/next names
2046            // as word enumerations (or empty enumerations if not present).
2047            // for this purpose we pretend that there are no attribute words...
2048            final ListIterator<Name.ExhibitFull> liFwd = exhibits.listIterator(ourIndex);
2049            final CharSequence ourNameFull = (liFwd.next());
2050            // FIXME: inefficient via full name and tokenizer...
2051            final CharSequence ourNameMainWords = ExhibitName.getMainWordsComponent(
2052                                            ourNameFull, noAttrWords).toString();
2053            final Enumeration<?> ourNameWords = ExhibitName.getMainWords(
2054                                            ourNameFull, noAttrWords);
2055    
2056            // Default to empty enumerations.
2057            Enumeration<?> nextNameWords = emptyEn;
2058            Enumeration<?> prevNameWords = emptyEn;
2059    
2060            // Now search forwards for different name to compare against...
2061            // Note that we implicitly start from just after ourName above.
2062            while(liFwd.hasNext())
2063                {
2064                final CharSequence n = (liFwd.next());
2065                // FIXME: inefficient via full name and tokenizer...
2066                final CharSequence nextNameMainWords =
2067                    ExhibitName.getMainWordsComponent(n, noAttrWords);
2068                if(!TextUtils.contentEqualsIgnoreCase(nextNameMainWords, ourNameMainWords))
2069                    {
2070                    nextNameWords = ExhibitName.getMainWords(n, noAttrWords);
2071                    break;
2072                    }
2073                }
2074    
2075            // Now search backwards for different name to compare against...
2076            final ListIterator<Name.ExhibitFull> liBack = exhibits.listIterator(ourIndex);
2077            while(liBack.hasPrevious())
2078                {
2079                final CharSequence n = (liBack.previous());
2080                // FIXME: inefficient via full name and tokenizer...
2081                final CharSequence prevNameMainWords =
2082                    ExhibitName.getMainWordsComponent(n, noAttrWords);
2083                if(!TextUtils.contentEqualsIgnoreCase(prevNameMainWords, ourNameMainWords))
2084                    {
2085                    prevNameWords = ExhibitName.getMainWords(n, noAttrWords);
2086                    break;
2087                    }
2088                }
2089    
2090            // Result word prefix (preserves case, ends with word separator).
2091            final StringBuilder result = new StringBuilder();
2092    
2093            // Iterate until we run out of words or both neighbours have
2094            // (or have different words).
2095            while(ourNameWords.hasMoreElements())
2096                {
2097                final String ourNameWord = (String) ourNameWords.nextElement();
2098                result.append(ourNameWord).append(ExhibitName.WORD_SEP);
2099    
2100                // Take next word, if extant, on both prev and next sides.
2101                final String nextNameWord = nextNameWords.hasMoreElements() ?
2102                    (String) nextNameWords.nextElement() : "";
2103                final String prevNameWord = prevNameWords.hasMoreElements() ?
2104                    (String) prevNameWords.nextElement() : "";
2105    
2106                // If neighbour has another word but it is different to ours,
2107                // pretend neighbour was truncated at previous word.
2108                // We ignore case, since users probably will.
2109                if(nextNameWord.equalsIgnoreCase(ourNameWord))
2110                    { continue; } // Looks the same so cannot stop yet.
2111                else
2112                    { nextNameWords = emptyEn; } // No longer a contender.
2113    
2114                if(prevNameWord.equalsIgnoreCase(ourNameWord))
2115                    { continue; } // Looks the same so cannot stop yet.
2116    //            else
2117    //                { prevNameWords = emptyEn; } // No longer a contender.
2118    
2119                break; // Done!
2120                }
2121    
2122            // Attempt to return as space-saving Name, else a String, either being interned.
2123            return(Name.createOrStringFallback(result, null));
2124            }
2125    
2126       /**Get BufferedImage containing expanded image loaded as static resource from WAR; never null.
2127        * Loaded on first use and cached statically,
2128        * optionally via a SoftReference to allow the system to reclaim memory.
2129        * <p>
2130        * There may be an adverse effect on system behaviour if many large images
2131        * are cached in memory; this may be mitigated by storing them via
2132        * a SoftReference so that the memory can be recycled automatically.
2133        * <p>
2134        * This method is internally synchronized to allow only one image load/decode
2135        * to happen at once to conserve CPU and memory (and other) resources.
2136        * <p>
2137        * If the image is indexed and forceToARGBTrueColour is true then
2138        * we expand it to true-colour to make it possible to add markings.
2139        * <p>
2140        * This uses our built-in mediahandler classes to decode the image,
2141        * so the image type must be one that we have a decoder for.
2142        * <p>
2143        * This may not handle alpha correctly in all cases.
2144        * <p>
2145        * Under memory pressure this may release cached resources
2146        * whether hard or soft cached.
2147        * <p>
2148        * <strong>Beware:</strong> since a reference is returned rather than
2149        * a copy, be careful not to adjust the returned image unintentionally.
2150        *
2151        * @param context  gives context for WAR from which to load the raw
2152        *     base clickable-map image
2153        * @param resourceRRURL  root-relative URL (eg "/my/image.gif") of
2154        *     source image in WAR; must not be null and must be a type
2155        *     (and with a file extension) that we have a MIME mediahandler for
2156        * @param forceToARGBTrueColour  if true, and the source image is not
2157        *     ARGB true-colour, then we force conversion to ARGB true-colour
2158        *     before cacheing to make it easier to draw on the image
2159        * @param cacheViaSoftReference  if true then we attempt to cache the
2160        *     image via a SoftReference; if at any point this is true when the
2161        *     image needs to be (re)cached, the image remains permanently cached
2162        *     via a strong reference thereafter
2163        * @param copyResult  if true, we force the result to be a copy of
2164        *     the cached value to avoid accidentally changing the cached copy;
2165        *     this may force a change in colour model and/or discarding properties
2166        *
2167        * @return image, possibly a copy, possibly with a converted colour model
2168        *
2169        * @throws java.lang.IllegalStateException if the image is not loadable
2170        */
2171        public static BufferedImage getAndCacheStaticImage(
2172                                                final boolean copyResult,
2173                                                final String resourceRRURL,
2174                                                final boolean forceToARGBTrueColour,
2175                                                final ServletContext context,
2176                                                final boolean cacheViaSoftReference)
2177            throws IllegalStateException
2178            {
2179            if((context == null) ||
2180               (resourceRRURL == null))
2181                { throw new IllegalArgumentException(); }
2182    
2183            synchronized(_gACSI_cache)
2184                {
2185                // An item in the cache is one of:
2186                //   * null (ie completely absent)
2187                //   * BufferedImage
2188                //   * SoftReference (possibly cleared) to BufferedImage
2189                final Object rawItem = _gACSI_cache.get(resourceRRURL);
2190                BufferedImage result = (!(rawItem instanceof SoftReference))
2191                    ? (BufferedImage) rawItem
2192                    : (BufferedImage) (((SoftReference<?>) rawItem).get());
2193    
2194                if(result == null) // Needs fetching.
2195                    {
2196                    final InputStream is =
2197                        context.getResourceAsStream(resourceRRURL);
2198                    if(is == null)
2199                        { throw new IllegalStateException("cannot get InputSteam for image to cache from " + resourceRRURL); }
2200    
2201                    // Find correct handler given name of file.
2202                    final ExhibitMIME.ExhibitTypeParameters etp =
2203                        ExhibitMIME.getInputFileType(resourceRRURL);
2204                    if(etp == null)
2205                        { throw new IllegalStateException("cannot get MIME type for image to cache from " + resourceRRURL); }
2206                    if(etp.handler == null)
2207                        { throw new IllegalStateException("cannot get handler for image to cache from " + resourceRRURL); }
2208    
2209                    try {
2210                        // Decode the image.
2211                        result = etp.handler.decodeImage(is);
2212                        if(result == null)
2213                            { throw new IllegalStateException("cannot get decode image to cache from " + resourceRRURL); }
2214    
2215                        // Force to ARGB if required.
2216                        if(forceToARGBTrueColour)
2217                            { result = ImageUtils.convertToTrueColourARGB(result, false); }
2218    
2219                        // Cache the result.
2220                        if(cacheViaSoftReference)
2221                            { _gACSI_cache.put(resourceRRURL, new SoftReference<BufferedImage>(result)); }
2222                        else
2223                            { _gACSI_cache.put(resourceRRURL, result); }
2224                        }
2225                    catch(final IOException e)
2226                        { throw new IllegalStateException("cannot decode image to cache from " + resourceRRURL + ": IOException: " + e.getMessage()); }
2227                    }
2228    
2229                // If forced to copy the result, do so.
2230                // TODO: Maybe try to preserve relevant image properties?
2231                if(copyResult)
2232                    {
2233                    // Coerce data into original colour model.
2234                    // Discard any properties of the original.
2235                    final ColorModel cm = ImageUtils.extractColorModelOrRGB(result);
2236                    final int width = result.getWidth();
2237                    final int height = result.getHeight();
2238                    final WritableRaster raster = cm.createCompatibleWritableRaster(
2239                        width, height);
2240                    final BufferedImage copiedResult =
2241                        new BufferedImage(cm, raster, false, null);
2242    
2243                    // Actually copy the pixels...
2244                    copiedResult.setRGB(0, 0, width, height,
2245                        result.getRGB(0, 0, width, height, null, 0, width),
2246                                        0, width);
2247    
2248                    return(copiedResult);
2249                    }
2250    
2251                return(result);
2252                }
2253            }
2254    
2255        /**Private cache for getAndCacheStaticImage(); never null.
2256         * Is a map from root-relative URL to a BufferedImage
2257         * or SoftReference to a BufferedImage.
2258         * <p>
2259         * Thread-safe LRU-managed limited-size memory-sensitive map.
2260         * <p>
2261         * We are prepared to discard everything if very short of memory.
2262         * <p>
2263         * All getAndCacheStaticImage() activity is synchronized on this instance.
2264         */
2265        private static final MemoryTools.SimpleLRUMapAutoSizeForHitRate<String,Object> _gACSI_cache =
2266            MemoryTools.SimpleLRUMapAutoSizeForHitRate.<String,Object>create(0, 1024, "_gACSI_cache");
2267    
2268    
2269        /**Generate a unique key for the given HTTP request; returns null if not possible to generate.
2270         * For example, we use this to help limit the number of times that
2271         * a given user is asked to vote.
2272         * <p>
2273         * Generates a string starting with the given prefix and some
2274         * leading portion of the client's IP address...
2275         * <p>
2276         * It is not fatal if this conflates users,
2277         * but it is more useful that this never thinks one user is more than one
2278         * to avoid pestering them too much
2279         * (or letting a spider inject too much noise for example).
2280         *
2281         * @param prefix  unique prefix to the generated key; non-empty, non-null
2282         * @param request  the user's request
2283         */
2284        public static String generateUserDataPointID(final String prefix,
2285                                                     final HttpServletRequest request)
2286            {
2287            if((prefix == null) || (prefix.length() == 0)) { return(null); }
2288            if(request == null) { return(null); }
2289    
2290            final StringBuilder sb = new StringBuilder(prefix.length() + 21);
2291            sb.append(prefix);
2292    
2293            try
2294                {
2295                // Get the IP address
2296                final InetAddress ia = InetAddress.getByName(request.getRemoteAddr());
2297    
2298                // Use all but the final octet to construct the data-point ID.
2299                // Assume that this will distinguish most genuine users.
2300                final byte[] addr = ia.getAddress();
2301                for(int i = 0; i < addr.length-1; ++i)
2302                    { sb.append(addr[i] & 0xff).append('.'); }
2303    
2304                return(sb.toString());
2305                }
2306            catch(final UnknownHostException e)
2307                {
2308                return(null); // Could not understand the client's address.
2309                }
2310            }
2311    
    /**Name of the additional parameter that records whether this is a vote "for" ("pro").
     * The value of the parameter is to be "true" or "false".
     */
    public static final String VOTE_PRO_PARAM_NAME = "pro";

    /**Name of the additional parameter that records whether this is a vote "against" ("con").
     * The value of the parameter is to be "true" or "false".
     */
    public static final String VOTE_CON_PARAM_NAME = "con";
2321    
2322        /**Create and post the handler for a vote if possible and returns the unique listenerID.
2323         * This replaces any existing listener for this voter.
2324         * <p>
2325         * This returns null if we could not create a listener.
2326         * <p>
2327         * If the handler is invoked,
2328         * then this registers a dummy (inactive) handler
2329         * to delay the next request to the user to vote.
2330         *
2331         * @param exhibitFullName  valid exhibit name; never null
2332         * @param request  client's HTTP request; never null
2333         * @param vars  where stats updates are posted; never null
2334         *
2335         * @return listenerID, or null if one could not be generated
2336         */
2337        public static String createAndFileVoteListener(final Name.ExhibitFull exhibitFullName,
2338                                                       final HttpServletRequest request,
2339                                                       final SimpleVariablePipelineIF vars)
2340            {
2341    //        if(!ExhibitName.validNameSyntax(exhibitFullName)) { return(null); }
2342            if(exhibitFullName == null) { return(null); }
2343            if(vars == null) { return(null); }
2344    
2345            final String dpID = generateUserDataPointID(WebConsts.VOTER_DATA_POINT_PREFIX, request);
2346            if(dpID == null) { return(null); }
2347    
2348            // Compute expiry time (with a random element).
2349            final long expireBy = System.currentTimeMillis() + WebConsts.VOTE_MIN_REQUEST_GAP_MS +
2350                    Rnd.fastRnd.nextInt(WebConsts.VOTE_MIN_REQUEST_GAP_MS);
2351    
2352            // Capture the visitor's IP address for later (in case they vote).
2353            InetAddress va = null;
2354            try { va = InetAddress.getByName(request.getRemoteAddr()); }
2355            catch(final UnknownHostException e) { } // Ignore errors...
2356            final InetAddress voterIPAddr = va;
2357    
2358            // Create new listener.
2359            // Pass in a weak reference to the vars pipeline to avoid blocking GC...
2360            final StatsSink.AbstractStatsListener newAsl =
2361                    (new VoteHandler(dpID, expireBy, exhibitFullName, new WeakReference<SimpleVariablePipelineIF>(vars), voterIPAddr));
2362    
2363            // Get the listener ID...
2364            final String listenerID = newAsl.uniqueListenerID;
2365    
2366            // File the request!
2367            StatsSink.addListenerForDataPoint(newAsl);
2368    
2369            return(listenerID);
2370            }
2371    
2372    
2373        /**Get sorted, hyperlinked HTML i18n text list of exhibit categories; never null.
2374         * The list is sorted:
2375         * <ol>
2376         * <li>By dictionary-order i18n localised title.
2377         * </ol>
2378         * <p>
2379         * We may flag categories as good or bad if significantly so.
2380         *
2381         * @param asList  if true, entries are preceded by &lt;li&gt;
2382         *     else they are followed by &lt;br /&gt\r\n;
2383         */
2384        public static final String getCategoryListSortedAsHTML(final DataSourceBean dsb,
2385                                                               final LocaleBeanBase localeBean,
2386                                                               final boolean asList)
2387            throws IOException
2388            {
2389            return(getCategoryListSortedAsHTML(dsb, localeBean, asList ? null : "<br />\r\n"));
2390            }
2391    
2392        /**Get sorted, hyperlinked HTML i18n text list of exhibit categories; never null.
2393         * The list is sorted:
2394         * <ol>
2395         * <li>By dictionary-order i18n localised title.
2396         * </ol>
2397         * <p>
2398         * We may flag categories as good or bad if significantly so.
2399         *
2400         * @param entrySeparator  text (followed by CRLF) to terminate entries;
2401         *     null if entries are to be wrapped with li tags
2402         */
2403        public static final String getCategoryListSortedAsHTML(final DataSourceBean dsb,
2404                                                               final LocaleBeanBase localeBean,
2405                                                               final String entrySeparator)
2406            throws IOException
2407            {
2408            if((dsb == null) || (localeBean == null))
2409                { throw new IllegalArgumentException(); }
2410    
2411            final boolean asList = (null == entrySeparator);
2412    
2413            // Private data on each category, for sorting.
2414            final class PerCat implements Comparable<PerCat>
2415                {
2416                PerCat(final String cat,
2417                       final String i18nTitle,
2418                       final Boolean isGood,
2419                       final int entriesInCat)
2420                    {
2421                    category = cat;
2422                    title = i18nTitle;
2423                    good = isGood;
2424                    numEntries = entriesInCat;
2425                    }
2426    
2427                final String category;
2428                final String title;
2429                final Boolean good;
2430                final int numEntries;
2431    
2432                /**Compares this object with the specified object for order.
2433                 * Order is:
2434                 * <ol>
2435                 * <li>By dictionary-order i18n localised title.
2436                 * </ol>
2437                 */
2438                public int compareTo(final PerCat other)
2439                    {
2440                    // Sort by localised title, case insensitively.
2441                    final int tComp = String.CASE_INSENSITIVE_ORDER.compare(title, other.title);
2442                    if(tComp != 0) { return(tComp); }
2443    
2444                    // Officially break ties by underlying canonical category name.
2445                    // This should never really be needed.
2446                    return(category.compareTo(other.category));
2447                    }
2448                }
2449    
2450            final AllExhibitProperties aep = dsb.getAllExhibitProperties(-1);
2451    
2452            // Get the list of categories.
2453            final Map<String,Integer> categories = aep.getCategoryExhibitCounts();
2454    
2455            // Create sorted set of details.
2456            // We don't expend huge effort on getting the category good/bad status.
2457            final SortedSet<PerCat> cs = new TreeSet<PerCat>();
2458            for(final String cat : categories.keySet())
2459                {
2460                cs.add(new PerCat(
2461                    cat,
2462                    GenUtils.computeSectionTitle(aep, cat, localeBean),
2463                    aep.isCategoryGood(cat, dsb, false),
2464                    categories.get(cat)
2465                    ));
2466                }
2467    
2468            // Convert to nicely-formatted HTML.
2469            final StringBuilder sb = new StringBuilder(79 * categories.size());
2470            for(final PerCat pc : cs)
2471                {
2472                if(asList) { sb.append("<li>"); }
2473    
2474                // If category significantly good/bad then mark it so.
2475                if(pc.good != null)
2476                    {
2477                    final String proIcon = "smile.gif";
2478                    final String conIcon = "frown.gif";
2479                    sb.append("<img src=\"/_static/icon/").
2480                        append(pc.good.booleanValue() ? proIcon : conIcon).
2481                        append("\" width=15 height=15> ");
2482                    }
2483    
2484                // Open link to category RRURL.
2485                sb.append("<a href=\"").
2486                    append(WebConsts.VIRTUAL_COLLECTIONS_BYCATEGORY_ROOT).
2487                    append(pc.category).
2488                    append("/\">");
2489                sb.append(pc.title);
2490                // Close link to category.
2491                sb.append("</a>");
2492    
2493                // Insert exhibit count.
2494                sb.append(" <i>(").append(pc.numEntries).append(")</i>");
2495    
2496                // Finish the point
2497                if(!asList) { sb.append(entrySeparator); }
2498                else { sb.append("</li>\r\n"); }
2499                }
2500    
2501            return(sb.toString());
2502            }
2503    
2504        /**Returns true if this seems to be a slave disconnected from the master.
2505         * This instance may, for example, not wish to collect votes from users
2506         * if the votes may get discarded without getting to the master.
2507         */
2508        public static final boolean isDisconnectedSlave(final DataSourceBean dsb)
2509            {
2510            if(dsb == null) { throw new IllegalArgumentException(); }
2511    
2512            // If definitely a master then this is not 'disconnected' by definition.
2513            if(Boolean.FALSE.equals(dsb.isSlave()))
2514                { return(false); }
2515    
2516            // This instance is treated as disconnected if it is a slave with no xfer key
2517            // since that implies that it won't be allowed to send updates (eg votes) home.
2518            if(!LocalProps.hasXferKey())
2519                { return(true); }
2520    
2521            // This instance is to be treated as NOT disconnected
2522            // if it can see at least one other system via the system variables,
2523            // ie at least two systems in total.
2524            // This uses the availablity of client-count as a measure of connectivity.
2525            final SimpleVarStats stats = VarTools.generateSimpleStats(dsb,
2526                                    SystemVariables.ThroughputMonitorFilter_CLIENT_COUNT,
2527                                    0); /* Minimum possible life. */
2528            return((stats == null) || (stats.getSystemCount() < 2));
2529            }
2530    
2531        /**Returns true if this request is apparently a precacheing request, eg from a "Web accelerator".
2532         * This is true if a client (such as FireFox) is "reading ahead"
2533         * but it may be the case that no real human gets to see the content.
2534         * <p>
2535         * See https://developer.mozilla.org/en/Link_prefetching_FAQ
2536         */
2537        public static boolean isPrecacheRequest(final HttpServletRequest request)
2538            {
2539            // "X-Moz: prefetch" header covers FF1--3.5 and Google's Web Accelerator.
2540            return("prefetch".equalsIgnoreCase(request.getHeader("X-Moz")));
2541            }
2542    
2543    
    /**Private key used by getCatPageExhibitMetaDataHTML() to find its cache among the DataSourceBean's AEP-linked values; never null.
     * The key's comment text is also passed to the cache's factory when the cache is created.
     */
    private static final AEPLinkedKey metadataCacheKey = new AEPLinkedKey("metadataCacheKey");
2546    
    /**Static dictionary used by getCatPageExhibitMetaDataHTML() for compression of in-memory data; never null.
     * It is passed to Compact7BitString.convertToCompact7BitString()
     * when the generated metadata HTML is cached.
     * <p>
     * The static dictionary content should be reviewed after any major format changes,
     * though this is not a correctness issue, only a matter of compression efficiency.
     * <p>
     * The comment after each entry records the statistics that justified it
     * (occurrence count, estimated saving and mean first position),
     * presumably gathered from an analysis run over real metadata -- TODO confirm;
     * entries marked MANUALLY ADDED were inserted by hand.
     */
    public static final Compact7BitString.StaticDictionary sDictMD = new Compact7BitString.StaticDictionary("getCatPageExhibitMetaDataHTML",
        Arrays.asList(new String[]{
            "JPEG",    /* MANUALLY ADDED: count=16817, saving=50451, meanFirstPos=126 */
            "javax_imageio_1", /* count=17761, saving=248654, meanFirstPos=37 */
            "ColorSpaceType", /* count=17752, saving=230776, meanFirstPos=67 */
            "Compression", /* count=17684, saving=176840, meanFirstPos=106 */
            "NumChannels", /* count=16942, saving=169420, meanFirstPos=86 */
            "metadata", /* count=17939, saving=125573, meanFirstPos=9 */
            "TypeName", /* count=16826, saving=117782, meanFirstPos=120 */
            "compact", /* count=18684, saving=112104, meanFirstPos=3 */
            "Chroma", /* count=17755, saving=88775, meanFirstPos=53 */
            "value", /* count=18684, saving=74736, meanFirstPos=87 */
            "image", /* count=16951, saving=67804, meanFirstPos=23 */
            "YCbCr", /* count=15240, saving=60960, meanFirstPos=73 */
            "name", /* count=17751, saving=53253, meanFirstPos=71 */
            "\"</", /* count=18684, saving=37368, meanFirstPos=72 */
            "></", /* count=17805, saving=35610, meanFirstPos=95 */
            "</", /* count=18684, saving=18684, meanFirstPos=10 */
            "=\"", /* count=18684, saving=18684, meanFirstPos=70 */
            "> ", /* count=18684, saving=18684, meanFirstPos=68 */
            "><", /* count=18684, saving=18684, meanFirstPos=4 */
            "dd", /* count=18684, saving=18684, meanFirstPos=13 */
            "dl", /* count=18684, saving=18684, meanFirstPos=1 */
            "dt", /* count=18684, saving=18684, meanFirstPos=5 */
            "BackgroundIndex", /* count=808, saving=11312, meanFirstPos=103 */
            "BlackIsZero", /* count=934, saving=9340, meanFirstPos=89 */
            "accessionData", /* count=745, saving=8940, meanFirstPos=9 */
            "stream", /* count=984, saving=4920, meanFirstPos=23 */
            "GRAY", /* count=1157, saving=3471, meanFirstPos=73 */
            "CRC32", /* count=745, saving=2980, meanFirstPos=61 */
            "RGB", /* count=1353, saving=2706, meanFirstPos=73 */
            "TRUE", /* count=837, saving=2511, meanFirstPos=91 */
            "date", /* count=745, saving=2235, meanFirstPos=23 */
            "hash", /* count=745, saving=2235, meanFirstPos=59 */
            "size", /* count=745, saving=2235, meanFirstPos=41 */
            "sampleRate", /* count=174, saving=1566, meanFirstPos=88 */
            "MD5", /* count=745, saving=1490, meanFirstPos=82 */
            "encoding", /* count=174, saving=1218, meanFirstPos=70 */
            "frames", /* count=162, saving=810, meanFirstPos=37 */
            "audio", /* count=178, saving=712, meanFirstPos=56 */
            "57024", /* count=173, saving=692, meanFirstPos=47 */
            "PCM_SIGNED", /* count=55, saving=495, meanFirstPos=78 */
            "ULAW", /* count=112, saving=336, meanFirstPos=75 */
            "PCM_UNSIGNED", /* count=7, saving=77, meanFirstPos=77 */
            "BI_RGB", /* count=6, saving=30, meanFirstPos=74 */
            "1136915", /* MANUALLY ADDED: count ~ 10 as prefix. */
            }));
2598    
2599        /**Generates HTML form of exhibit metadata, "" if no such metadata for the specified exhibit; never null.
2600         * The result is keyed to both the DataSourceBean and the exhibitName.
2601         * <p>
2602         * Cached values are discarded when the AEP changes.
2603         * <p>
2604         * The computed HTML is locale-invariant, which is why cacheing is viable.
2605         * <p>
2606         * The toString() method should be called on the result to get the String HTML text,
2607         * which may be a String or some other internal representation.
2608         *
2609         * @param dsb  valid non-null DataSourceBean
2610         * @param exhibitName  valid non-null exhibit full name
2611         */
2612        @SuppressWarnings("unchecked")
2613        public static Object getCatPageExhibitMetaDataHTML(final DataSourceBean dsb,
2614                                                           final Name.ExhibitFull exhibitName)
2615            {
2616            assert((dsb != null) && ExhibitName.validNameSyntax(exhibitName));
2617    
2618            // Get existing cache map, or create new one.
2619            // The cache is a size-limited, thread-safe Map
2620            // from full exhibit name to HTML formatted metadata ("" if none).
2621            // The items can be large and possibly moderately expensive to (re)compute
2622            // though all in-memory (no disc access for example) so a miss is not that bad.
2623            // Races here may result in some wasted work but no errors.
2624            MemoryTools.SimpleProbabilisticCache<Name.ExhibitFull,Object> cachedMetaDataMap;
2625            while((cachedMetaDataMap = (MemoryTools.SimpleProbabilisticCache<Name.ExhibitFull,Object>) dsb.getAEPLinkedValue(metadataCacheKey)) == null)
2626                {
2627                // Limit size to ~10 per 1MB of heap, but minimum a few tens to cover popular pages.
2628                final int maxCacheSize = Math.max(32, (int) Math.min(16384, Runtime.getRuntime().totalMemory() >> 17));
2629                dsb.putIfAbsentAEPLinkedValue(metadataCacheKey, MemoryTools.SimpleProbabilisticCache.<Name.ExhibitFull, Object>create(maxCacheSize, metadataCacheKey.comment));
2630                }
2631            final Object cachedMetaData = cachedMetaDataMap.get(exhibitName);
2632            if(cachedMetaData != null) { return(cachedMetaData); }
2633    
2634            // Need to (re)compute metadata for this exhibit
2635            // eg for the first time and/or after an AEP load/change.
2636            try
2637                {
2638                final AllExhibitProperties aep = dsb.getAllExhibitProperties(-1);
2639    
2640                final String result = getCatPageExhibitMetaDataHTMLRaw(exhibitName, aep);
2641                assert(result != null);
2642    if(IsDebug.isDebug) { System.out.println("INFO: getCatPageExhibitMetaDataHTML() result size of "+result.length()+" chars"); }
2643    
2644                // We do not intern() the result
2645                // since we expect each non-"" metadata value to be unique.
2646                // Providing that the system is not hideously short of memory
2647                // then cache in an a compact form if possible else as a plain String.
2648                if(!MemoryTools.isMemoryStressed())
2649                    {
2650                    try { cachedMetaDataMap.put(exhibitName, Compact7BitString.convertToCompact7BitString(result, sDictMD)); }
2651                    catch(final IllegalArgumentException e) { cachedMetaDataMap.put(exhibitName, result); }
2652                    }
2653    
2654                // Return the uncompressed result to save the caller a little time...
2655                return(result);
2656                }
2657            catch(final IOException e)
2658                {
2659                // Give up without cacheing anything in case of error.
2660                // e.printStackTrace(); // Absorb the error and don't whinge...
2661                return(""); // Cannot compute the value right now...
2662                }
2663            }
2664    
2665        /**Computes the raw data for getCatPageExhibitMetaDataHTML() without cacheing; never null.
2666         * @return "" if there is no metadata for this exhibit
2667         */
2668        public static String getCatPageExhibitMetaDataHTMLRaw(final Name.ExhibitFull exhibitName, final AllExhibitProperties aep)
2669            {
2670            if(null == exhibitName) { throw new IllegalArgumentException(); }
2671            final ExhibitPropsLoadable epl = aep.getExhibitPropsLoadable(exhibitName);
2672            final ExhibitPropsComputable epc = aep.getExhibitPropsComputable(exhibitName);
2673    
2674            final Node metadata = (epc == null) ? null : epc.getMetadata();
2675            final AccessionData accessionMetadata = epl.getAccessionMetadata();
2676            if((metadata != null) || (accessionMetadata != null))
2677                {
2678                final StringBuilder sb = new StringBuilder(2048);
2679                if(metadata != null)
2680                    { sb.append(TextUtils.toXML(metadata, true, true)); }
2681                if(accessionMetadata != null)
2682                    { sb.append(TextUtils.toXML(accessionMetadata.getAsDOM(), true, true)); }
2683                return(sb.toString());
2684                }
2685    
2686            // No metadata at all.
2687            return("");
2688            }
2689    
2690        /**As for getCatPageExhibitMetaDataHTML(), but will compute a missing value asynchronously; never null.
2691         * If the value is already computed then it is available immediately,
2692         * else this attempts to spin off task compute the value,
2693         * and get() will block until the value is ready/computed.
2694         * <p>
2695         * If the target thread pool is full the computation will be done synchronously,
2696         * ie in this thread blocking this call until complete.
2697         * <p>
2698         * The toString() method should be called on the result to get the String text.
2699         */
2700        @SuppressWarnings("unchecked")
2701        public static Future<?> getCatPageExhibitMetaDataHTMLFuture(
2702                                                 final DataSourceBean dsb,
2703                                                 final Name.ExhibitFull exhibitName)
2704            {
2705            // Try first to return any extant cached value without blocking.
2706            // If present, we need not start any thread at all.
2707            final MemoryTools.SimpleProbabilisticCache<Name.ExhibitFull,Object> cachedMetaDataMap = (MemoryTools.SimpleProbabilisticCache<Name.ExhibitFull,Object>) dsb.getAEPLinkedValue(metadataCacheKey);
2708            if(null != cachedMetaDataMap)
2709                {
2710                final Object cachedMetaData = cachedMetaDataMap.get(exhibitName);
2711                // Return already-finished Future with value, if any.
2712                if(null != cachedMetaData)
2713                    { return(ThreadUtils.makeCompletedFuture(cachedMetaData)); }
2714                }
2715    
2716            // If we did not find a cached value,
2717            // then set up the task to compute the value asynchronously.
2718            final Callable<?> callable = new Callable<Object>(){
2719                public final Object call() throws Exception
2720                    { return(getCatPageExhibitMetaDataHTML(dsb, exhibitName)); }
2721                };
2722            // Start the (CPU-intensive) thread immediately.
2723            final Future<?> result = ThreadUtils.computeIntensiveThreadPool.submit(callable);
2724    
2725            // Return the handle for the caller to retrieve the value...
2726            return(result);
2727            }
2728    
2729        /**Hyphenate long HTML text (that contains zero or more `-' characters and little or no whitespace).
2730         * Replaces hyhens ('-') with spaces to allow a browser to wrap the text.
2731         * <p>
2732         * Usually used with exhibit names or fragments of such names.
2733         */
2734        public static final String hyphenateHTMLText(final String s)
2735            {
2736            return(s.replace('-', ' '));
2737            }
2738    
    /**Thread-safe private cache mapping from exhibit type and Accept header to acceptability of that MIME type for inlining in XHTML mobile text.
     * Keys are (exhibit type, Accept header value) pairs;
     * values are TRUE if the type may be inlined for that header, FALSE if not.
     * <p>
     * We assume that the Accept headers will be more or less constant for a given device,
     * and probably constant between instances of the device,
     * so we have enough entries to cover the likely different <em>types</em> of device
     * using the Gallery at any one time.
     * <p>
     * We take care to avoid using huge Accept values in keys to avoid DoS-style issues.
     * <p>
     * We're prepared to discard this entirely when short of memory.
     * <p>
     * NOTE(review): this was originally described as an LRU cache,
     * but the declared type is SimpleProbabilisticCache -- confirm the actual eviction policy.
     */
    private static final MemoryTools.SimpleProbabilisticCache<Tuple.Pair<ExhibitMIME.ExhibitTypeParameters,String>, Boolean> _inlineableInXHTML = MemoryTools.SimpleProbabilisticCache.<Tuple.Pair<ExhibitMIME.ExhibitTypeParameters,String>, Boolean>create(512, "_inlineableInXHTML");
2750    
2751        /**Returns true if the given MIME-type can always be inlined in an XHTML (mobile) page.
2752         * If the type argument is null, this returns false.
2753         * <p>
2754         * This always allows JPEG and GIF,
2755         * but may also allow other (image) types listed in an incoming "Accept" header.
2756         */
2757        public static boolean canInlineInXHTMLPage(final ExhibitMIME.ExhibitTypeParameters exhibitType,
2758                                                   final HttpServletRequest request)
2759            {
2760            if(exhibitType == null) { return(false); }
2761    
2762            // Allow commonly-supported GIF (89a) and JPEG types always.
2763            switch(exhibitType.type)
2764                {
2765                case ExhibitMIME.ET_JPEG:
2766                case ExhibitMIME.ET_GIF: // Should really check GIF version.
2767                    return(true);
2768                }
2769            // No header to analyse, so cannot allow more than the basic types.
2770            if(request == null) { return(false); }
2771    
2772            // We are prepared to test for a limited selection of other inlineable types.
2773            switch(exhibitType.type)
2774                {
2775                case ExhibitMIME.ET_PNG:
2776                case ExhibitMIME.ET_SWF:
2777                case ExhibitMIME.ET_BMP:
2778                // TODO: WBMP, SVG?
2779                    {
2780                    // Look for any Accept headers.
2781                    final Enumeration<?> headers = request.getHeaders("Accept");
2782                    // No Accept headers to analyse, so cannot allow more than the basic types.
2783                    if(headers == null) { return(false); }
2784    
2785                    // Allow for possibility of multiple Accept headers (probably rare though).
2786                    while(headers.hasMoreElements())
2787                        {
2788                        final String h = (String) headers.nextElement();
2789                        // We refuse to process gigantic header values at all
2790                        // to preserve performance and avoid DoS-style problems.
2791                        if(h.length() >= 1024) { continue; }
2792    
2793                        // Make cache lookup key and attempt cache lookup.
2794                        final Tuple.Pair<ExhibitMIME.ExhibitTypeParameters,String> key =
2795                            new Tuple.Pair<ExhibitMIME.ExhibitTypeParameters,String>(exhibitType, h);
2796                        final Boolean cachedResult = _inlineableInXHTML.get(key);
2797                        if(Boolean.TRUE.equals(cachedResult)) { return(true); }
2798                        if(Boolean.FALSE.equals(cachedResult)) { continue; /* Try other headers. */ }
2799    System.out.println("INFO: new XHTML Accept header '"+TextUtils.sanitiseForXML(h, 256, true)+"' from User-Agent: "+TextUtils.sanitiseForXML(request.getHeader("User-Agent"), 256, true));
2800    
2801                        // Do explicit search though header for specified MIME type.
2802                        for(final String t : h.split(","))
2803                            {
2804                            // Allow for direct MIME-type match,
2805                            // or with trailing q factor, eg "image/png;q=0.9",
2806                            // or just a generic catch-all */*.
2807                            final String trimmed = t.trim(); // Remove padding whitespace.
2808                            if(trimmed.equals("*/*") ||
2809                               trimmed.equals(exhibitType.mimeType) ||
2810                               (trimmed.startsWith(exhibitType.mimeType) && trimmed.substring(exhibitType.mimeType.length()).startsWith(";")))
2811                                {
2812                                _inlineableInXHTML.put(key, Boolean.TRUE); // Cache success.
2813                                return(true);
2814                                }
2815                            }
2816    
2817                        // Cache failure (against this header).
2818                        _inlineableInXHTML.put(key, Boolean.FALSE);
2819                        }
2820                    }
2821                }
2822            return(false);
2823            }
2824    
2825        /**Returns true if the given MIME-type can always be inlined in an HTML page.
2826         * If the argument is null, this returns false.
2827         */
2828        public static boolean canInlineInHTMLPage(final ExhibitMIME.ExhibitTypeParameters exhibitType)
2829            {
2830            if(exhibitType == null) { return(false); }
2831            switch(exhibitType.type)
2832                {
2833                case ExhibitMIME.ET_JPEG:
2834                case ExhibitMIME.ET_GIF:
2835                case ExhibitMIME.ET_PNG: // Most HTML browsers will accept PNG now.
2836                case ExhibitMIME.ET_SWF: // Most browsers have a Flash plug-in now.
2837                case ExhibitMIME.ET_BMP: // Embedded BMP should be widely supported.
2838                case ExhibitMIME.ET_HTMLFRAG:
2839                    return(true);
2840                }
2841            return(false);
2842            }
2843    
    /**Private key used by getCategoryTreeFilterBean() to find its per-category map among the DataSourceBean's AEP-linked values; never null. */
    private static final DataSourceBean.AEPLinkedKey _getCategoryTreeFilterBeanKey = new DataSourceBean.AEPLinkedKey("_getCategoryTreeFilterBeanKey");
2846    
2847        /**Get selected by-category TreeFilterBean from entire exhibit set; never null.
2848         * Used for the "by category" exhibit tree view and elsewhere.
2849         * <p>
2850         * This data is cached linked to the DSB
2851         * (which in passing ensures that it can be dropped automatically under extreme memory stress).
2852         * <p>
2853         * The category name is primarily checked for syntactic validity,
2854         * not for actual presence in the AEP.
2855         */
2856        @SuppressWarnings("unchecked")
2857        public static TreeFilterBean getCategoryTreeFilterBean(final DataSourceBean dsb,
2858                                                               final CharSequence category)
2859            {
2860            if((dsb == null) || !ExhibitName.validNameInitialComponentSyntax(category))
2861                { throw new IllegalArgumentException(); }
2862    
2863            // Map from category name to TreeFilerBean.
2864            // Created as necessary on first use (after AEP change).
2865            ConcurrentMap<String,TreeFilterBean> trees;
2866            while(null == (trees = (ConcurrentMap<String,TreeFilterBean>) dsb.getAEPLinkedValue(_getCategoryTreeFilterBeanKey)))
2867                { dsb.putIfAbsentAEPLinkedValue(_getCategoryTreeFilterBeanKey, new ConcurrentHashMap<String,TreeFilterBean>()); }
2868    
2869            // Atomically ensure that the right tree filter bean for category exists, else create it.
2870            TreeFilterBean tfb;
2871            final String categoryAsString = category.toString();
2872            while(null == (tfb = trees.get(categoryAsString)))
2873                {
2874                // Create the filter bean.
2875                tfb = new TreeFilterBean();
2876                // Give it a name for improved diagnostics.
2877                tfb.setName("category:"+categoryAsString);
2878                // Although this is assumed to be expensive to (re)compute,
2879                // allow it to be dumped unless there's loads of memory free right now
2880                // AND this is a multiprocessor (ie, assumed 'big') host.
2881                // In an unstressed system this may hang around indefinitely
2882                // providing quick access to the pages based on them.
2883                // Note that being DSB-linked allows this to be dumped under extreme memory stress anyway.
2884                tfb.setMemorySensitiveCache((ThreadUtils.AVAILABLE_PROCESSORS == 1) || !MemoryTools.lotsFree());
2885                // Set the filter for the right category.
2886                tfb.setExpr(new FilterExpr(null, new BuiltInFilters.filtByCategory(new String[]{categoryAsString})));
2887                // Store the new bean in the cache, iff no one else got there first.
2888                trees.putIfAbsent(MemoryTools.intern(categoryAsString), tfb);
2889                }
2890    
2891            return(tfb);
2892            }
2893    
    /**Flags for User-Agent pattern matching checking for mobile phones.
     * Requests case-insensitive matching (with Unicode-aware case folding)
     * and canonical-equivalence matching; shared by both mobile-detection patterns below.
     */
    private static final int MOBILE_REGEX_FLAGS = Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE | Pattern.CANON_EQ;

    /**Mobile-browser detection regex 1 c/o detectmobilebrowser.com 2010/06/30.
     * The alternation is wrapped in a leading and trailing ".*"
     * so that a whole-input matches() succeeds
     * when any of the listed tokens appears anywhere in the User-Agent.
     */
    private static final Pattern MOBILE_REGEX_DMB_1_20100630 = Pattern.compile(".*(android|avantgo|blackberry|blazer|compal|elaine|fennec|hiptop|iemobile|ip(hone|od)|iris|kindle|lge |maemo|midp|mmp|opera m(ob|in)i|palm( os)?|phone|p(ixi|re)\\/|plucker|pocket|psp|symbian|treo|up\\.(browser|link)|vodafone|wap|windows (ce|phone)|xda|xiino).*", MOBILE_REGEX_FLAGS);

    /**Mobile-browser detection regex 2 c/o detectmobilebrowser.com 2010/06/30.
     * An unwrapped alternation of short device/vendor prefixes;
     * isBrowserOnMobileDevice() applies it with matches()
     * to just the first four characters of the User-Agent.
     */
    private static final Pattern MOBILE_REGEX_DMB_2_20100630 = Pattern.compile("1207|6310|6590|3gso|4thp|50[1-6]i|770s|802s|a wa|abac|ac(er|oo|s\\-)|ai(ko|rn)|al(av|ca|co)|amoi|an(ex|ny|yw)|aptu|ar(ch|go)|as(te|us)|attw|au(di|\\-m|r |s )|avan|be(ck|ll|nq)|bi(lb|rd)|bl(ac|az)|br(e|v)w|bumb|bw\\-(n|u)|c55\\/|capi|ccwa|cdm\\-|cell|chtm|cldc|cmd\\-|co(mp|nd)|craw|da(it|ll|ng)|dbte|dc\\-s|devi|dica|dmob|do(c|p)o|ds(12|\\-d)|el(49|ai)|em(l2|ul)|er(ic|k0)|esl8|ez([4-7]0|os|wa|ze)|fetc|fly(\\-|_)|g1 u|g560|gene|gf\\-5|g\\-mo|go(\\.w|od)|gr(ad|un)|haie|hcit|hd\\-(m|p|t)|hei\\-|hi(pt|ta)|hp( i|ip)|hs\\-c|ht(c(\\-| |_|a|g|p|s|t)|tp)|hu(aw|tc)|i\\-(20|go|ma)|i230|iac( |\\-|\\/)|ibro|idea|ig01|ikom|im1k|inno|ipaq|iris|ja(t|v)a|jbro|jemu|jigs|kddi|keji|kgt( |\\/)|klon|kpt |kwc\\-|kyo(c|k)|le(no|xi)|lg( g|\\/(k|l|u)|50|54|e\\-|e\\/|\\-[a-w])|libw|lynx|m1\\-w|m3ga|m50\\/|ma(te|ui|xo)|mc(01|21|ca)|m\\-cr|me(di|rc|ri)|mi(o8|oa|ts)|mmef|mo(01|02|bi|de|do|t(\\-| |o|v)|zz)|mt(50|p1|v )|mwbp|mywa|n10[0-2]|n20[2-3]|n30(0|2)|n50(0|2|5)|n7(0(0|1)|10)|ne((c|m)\\-|on|tf|wf|wg|wt)|nok(6|i)|nzph|o2im|op(ti|wv)|oran|owg1|p800|pan(a|d|t)|pdxg|pg(13|\\-([1-8]|c))|phil|pire|pl(ay|uc)|pn\\-2|po(ck|rt|se)|prox|psio|pt\\-g|qa\\-a|qc(07|12|21|32|60|\\-[2-7]|i\\-)|qtek|r380|r600|raks|rim9|ro(ve|zo)|s55\\/|sa(ge|ma|mm|ms|ny|va)|sc(01|h\\-|oo|p\\-)|sdk\\/|se(c(\\-|0|1)|47|mc|nd|ri)|sgh\\-|shar|sie(\\-|m)|sk\\-0|sl(45|id)|sm(al|ar|b3|it|t5)|so(ft|ny)|sp(01|h\\-|v\\-|v )|sy(01|mb)|t2(18|50)|t6(00|10|18)|ta(gt|lk)|tcl\\-|tdg\\-|tel(i|m)|tim\\-|t\\-mo|to(pl|sh)|ts(70|m\\-|m3|m5)|tx\\-9|up(\\.b|g1|si)|utst|v400|v750|veri|vi(rg|te)|vk(40|5[0-3]|\\-v)|vm40|voda|vulc|vx(52|53|60|61|70|80|81|83|85|98)|w3c(\\-| )|webc|whit|wi(g |nc|nw)|wmlb|wonu|x700|xda(\\-|2|g)|yas\\-|your|zeto|zte\\-", MOBILE_REGEX_FLAGS);
2902    
2903        /**Return true if client appears to be a mobile device (browser sniffing).
2904         * This attempts to detect a small-display, CPU- and bandwidth- constrained device,
2905         * that might benefit from being sent small XHTML pages rather than standard HTML.
2906         * <p>
2907         * This examines the User-Agent and is unlikely to be completely reliable.
2908         * <p>
2909         * Thanks for the regex to http://detectmobilebrowser.com/
2910         */
2911        public static boolean isBrowserOnMobileDevice(final HttpServletRequest request)
2912            {
2913            final String ua = request.getHeader("User-Agent");
2914            return(MOBILE_REGEX_DMB_1_20100630.matcher(ua).matches() ||
2915                   MOBILE_REGEX_DMB_2_20100630.matcher(ua.substring(0,4)).matches());
2916            }
2917    
2918    
    /**If true then allow some "sneaky" browser concurrency.
     * For browsers that do not always pipeline by default
     * but do allow fetches from different-named hosts in parallel
     * (eg the biggies such as IE6/IE7, FF1/1.5/2 as of 2006Q4),
     * then we can possibly help throughput by fetching some images (etc)
     * from a different name for this same host.
     * <p>
     * The trick is that where the user has arrived at the site
     * with a name other than the local mirror name,
     * and the URI that we were going to use was a rrURL (root-relative URL),
     * then we can prepend the local mirror name to make a new absolute URL
     * that the browser may be prepared to fetch from concurrently.
     * <p>
     * This has to be done completely consistently for any given item,
     * eg a static "page-furniture" image, to avoid defeating cacheing.
     * <p>
     * When this is false getOptionalSneakyConcurrencyRRURLPrefix()
     * always returns "" (ie never supplies a prefix).
     */
    public static final boolean ALLOW_SNEAKY_HTTP_CONCURRENCY = true;
2936    
2937        /**Get optional prefix for rrURL for extra "sneaky" browser concurrency.
2938         * This can only apply if:
2939         * <ul>
2940         * <li>ALLOW_SNEAKY_HTTP_CONCURRENCY is true.
2941         * <li>This host has a mirror prefix
2942         *     (ie so that there is an "alternate" name for this host.
2943         * <li>The incoming request hostname is not this local mirror hostname.
2944         * <li>We do recognise the form (a vhost for) the URL in the request.
2945         * </ul>
2946         * else this routine always returns "".
2947         * <p>
2948         * This basically only works if the user is using a "generic" URL,
2949         * but that is more likely to be a machine far away (ie with large RTT),
2950         * so extra concurrency to try to overcome latency is especially helpful.
2951         * <p>
2952         * This slightly inflates the HTML that the client will see
2953         * but only if using a generic URL.
2954         * <p>
2955         * May inflate the number of concurrent connections back to this host,
2956         * but usually only by 1 or 2 at most.
2957         * <p>
2958         * Note that this scheme <em>does not</em> rely on any other host being up,
2959         * nor having exactly the same content as us.
2960         * <p>
2961         * This <em>is not</em> a technique for distributing load.
2962         *
2963         * @return "" or the http://mirror-... name (with no trailing slash) for this host
2964         */
2965        public static String getOptionalSneakyConcurrencyRRURLPrefix(final HttpServletRequest request)
2966            {
2967            if(!ALLOW_SNEAKY_HTTP_CONCURRENCY)
2968                { return(""); /* The default, ie no prefix. */}
2969    
2970            final String mirrorTag = LocalProps.getMirrorTag();
2971            if(null == mirrorTag)
2972                { return(""); /* No reliable client-visible alternate hostname for us. */ }
2973    
2974            // If we don't understand the hostname in the query then don't add the prefix.
2975            // TODO: could cache this against/in the request if too time-consuming...
2976            final String serverName = request.getServerName();
2977            // Our preferred form of the hostname is all-lower-case.
2978            final String serverNameLC = serverName.toLowerCase();
2979            final VirtualHosts.VirtualHost vHost =
2980                VirtualHosts.getVirtualHostDetails(serverNameLC, null);
2981            if(null == vHost)
2982                { return(""); /* Don't recognise the name in this request. */ }
2983    
2984            final String localMirrorName = HostUtils.makeMirrorNameGeneric(mirrorTag);
2985            if(localMirrorName.equals(request.getServerName()))
2986                { return(""); /* Client is already using the alternate hostname for us. */ }
2987    
2988            // Return suitable prefix to make rrURL into absolute URL.
2989            return("http://" + localMirrorName);
2990            }
2991    
2992        /**Name of generic GenProps property for news-flash HTML segment. */
2993        private static final String GEN_NEWSFLASH_HTML = "newsflash.HTML";
2994    
2995        /**Get "newsflash" HTML for the main site front page, or "" if none; never null.
2996         * This is retrieved from the GenProps.
2997         */
2998        public static String getNewsflashHTML(final GenProps gp)
2999            {
3000            if(gp == null) { return(""); /* Be kind to the caller. */ }
3001            final String result = gp.getGen().get(GEN_NEWSFLASH_HTML);
3002            if(result == null) { return(""); }
3003            return(result);
3004            }
3005    
3006        /**Prefix of all user testimonials in the global properties. */
3007        private static final String TEST_PNAME_PREFIX = "org.hd.org.pg2k.testimonial.";
3008    
3009        /**Logically immutable cached lists of testimonal Strings by language (not locale); never null.
3010         * Private to getUserTestimonial().
3011         * <p>
3012         * No null keys, not null/empty values.
3013         * <p>
3014         * Small fixed size.
3015         * <p>
3016         * Thread-safe (and supporting reasonable concurrency if possible).
3017         */
3018        private static final Map<String, List<String>> _gUT_cache = new ConcurrentHashMap<String, List<String>>();
3019    
3020        /**Get short user quote/testimonial at random from those available for the given locale; never null but may be "".
3021         * This finds a quote, if any, suitable for the current locale
3022         * (infact, currently just the language is matched),
3023         * picked randomly from those available, or "" if none is available.
3024         * <p>
3025         * The text is HTML/XML safe, and is pure 7-bit printable ASCII,
3026         * with any non-ASCII characters encoded as HTML/XML entities.
3027         *
3028         * @param l  the required locale; never null
3029         */
3030        public static String getUserTestimonial(final LocaleBeanBase l)
3031            {
3032            if(l == null) { throw new IllegalArgumentException(); }
3033    
3034            // If the cache is completely empty then try to populate it now.
3035            // Other calls while we are builing the cache are safe
3036            // but may see empty entries until we are done.
3037            if(_gUT_cache.isEmpty())
3038                {
3039                try
3040                    {
3041                    // We build a map of by-language lists,
3042                    // which we then wrap up as immutable and post in the cache map (atomically).
3043                    final Map<String, ArrayList<String>> m = new HashMap<String, ArrayList<String>>();
3044                    final ResourceBundle gp = ResourceBundle.getBundle(CoreConsts.GLOBAL_PROPS_NAME);
3045                    // Common prefix for all testimonial properties.
3046                    final int prefixLength = TEST_PNAME_PREFIX.length();
3047                    // Regex pattern to match the tail of each testimonial property.
3048                    final Pattern p = Pattern.compile("^[a-z][a-z][.].*");
3049                    // Search the global properties...
3050                    for(final String key : gp.keySet())
3051                        {
3052                        // Skip entries not of interest to us.
3053                        if(!key.startsWith(TEST_PNAME_PREFIX)) { continue; }
3054                        // We expect to find 2 lower-case letters and then another dot
3055                        // and then a unique number/ID (which we don't use).
3056                        final String tail = key.substring(prefixLength);
3057                        if(!p.matcher(tail).matches())
3058                            {
3059                            System.err.println("WARNING: bad testimonial property name prefix "+key+" in "+CoreConsts.GLOBAL_PROPS_NAME+" properties");
3060                            continue;
3061                            }
3062                        final String lang = tail.substring(0, 2); // Extract the language code...
3063                        // Add this to the end of the appropriate List, or create one if need be.
3064                        ArrayList<String> list = m.get(lang);
3065                        if(list == null)
3066                            {
3067                            list = new ArrayList<String>();
3068                            m.put(lang, list);
3069                            }
3070                        list.add(gp.getString(key));
3071                        }
3072                    // Copy immutable versions of the by-language lists into the cache.
3073                    for(final String lang : m.keySet())
3074                        {
3075                        final ArrayList<String> arrayList = m.get(lang);
3076                        arrayList.trimToSize(); // Save space since this won't ever be expanded...
3077                        _gUT_cache.put(lang, Collections.unmodifiableList(arrayList)); // Atomically update cache.
3078                        }
3079                    }
3080                catch(final Throwable t)
3081                    {
3082                    System.err.println("ERROR: problem retrieving testimonial texts.");
3083                    t.printStackTrace();
3084                    return(""); // Nothing will be ready yet...
3085                    }
3086                }
3087    
3088            // Retrieve all quotes for the specified language.
3089            final List<String> quotes = _gUT_cache.get(l.getLocale().getLanguage());
3090            if(quotes != null)
3091                {
3092                assert(!quotes.isEmpty()); // Should be no empty lists.
3093                final int qSize = quotes.size();
3094                if(qSize == 1) { return(quotes.get(0)); }
3095                return(quotes.get(Rnd.fastRnd.nextInt(qSize)));
3096                }
3097    
3098            return(""); // Nothing available.
3099            }
3100    
3101    
3102        /**Private key used by getTrialData(); never null. */
3103        private static final AEPLinkedKey trailDataCacheKey = new AEPLinkedKey("trailDataCacheKey");
3104    
3105        /**Get the TrailData for a given trail exhibit (by full name); null if none.
3106         * This caches the result in the DSB, linked to the AEP,
3107         * so never retaining data for expired trails.
3108         * <p>
3109         * (This may negatively cache failure to load TrailData (for a while) for efficiency.)
3110         * <p>
3111         * Safe to apply to an arbitrary/unvetted exhibit name, even an invalid/null value.
3112         */
3113        @SuppressWarnings("unchecked")
3114        public static TrailData getTrailData(final DataSourceBean dsb,
3115                                             final Name.ExhibitFull trailExhibitFullName)
3116            {
3117            if((dsb == null) || (trailExhibitFullName == null)) { return(null); }
3118    
3119            // Get cache (or atomically create on first access, eg after an AEP change).
3120            ConcurrentMap<Name.ExhibitFull,TrailData> cache;
3121            while(null == (cache = (ConcurrentMap<Name.ExhibitFull,TrailData>) dsb.getAEPLinkedValue(trailDataCacheKey)))
3122                { dsb.putIfAbsentAEPLinkedValue(trailDataCacheKey, new ConcurrentHashMap<Name.ExhibitFull,TrailData>()); }
3123    
3124            // Return value immediately if already cached...
3125            final TrailData cachedValue = cache.get(trailExhibitFullName);
3126            if(cachedValue != null) { return(cachedValue); }
3127    
3128    //        // If the exhibit name is invalid (eg from bogus Web/unsafe input)
3129    //        // then don't both even trying to compute a value to save time and risk.
3130    //        if(!ExhibitName.validNameFinalComponentSyntax(trailExhibitName)) { return(null); }
3131    
3132            try
3133                {
3134                // Compute and cache value if possible.
3135                final TrailData value = TrailData.readTrailFromExhibit(dsb,
3136                        trailExhibitFullName, dsb.getLogger());
3137                if(value != null) { cache.putIfAbsent(trailExhibitFullName, value); }
3138                return(value);
3139                }
3140            catch(final Exception e)
3141                {
3142                // Report but absorb any error to avoid it propagating upwards...
3143                e.printStackTrace();
3144                return(null);
3145                }
3146            }
3147    
3148    
3149        /**Private key used by approxWordCount(); never null. */
3150        private static final AEPLinkedKey _awc_CacheKey = new AEPLinkedKey("_awc_CacheKey");
3151    
3152        /**Compute (crude) estimate of words in catalogue page for given exhibit; non-negative.
3153         * This is designed to be reasonably fast, though not necessarily amazingly accurate,
3154         * and is intended to help decide how many ad blocks a page may reasonably support.
3155         * <p>
3156         * This may cache its results against the AEP instance.
3157         * <p>
3158         * This counts 'non-furniture' words, ie those originating from the data itself,
3159         * including the exhibit name, exhibit description, tree AKA/description, etc,
3160         * with different constituents possibly weighted differently.
3161         * <p>
3162         * For simplicity, this does its computations based on the default site language,
3163         * even if there may be significant variation in apparent word count
3164         * for other localisations.
3165         *
3166         * @return zero in case of difficulty (eg exhibit does not exist),
3167         *     else approximate (positive) word count
3168         */
3169        public static final int approxWordCount(final DataSourceBean dsb,
3170                                                final Name.ExhibitFull fullExhibitName)
3171            {
3172            if((dsb == null) || (fullExhibitName == null))
3173                { throw new IllegalArgumentException(); }
3174    
3175            // Get existing (thread-safe) cache, or atomically create it if necessary.
3176            // The cache is a size-limited thread-safe Map with reasonably-fast get()
3177            // from full exhibit name to (positive) word count.
3178            // Races here may result in some wasted work but no errors.
3179            // Cap relative to heap size: about 8k (2^13) entries for 1GB (2^30) heap.
3180            MemoryTools.CacheMiniMap<Name.ExhibitFull,Integer> cachedCounts;
3181            while((cachedCounts = GenUtils.<MemoryTools.CacheMiniMap<Name.ExhibitFull,Integer>>cast(dsb.getAEPLinkedValue(_awc_CacheKey))) == null)
3182                { dsb.putIfAbsentAEPLinkedValue(_awc_CacheKey, MemoryTools.SimpleProbabilisticCache.<String, Integer>create(Math.max(128, (int) (Runtime.getRuntime().totalMemory() >>> 17)), _awc_CacheKey.comment)); }
3183            // Return the cached word count, if present.
3184            final Integer cachedCount = cachedCounts.get(fullExhibitName);
3185            if(cachedCount != null) { return(cachedCount.intValue()); }
3186    
3187            // Running word count for this exhibit.
3188            int wordCount = 0;
3189    
3190            // Count words in its name main component, roughly.
3191            wordCount += ExhibitName.getMainWordsCount(fullExhibitName, Collections.<String>emptySet());
3192            assert(wordCount > 0);
3193    
3194            // Get a default-locale LocaleBean
3195            // to extract the descriptive/AKA text with.
3196            final LocaleBeanBase lb = new LocaleBean();
3197    
3198            try
3199                {
3200                // Count words in any extant per-exhibit description text.
3201                final AllExhibitProperties aep = dsb.getAllExhibitProperties(-1);
3202    
3203                // Return (uncached) zero for non-extant exhibit.
3204                if(null == aep.aeid.getStaticAttr(fullExhibitName))
3205                    { return(0); }
3206    
3207                final ExhibitPropsLoadable epl = aep
3208                        .getExhibitPropsLoadable(fullExhibitName);
3209                final String description = epl.getDescription();
3210                // Use the slow-but-sensible (eg for line-end handling) tokeniser.
3211                if(description != null)
3212                    { wordCount += (new StringTokenizer(description)).countTokens(); }
3213    
3214                // Get AKA/treedesc text with as little markup as we can easily manage.
3215                final String akaText = GenUtils.getLocalisedTreeDesc(aep,
3216                        fullExhibitName, lb, true, true, false, false).toString();
3217                // Only count real spaces as word boundaries
3218                // and halve the raw word count to allow for markup, repetition, etc.
3219                // FIXME: use find() or somesuch to avoid redundant construction of substrings.
3220                if(!akaText.isEmpty())
3221                    { wordCount += (akaText.split(" +").length >>> 1); }
3222    
3223                // Add (at low weighting) word count of section text.
3224                final CharSequence sectionDesc = GenUtils.getLocalisedSectionDesc(aep,
3225                        ExhibitName.getCategoryComponent(fullExhibitName),
3226                        lb);
3227                // Only count real spaces as word boundaries
3228                // and quarter the raw word count to allow for markup and lack of uniqueness
3229                // ie the fact that descriptive text is shared between related exhibits.
3230                // FIXME: use find() or somesuch to avoid redundant construction of substrings.
3231                if(sectionDesc != null)
3232                    { wordCount += (sectionDesc.toString().split(" +").length >>> 2); }
3233                }
3234            catch(final IOException e)
3235                {
3236                // Shouldn't happen, but return what we have without cacheing...
3237    if(IsDebug.isDebug) { System.out.println("[WARNING: partial word count "+wordCount+" on cat page for "+fullExhibitName+".]"); }
3238                return(wordCount);
3239                }
3240    
3241            // Cache the (complete) approx word count for next time...
3242            cachedCounts.put(fullExhibitName, Integer.valueOf(wordCount));
3243    
3244    if(IsDebug.isDebug) { System.out.println("[Approx word count "+wordCount+" on cat page for "+fullExhibitName+".]"); }
3245            return(wordCount);
3246            }
3247    
3248        /**Events to be examined by isPopularCatalogueEntry(); never null.
3249         * These must all have VLONG data stored.
3250         */
3251        private static final List<SimpleVariableDefinition> _iPCE_vars = Arrays.asList(new SimpleVariableDefinition[] {
3252            SystemVariables.ACCESSPATTERN_CAT_PAGE_VIEW,
3253            SystemVariables.ACCESSPATTERN_COMPLETED_DOWNLOAD,
3254            SystemVariables.ACCESSPATTERN_COMPLETED_DOWNLOAD_LOCAL,
3255            });
3256    
3257        /**Returns true iff the named exhibit and/or catalogue page is popular (well visited/downloaded).
3258         * Uses the history to decide if a catalogue page and its exhibit
3259         * are frequently visited/downloaded
3260         * (wrt other catalogue pages globally and locally).
3261         *
3262         * @param vars  source of event history; never null
3263         * @param exhibitFullName  full exhibit name; never null
3264         * @return true if popular, false otherwise
3265         */
3266        public static boolean isPopularCatalogueEntry(final SimpleVariablePipelineIF vars,
3267                                                      final CharSequence exhibitFullName)
3268            {
3269            if((vars == null) || (exhibitFullName == null)) { throw new IllegalArgumentException(); }
3270    
3271            final String shortName = ExhibitName.getFileComponent(exhibitFullName).toString();
3272            // TODO: allow for shortened unique-key form in future...
3273    
3274            for(final SimpleVariableDefinition var : _iPCE_vars)
3275                {
3276                // Look for activity yesterday and today.
3277                final EventVariableValue eventsToday = vars.getEventValue(var, EventPeriod.VLONG, true);
3278                if(eventsToday.getRank(shortName) < (eventsToday.getTotalDistinctValues()/2))
3279                    { return(true); }
3280                final EventVariableValue eventsYesterday = vars.getEventValue(var, EventPeriod.VLONG, false);
3281                if(eventsYesterday.getRank(shortName) < (eventsYesterday.getTotalDistinctValues()/3))
3282                    { return(true); }
3283    
3284                // Now we look into the full collected history for this value.
3285                final EventVariableValue[] all = vars.getEventValues(var, EventPeriod.VLONG, 0, null);
3286                // If no "all" history at all or not enough to be significant
3287                // then the item may just not be especially popular...
3288                if(all.length < 1) { continue; }
3289                final EventVariableValue allEVV = all[0];
3290                if(allEVV == null) { continue; }
3291                if(allEVV.getRank(shortName) < (allEVV.getTotalDistinctValues()/4))
3292                    { return(true); }
3293                }
3294    
3295            return(false); // Not popular apparently.
3296            }
3297    
3298    
    /**Private key used by findLatestCodeBundle(); never null.
     * The value held against it in the DSB is that method's AEP-linked cache,
     * a map from bundle prefix to full exhibit name.
     */
    private static final AEPLinkedKey findLatestCodeBundleKey = new AEPLinkedKey("findLatestCodeBundleKey");

    /**Name of the section/dir in which code/doc bundles are filed.
     * findLatestCodeBundle() appends a '/' to this to form the leading part
     * that the full name of every candidate bundle must start with.
     */
    private static final String CODE_SECTION_DIR = "code";
3304    
3305        /**Returns full exhibit name for latest version of a code bundle, or null if none.
3306         * This locates the latest (with a major-minor-micro versioning) bundle
     * in the 'code' section, for the given prefix, or null if none.
3308         * <p>
3309         * The author and extension are ignored for selection purposes.
3310         * <p>
3311         * For example, for the prefix/argument 'javadoc',
3312         * if the code section includes the files
     * 'javadoc-1-2-3-DHD.zip' and 'javadoc-1-10-1-ANON.zip'
3314         * this will return 'code/javadoc-1-10-1-ANON.zip'.
3315         * <p>
3316         * To be found a bundled archive name must be exactly of the form:<br />
3317         * <code>prefix-major-minor-micro-AUTH.XTN</code><br />
3318         * where the major, minor and micro components are (small, non-negative) integers.
3319         * There must be no attributes present.
3320         * <p>
3321         * (Note that a '-' is appended to the supplied prefix.)
3322         * <p>
3323         * This may cache the results against the AEP instance,
     * since the lookup may happen many times
3325         * and we may have to search through a fair amount of data for each lookup.
3326         * Note: this does not cache negative results
3327         * in part to bound the amount of space that can be consumed.
3328         *
3329         * @param dsb  current data source; never null
3330         * @param prefix  legitimate short-name as bundle name; never null nor empty
3331         */
3332        @SuppressWarnings("unchecked")
3333        public static Name.ExhibitFull findLatestCodeBundle(final DataSourceBean dsb, final String prefix)
3334            {
3335            if(dsb == null) { throw new IllegalArgumentException(); }
3336            if((prefix == null) || (prefix.length() < 1)) { throw new IllegalArgumentException(); }
3337    
3338            // Look up the prefix in our (thread-safe) cache from prefix to full exhibit name.
3339            // This is AEP-linked, so is automatically discarded when a new AEP appears.
3340            ConcurrentMap<String,Name.ExhibitFull> cache = (ConcurrentMap<String,Name.ExhibitFull>) (dsb.getAEPLinkedValue(findLatestCodeBundleKey));
3341            Name.ExhibitFull result = null;
3342            if(cache != null)
3343                {
3344                result = cache.get(prefix);
3345                if(result != null) { return(result); /* Return cached result! */ }
3346                }
3347    
3348            // Do the lookup.
3349            // First quickly filter for only plausible candidates
3350            // with the correct prefix and in the correct section.
3351            final String fullNamePrefix = CODE_SECTION_DIR + '/';
3352            final String shortNamePrefix = prefix + "-";
3353            final AllExhibitProperties aep;
3354            try { aep = dsb.getAllExhibitProperties(-1); } catch(final IOException e) { return(null); /* Shouldn't really happen anyway... */ }
3355            final Name.ExhibitFull candidates[] = aep.select(new AEPFilter() {
3356                /* (non-Javadoc)
3357                 * @see org.hd.d.pg2k.svrCore.AllExhibitProperties.AEPFilter#accept(org.hd.d.pg2k.svrCore.AllExhibitProperties, java.lang.String)
3358                 */
3359                //@Override
3360                public boolean accept(final AllExhibitProperties aep, final Name.ExhibitFull fullExhibitName)
3361                    {
3362                    // Quickly filter for the correct section.
3363    //                if(!TextUtils.contentEquals(ExhibitName.getCategoryComponent(fullExhibitName), CODE_SECTION_DIR)) { return(false); }
3364                    if(!TextUtils.startsWith(fullExhibitName, fullNamePrefix)) { return(false); }
3365                    final Name.ExhibitShort shortName = fullExhibitName.getShortName();
3366                    // Quickly filter for the correct bundle name.
3367                    if(!TextUtils.startsWith(shortName, shortNamePrefix)) { return(false); }
3368                    // OK, we can check more thoroughly later..
3369                    return(true);
3370                    }
3371                }, null, 0);
3372            if(candidates.length == 0)
3373                {
3374    System.err.println("WARNING: no candidate bundles with prefix "+prefix);
3375                return(null); /* No candidates... */
3376                }
3377    
3378            // Now find the highest-versioned syntactically-valid candidate, if any...
3379            int bestMajor = -1;
3380            int bestMinor = -1;
3381            int bestMicro = -1;
3382            for(final Name.ExhibitFull c : candidates)
3383                {
3384                final Name.ExhibitShort shortName = c.getShortName();
3385                assert(TextUtils.startsWith(shortName, shortNamePrefix)) : "should have the correct bundle name";
3386                // Rip off prefix and tokenise remainder (and ignore trailing AUTH.XTN part as last token).
3387                final int snpl = shortNamePrefix.length();
3388                final String[] tokens = shortName.subSequence(snpl, shortName.length()).toString().split(ExhibitName.WORD_SEPS);
3389                if(tokens.length != 4)
3390                    {
3391    System.err.println("WARNING: badly-named (wrong token count "+tokens.length+") candidate bundle for prefix "+prefix+": "+c);
3392                    continue; /* Invalid format. */
3393                    }
3394                try
3395                    {
3396                    final int maj = Integer.parseInt(tokens[0], 10);
3397                    assert(maj >= 0);
3398                    final int min = Integer.parseInt(tokens[1], 10);
3399                    assert(min >= 0);
3400                    final int mic = Integer.parseInt(tokens[2], 10);
3401                    assert(mic >= 0);
3402                    if(maj < bestMajor) { continue; /* Too old. */ }
3403                    if(maj > bestMajor)
3404                        { result = c; bestMajor = maj; bestMinor = min; bestMicro = mic; continue; /* Best so far! */ }
3405                    if(min < bestMinor) { continue; /* Too old. */ }
3406                    if(min > bestMinor)
3407                        { result = c; bestMajor = maj; bestMinor = min; bestMicro = mic; continue; /* Best so far! */ }
3408                    if(mic < bestMicro) { continue; /* Too old. */ }
3409                    if(mic > bestMicro)
3410                        { result = c; bestMajor = maj; bestMinor = min; bestMicro = mic; continue; /* Best so far! */ }
3411                    }
3412                catch(final NumberFormatException e)
3413                    {
3414    System.err.println("WARNING: badly-named (NumberFormatException + "+e.getMessage()+") candidate bundle for prefix "+prefix+": "+c);
3415                    continue; /* Invalid format. */
3416                    }
3417                }
3418    
3419            // If the result is positive then cache it,
3420            // creating a new (thread-safe) cache if necessary.
3421            // Negative results are not cached.
3422            if(result != null)
3423                {
3424                while(cache == null)
3425                    {
3426                    dsb.putIfAbsentAEPLinkedValue(findLatestCodeBundleKey, new ConcurrentHashMap<String, Name.ExhibitFull>());
3427                    // Retrieve whatever the current cache now is (there may have been a race).
3428                    cache = (ConcurrentMap<String,Name.ExhibitFull>) (dsb.getAEPLinkedValue(findLatestCodeBundleKey));
3429                    }
3430                // Update the cached value if not already done...
3431                cache.putIfAbsent(prefix, result);
3432    
3433    if(IsDebug.isDebug) { System.out.println("INFO: found bundle for prefix "+prefix+" as "+result); }
3434                }
3435    
3436    else { System.err.println("WARNING: found no bundle for prefix "+prefix); }
3437    
3438            // Return the result!
3439            return(result);
3440            }
3441        }