001 /*
002 Copyright (c) 1996-2011, Damon Hart-Davis
003 All rights reserved.
004
005 Redistribution and use in source and binary forms, with or without
006 modification, are permitted provided that the following conditions are
007 met:
008
009 * Redistributions of source code must retain the above copyright
010 notice, this list of conditions and the following disclaimer.
011
012 * Redistributions in binary form must reproduce the above copyright
013 notice, this list of conditions and the following disclaimer in the
014 documentation and/or other materials provided with the
015 distribution.
016
017 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
018 IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
019 TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
020 PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
021 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
022 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
023 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
024 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
025 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
026 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
027 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
028 */
029
030 package org.hd.d.pg2k.webSvr.util;
031
032 import java.awt.image.BufferedImage;
033 import java.awt.image.ColorModel;
034 import java.awt.image.WritableRaster;
035 import java.io.IOException;
036 import java.io.InputStream;
037 import java.lang.ref.SoftReference;
038 import java.lang.ref.WeakReference;
039 import java.net.InetAddress;
040 import java.net.MalformedURLException;
041 import java.net.URL;
042 import java.net.UnknownHostException;
043 import java.util.ArrayList;
044 import java.util.Arrays;
045 import java.util.BitSet;
046 import java.util.Collection;
047 import java.util.Collections;
048 import java.util.Comparator;
049 import java.util.Enumeration;
050 import java.util.HashMap;
051 import java.util.HashSet;
052 import java.util.Iterator;
053 import java.util.LinkedList;
054 import java.util.List;
055 import java.util.ListIterator;
056 import java.util.Map;
057 import java.util.ResourceBundle;
058 import java.util.Set;
059 import java.util.SortedSet;
060 import java.util.StringTokenizer;
061 import java.util.TreeSet;
062 import java.util.concurrent.Callable;
063 import java.util.concurrent.ConcurrentHashMap;
064 import java.util.concurrent.ConcurrentMap;
065 import java.util.concurrent.Future;
066 import java.util.regex.Pattern;
067
068 import javax.servlet.ServletContext;
069 import javax.servlet.http.HttpServletRequest;
070
071 import org.hd.d.pg2k.svrCore.AccessionData;
072 import org.hd.d.pg2k.svrCore.AllExhibitProperties;
073 import org.hd.d.pg2k.svrCore.AllExhibitProperties.AEPFilter;
074 import org.hd.d.pg2k.svrCore.Compact7BitString;
075 import org.hd.d.pg2k.svrCore.CoreConsts;
076 import org.hd.d.pg2k.svrCore.ExhibitName;
077 import org.hd.d.pg2k.svrCore.ExhibitPropsComputable;
078 import org.hd.d.pg2k.svrCore.ExhibitPropsComputableMutable;
079 import org.hd.d.pg2k.svrCore.ExhibitPropsLoadable;
080 import org.hd.d.pg2k.svrCore.ExhibitStaticAttr;
081 import org.hd.d.pg2k.svrCore.ExhibitThumbnails;
082 import org.hd.d.pg2k.svrCore.GenUtils;
083 import org.hd.d.pg2k.svrCore.HostUtils;
084 import org.hd.d.pg2k.svrCore.ImageUtils;
085 import org.hd.d.pg2k.svrCore.LocaleBeanBase;
086 import org.hd.d.pg2k.svrCore.MemoryTools;
087 import org.hd.d.pg2k.svrCore.Name;
088 import org.hd.d.pg2k.svrCore.Name.ExhibitFull;
089 import org.hd.d.pg2k.svrCore.Rnd;
090 import org.hd.d.pg2k.svrCore.SimpleLoggerIF;
091 import org.hd.d.pg2k.svrCore.TextUtils;
092 import org.hd.d.pg2k.svrCore.ThreadUtils;
093 import org.hd.d.pg2k.svrCore.Tuple;
094 import org.hd.d.pg2k.svrCore.VarTools;
095 import org.hd.d.pg2k.svrCore.MIME.ExhibitMIME;
096 import org.hd.d.pg2k.svrCore.MIME.ExhibitMIME.ExhibitTypeParameters;
097 import org.hd.d.pg2k.svrCore.location.GeoProximity;
098 import org.hd.d.pg2k.svrCore.location.GeoUtils;
099 import org.hd.d.pg2k.svrCore.location.LoadBalancingUtils;
100 import org.hd.d.pg2k.svrCore.props.GenProps;
101 import org.hd.d.pg2k.svrCore.props.LocalProps;
102 import org.hd.d.pg2k.svrCore.vars.EventPeriod;
103 import org.hd.d.pg2k.svrCore.vars.EventVariableValue;
104 import org.hd.d.pg2k.svrCore.vars.InstanceID;
105 import org.hd.d.pg2k.svrCore.vars.SimpleVarStats;
106 import org.hd.d.pg2k.svrCore.vars.SimpleVariableDefinition;
107 import org.hd.d.pg2k.svrCore.vars.SimpleVariablePipelineIF;
108 import org.hd.d.pg2k.svrCore.vars.SimpleVariableValue;
109 import org.hd.d.pg2k.svrCore.vars.SystemVariables;
110 import org.hd.d.pg2k.webSvr.catalogue.TrailData;
111 import org.hd.d.pg2k.webSvr.exhibit.BuiltInFilters;
112 import org.hd.d.pg2k.webSvr.exhibit.DataSourceBean;
113 import org.hd.d.pg2k.webSvr.exhibit.DataSourceBean.AEPLinkedKey;
114 import org.hd.d.pg2k.webSvr.exhibit.FilterExpr;
115 import org.hd.d.pg2k.webSvr.exhibit.TreeFilterBean;
116 import org.hd.d.pg2k.webSvr.virtualHosts.VirtualHosts;
117 import org.hd.d.tmf.ThroughputMonitorFilterBase;
118 import org.w3c.dom.Node;
119
120 import ORG.hd.d.IsDebug;
121
122 /**Web-server-related utility functions.
123 * This is for algorithms only of interest to Web apps, often JSPs.
124 * <p>
125 * One advantage of having code here rather than in-line in a JSP
126 * is that it is pre-compiled off-line for speed and robustness;
127 * code here is also easier to test.
128 */
129 public final class WebUtils
130 {
131 /**Prevent construction of an instance: private constructor with an empty body for this utility class. */
132 private WebUtils() { }
133
134
135 /**Definition of the event/variable to which we post a voter's approximate geo location; an alias for SystemVariables.GENSTATS_STRING_GLOBAL_EVENT. */
136 public static final SimpleVariableDefinition VOTER_LOCATION_STATS_EVENT_DEF = SystemVariables.GENSTATS_STRING_GLOBAL_EVENT;
137
138 /**Prefix of event value for voter's approximate geo location; presumably the location text follows the '=' (posting code not visible here: confirm). */
139 public static final String VOTER_LOCATION_STATS_EVENT_PREFIX = "voterLocation=";
140
141
142 /**Private limited-size cache for chooseMirrorHostToBalanceLoad() for best mirror for given client IP.
143 * Maps client IP address to a pair of the chosen full mirror host name and the time
144 * (as System.currentTimeMillis() ms) after which that choice must be treated as stale.
145 * <p>
146 * A lock can be held on this object to make multiple operations atomic;
147 * the lookup in chooseMirrorHostToBalanceLoad() does its get()/remove() under such a lock.
148 * NOTE(review): a put() in that method is made without holding this lock; confirm that is safe.
149 * <p>
150 * Entries in this map go stale very quickly (see MAX_cache_cMHTBL_byClient_AGE_MS)
151 * and are mainly here to avoid repeated recalculations while preparing
152 * one HTML page to serve to the client for example.
153 * <p>
154 * This has a bounded size to limit memory consumption and to avoid the need for
155 * explicit cleaning of old entries (they can just hang around until displaced).
156 * We vary the size argument a little (1001 + Rnd.fastRnd.nextInt(601), evaluated once in this static initialiser)
157 * to try to avoid deadly embraces.
158 * NOTE(review): presumably create()'s numeric arguments are minimum and maximum sizes; confirm in MemoryTools.
159 * A size limit of a few thousand to allow for a few mega-proxies,
160 * and a time limit of some tens of seconds, is probably about right.
161 * We allow this to be entirely discarded when memory is very stressed.
162 */
163 private static final MemoryTools.SimpleLRUMapAutoSizeForHitRate<InetAddress,Tuple.Pair<String,Long>> _cache_cMHTBL_byClient =
164 MemoryTools.SimpleLRUMapAutoSizeForHitRate.<InetAddress, Tuple.Pair<String,Long>>create(0, 1001 + Rnd.fastRnd.nextInt(601), "_cache_cMHTBL_byClient");
165
166 /**Maximum time an entry in _cache_cMHTBL_byClient is valid in ms; strictly positive (assuming VarTools.MIN_AGE_MS is non-negative).
167 * Computed as 1001ms plus one third of VarTools.MIN_AGE_MS; added to the current time when an entry is stored.
168 * Should be no more than the maximum staleness of the mirror liveness variables,
169 * and is mainly intended to avoid repeated recalculations, while preparing a single HTML page for a user, giving multiple different values on that one page...
170 */
171 private static final int MAX_cache_cMHTBL_byClient_AGE_MS = 1001 +
172 VarTools.MIN_AGE_MS/3;
173
174 /**Lock to prevent multiple simultaneous polls of the main or "loopback" URLs at once.
175 * Held by chooseMirrorHostToBalanceLoad() around its LoadBalancingUtils.testIfHTTPServerIsUp() calls.
176 * Otherwise multiple incoming requests may trigger multiple outgoing polls which actually overwhelm (or trigger throttling by) the remote machine(s).
177 */
178 private static final Object _lock_fallback_URL_poll = new Object();
179
180 /**Reciprocal of the fraction of time we redistribute load to possibly non-optimal mirrors; strictly positive.
181 * chooseMirrorHostToBalanceLoad() perturbs its mirror ordering only when Rnd.fastRnd.nextInt(REDIST_FRAC) is 0,
182 * so a higher value means that we redistribute traffic to possibly-non-optimal mirrors less often.
183 * The value is the COUNTRY closeness plus Rnd.fastRnd.nextInt(COUNTRYGROUP closeness), evaluated once in this static initialiser.
184 * <p>
185 * A little bit of redistributed traffic makes the system more robust
186 * and primed for action, but may give some unlucky users poor performance.
187 * <p>
188 * We make this probability lower (this value higher) as the potential (maximum) cost of a wrong choice goes up.
189 * The biggest error we can make for most users is to send them to
190 * the furthest corner of the planet when there is a same-COUNTRY server available.
191 */
192 private static final int REDIST_FRAC = GeoProximity.COUNTRY.getCloseness() + Rnd.fastRnd.nextInt(GeoProximity.COUNTRYGROUP.getCloseness());
193
194 /**If true, always select the fastest available mirror; don't use any stochastic factor (while false, chooseMirrorHostToBalanceLoad() takes its fastest-first path only when there is a single candidate). */
195 private static final boolean ALWAYS_SELECT_FASTEST_MIRROR = false;
196
197 /**If true, then ignore mirrors not close to the user unless we're desperate to shed load (see ignoreRemoteMirrorsNow()).
198 * This can take precedence over ALWAYS_SELECT_FASTEST_MIRROR if the fastest mirrors are remote.
199 */
200 private static final boolean IGNORE_REMOTE_MIRRORS_UNLESS_OVERLOADED = true;
201
202 /**Decides whether distant (proximity 'NONE') mirrors should be left out of the candidate set for now.
203 * They are left out only while IGNORE_REMOTE_MIRRORS_UNLESS_OVERLOADED is set and this server is
204 * neither conserving power nor overloaded, ie while there is no pressing need to shed load.
205 * @param ctxt servlet context handed on unchanged to WebUtils.isOverloaded()
206 * @return true if 'NONE'-proximity mirrors should be ignored right now, else false
207 */
208 private static final boolean ignoreRemoteMirrorsNow(final ServletContext ctxt)
209 {
210 // With the policy switched off remote mirrors are never ignored, and no load probes are made.
211 final boolean policyOn = IGNORE_REMOTE_MIRRORS_UNLESS_OVERLOADED;
212 // Desperate to shed load (saving power, else overloaded): keep all otherwise-good mirrors in play.
213 final boolean mustShedLoad = policyOn && (GenUtils.mustConservePower() || WebUtils.isOverloaded(ctxt));
214 return(policyOn && !mustShedLoad); // Not desperate to shed load, so can be picky about mirrors...
215 }
216
217 /**Get name (and optional port) of server for best user experience; never null/empty.
218 * This is a hostname[:port] suitable to insert into the URL
219 * for retrieving an exhibit or another page, etc.
220 * <p>
221 * This will attempt to pick one based on a number of factors
222 * such as current load, client proximity, which mirrors are up,
223 * which mirrors are on the same AEP version as us, etc.
224 * <p>
225 * If this cannot choose a suitable server/mirror name
226 * it returns the generic "main" name, CoreConsts.MAIN_DATA_HOST.
227 * <p>
228 * Though either or both arguments can be null,
229 * this is most likely to give useful non-default answers
230 * when both values are non-null.
231 * <p>
232 * A couple of items of hardwired policy need to be pointed out:
233 * <ul>
234 * <li>If the client for this request looks as if it is from a spider
235 * (and therefore may cache the results for a long time)
236 * then we always return the generic main hostname for speed
237 * and for long-term robustness.
238 * <li>If there is no request/IP address for the client
239 * (so we could do no more than statistical load balancing at best)
240 * then we always return the generic main hostname for speed
241 * and in the hope that we will be doing
242 * DNS-based load/proximity stuff sometime.
243 * </ul>
244 * <p>
245 * Though we may sometimes randomise the selected mirror to spread load around,
246 * we try to avoid egregious selections, eg on the other side of the planet.
247 *
248 * @param request the incoming request or null if none available
249 * @param vars access to the system variables or null if not available
250 *
251 * @return a "mirror" subdomain of CoreConsts.MAIN_DATA_HOST,
252 * or by default CoreConsts.MAIN_DATA_HOST
253 * thus relying on DNS-based balancing
254 */
255 public static String chooseMirrorHostToBalanceLoad(final HttpServletRequest request,
256 final DataSourceBean vars)
257 {
258 // The default host name to fall back to
259 // in the absence of any available/suitable mirrors.
260 // We must not cache the fallbackHost by IP address
261 // because multiple clients may share a single IP address (eg at a large proxy)
262 // and the fallbackHost may be computed depending on client-specific features.
263 String fallbackHost = CoreConsts.MAIN_DATA_HOST;
264
265 // Get the client's IP address if possible.
266 // If the IP address is available
267 // then see if we have a valid cached result for this exact client IP and return it immediately!
268 // This is the fastest-possible path through the routine.
269 InetAddress clientIP = null;
270 if(request != null)
271 {
272 try
273 {
274 clientIP = InetAddress.getByName(request.getRemoteAddr());
275 assert(clientIP != null);
276
277 // Look in cache for non-stale entry for this exact client IP...
278 // (Fall through to do normal computation if not present.)
279 synchronized(_cache_cMHTBL_byClient)
280 {
281 final Tuple.Pair<String,Long> entry = _cache_cMHTBL_byClient.get(clientIP);
282 if(entry != null)
283 {
284 if(entry.second.longValue() >= System.currentTimeMillis())
285 { return(entry.first); }
286 // Remove the useless stale entry that will otherwise keep being held onto LRU!
287 _cache_cMHTBL_byClient.remove(clientIP);
288 }
289 }
290 }
291 catch(final UnknownHostException e) { /* Cannot parse client address... */ }
292 }
293 // No request and thus no client IP address,
294 // so always return the generic/fallback hostname.
295 else
296 { return(fallbackHost); }
297
298
299 // If the client looks like it might cache the result for a long time
300 // then we give it the generic domain for robustness (and speed).
301 // The most obvious case is spidering of pages to go in a search engine
302 // for recall years later when specific mirrors may have come and gone.
303 if((request != null) && requestProbablyFromSpider(request))
304 { return(fallbackHost); }
305
306
307 // If we can extract the host from the original request URL,
308 // and it is a legitimate name/alias for the site,
309 // then make it (and any non-standard port) the fallback.
310 if(request != null)
311 {
312 try
313 {
314 final URL requestURL = new URL(request.getRequestURL().toString());
315 final String ourName = requestURL.getHost();
316 final int ourPort = requestURL.getPort();
317 final String fullName = ((ourPort == 80) || (ourPort == -1)) ? ourName :
318 (ourName + ':' + ourPort);
319 if(!fullName.equals(fallbackHost) &&
320 (null != VirtualHosts.getVirtualHostDetails(ourName, null)))
321 { fallbackHost = fullName; }
322 }
323 catch(final MalformedURLException e)
324 {
325 e.printStackTrace(); // Shouldn't really happen...
326 }
327 }
328
329 //System.out.println("chooseMirrorHostToBalanceLoad(): *** START CALC *** : clientIP = " + clientIP);
330
331
332 // In the case that we have the client's request available
333 // and the default/canonical domain/host seems to be unavailable,
334 // then we'll use the hostname the client specified for us
335 // in its request URL as the fallback.
336 // (This also allows off-line testing of the Gallery!)
337 if((request != null) && (fallbackHost.equals(CoreConsts.MAIN_DATA_HOST)))
338 {
339 // Prevent multiple redundant polls on the main/own URLs from occurring at once,
340 // which may have the effect of overwhelming the remote site.
341 synchronized(_lock_fallback_URL_poll)
342 {
343 final Boolean mainDomainIsUp =
344 LoadBalancingUtils.testIfHTTPServerIsUp(CoreConsts.MAIN_DATA_HOST, true);
345
346 // Try the client's suggested hostname
347 // if the main host is definitely down
348 // (because otherwise we are stuck!).
349 // Security note: we must not trust this client-supplied
350 // information nor cache it.
351 if(Boolean.FALSE.equals(mainDomainIsUp))
352 {
353 try
354 {
355 // If the main host is not available,
356 // but the name under which the client arrived at this server *is*,
357 // then make the client's name for us the fallback hostname.
358 // We don't cache these client-supplied names and their liveness results,
359 // since we cannot necessarily trust them and they may be unbounded in number.
360 final URL requestURL = new URL(request.getRequestURL().toString());
361 final String ourName = requestURL.getHost();
362 final int ourPort = requestURL.getPort();
363 final String fullName = ((ourPort == 80) || (ourPort == -1)) ? ourName :
364 (ourName + ':' + ourPort);
365 // Don't bother testing liveness
366 // if the client's request hostname is already the fallback host...
367 // Nor if this does not look like one of our valid virtual names
368 // (to avoid pestering third-party sites by mistake/proxy).
369 if(!fullName.equals(fallbackHost) &&
370 (null != VirtualHosts.getVirtualHostDetails(ourName, null)))
371 {
372 // Do not look these up asynchronously
373 // nor cache the results.
374 if(Boolean.TRUE.equals(LoadBalancingUtils.testIfHTTPServerIsUp(fullName, false)))
375 {
376 System.err.println("WARNING: chooseMirrorHostToBalanceLoad(): having to fall back to our host as " + fullName);
377 fallbackHost = fullName;
378 }
379 }
380 }
381 catch(final IOException e) { } // Ignore/absorb.
382 }
383 }
384 }
385
386
387 // If we don't have access to variables to check for live mirrors
388 // then return the fallback host immediately...
389 // Don't cache this result,
390 // as we didn't have all information to hand that we should have had.
391 if(vars == null)
392 { return(fallbackHost); /* Cannot find a working mirror, and can't cache this result. */ }
393
394
395 // See what mirrors *claim* to be available...
396 // We will check in a moment if they actually are...
397 final Map<String,Long> liveMirrors = LoadBalancingUtils.getActiveMirrors(vars, true);
398 // If there are no mirrors claiming to be live
399 // then return immediately with the generic name.
400 if(liveMirrors.size() < 1)
401 { return(fallbackHost); /* Cannot find a working mirror, and can't cache this result. */ }
402
403 if(LoadBalancingUtils.LOG_BALANCING_DECISIONS) { System.out.println("chooseMirrorHostToBalanceLoad(): mirror bandwidths: " + liveMirrors); }
404
405
406 // Filter out any mirrors not using the same AEP version as us.
407 // Else the user may, for example, get a 404 on a new exhibit!
408 // TODO: consider ignoring this filter if we're desperate to reduce load.
409 try
410 {
411 final AllExhibitProperties aep = vars.getAllExhibitProperties(-1);
412 final Long ourHash = new Long(aep.longHash);
413
414 // First collect all those mirrors whose hash is correct...
415 // Usually it will be all of them.
416 final Set<InstanceID> goodMirrorIDs = new HashSet<InstanceID>(liveMirrors.size());
417 final SimpleVariableValue hashes =
418 vars.getVariable(SystemVariables.ThroughputMonitorFilter_AEP_LONGHASH);
419 if(hashes != null)
420 {
421 // Zap any stale values...
422 final SimpleVariableValue h2 = hashes.removeAllKeysOlder(System.currentTimeMillis() - VarTools.MIN_AGE_MS);
423 final Map<InstanceID,SimpleVariableValue> globalMap = h2.getGlobalMap();
424 if(globalMap != null)
425 {
426 for(final InstanceID id : globalMap.keySet())
427 {
428 if(ourHash.equals(globalMap.get(id).getValue()))
429 { goodMirrorIDs.add(id); /* Found a good mirror... */ }
430 }
431 }
432 }
433
434 // If none of the candidate mirrors has the right AEP hash
435 // then return immediately but don't cache this result.
436 if(goodMirrorIDs.size() < 1)
437 {
438 if(IsDebug.isDebug || LoadBalancingUtils.LOG_BALANCING_DECISIONS) { System.out.println("chooseMirrorHostToBalanceLoad(): no other mirror on same AEP hash as us"); }
439 return(fallbackHost); /* Cannot find a suitable mirror, and can't cache this result. */
440 }
441
442 if(IsDebug.isDebug || LoadBalancingUtils.LOG_BALANCING_DECISIONS) { System.out.println("chooseMirrorHostToBalanceLoad(): mirrors on same AEP hash as us: " + new ArrayList<InstanceID>(goodMirrorIDs)); }
443
444 // Convert list of good IDs to list of good mirrors...
445 final SimpleVariableValue activeMirrors =
446 vars.getVariable(SystemVariables.ThroughputMonitorFilter_ACTIVE_MIRROR_NAME);
447 final Set<String> goodMirrorTags = new HashSet<String>(goodMirrorIDs.size());
448 if(activeMirrors != null)
449 {
450 final Map<InstanceID,SimpleVariableValue> globalMap = activeMirrors.getGlobalMap();
451 if(globalMap != null)
452 {
453 for(final InstanceID id : goodMirrorIDs)
454 {
455 final SimpleVariableValue svv = globalMap.get(id);
456 if(svv != null)
457 {
458 final Object value = svv.getValue();
459 if(value instanceof String)
460 { goodMirrorTags.add((String) value); }
461 }
462 }
463 }
464 }
465
466 // Iterate over live mirrors, zapping any unsuitable ones.
467 for(final Iterator<String> it = liveMirrors.keySet().iterator(); it.hasNext(); )
468 {
469 final String mirror = it.next();
470 if(!goodMirrorTags.contains(mirror)) { it.remove(); }
471 }
472
473 // If none of the candidate mirrors has the right AEP hash,
474 // then return immediately but don't cache this result.
475 if(liveMirrors.size() < 1)
476 {
477 if(IsDebug.isDebug || LoadBalancingUtils.LOG_BALANCING_DECISIONS) { System.out.println("chooseMirrorHostToBalanceLoad(): no mirror on same AEP hash as us from candidate set"); }
478 return(fallbackHost); /* Cannot find a suitable mirror, and can't cache this result. */
479 }
480 }
481 catch(final Exception e)
482 {
483 // Could not filter by hash for some reason...
484 // Whinge, but continue, as this is not essential...
485 e.printStackTrace();
486 }
487
488
489 // Sort the mirrors into bandwidth order and drop any known to be dead
490 // (or unreachable by us).
491 final List<String> orderedList = LoadBalancingUtils.orderMirrorTagsBestBandwidthFirst(liveMirrors);
492 // If they've all been filtered out
493 // then return immediately with the fallback name.
494 if(orderedList.size() < 1)
495 { return(fallbackHost); /* Cannot find a working mirror, and can't cache this result. */ }
496
497
498 // If this host is a mirror
499 // and this mirror appears at all in the list of surviving candidates
500 // and the user has explicitly named this mirror alias as the hostname
501 // then return this mirror name as the preferred mirror
502 // so that the user's explicit mirror choice is respected.
503 if(request != null)
504 {
505 final String mirrorTag = LocalProps.getMirrorTag();
506 if((mirrorTag != null) && orderedList.contains(mirrorTag))
507 {
508 try
509 {
510 final URL requestURL = new URL(request.getRequestURL().toString());
511 final String hostName = requestURL.getHost();
512 final String mirrorName = LoadBalancingUtils.makeMirrorNameFromTag(mirrorTag);
513 if(mirrorName.equalsIgnoreCase(hostName))
514 { return(mirrorName); }
515 }
516 catch(final MalformedURLException e)
517 { /* Ignore/absorb this error... */ }
518 }
519 }
520
521
522 // Filter out any mirrors with no proximity to the user
523 // unless we are desperate to shed load
524 // in which case we will consider all legitimate/running mirrors.
525 // Note that if removing non-proximal mirrors empties the list
526 // then we use it as-was.
527 String clientLocationQuick = null; // Quick attempt to locate user.
528 if(IGNORE_REMOTE_MIRRORS_UNLESS_OVERLOADED &&
529 (clientIP != null) &&
530 ignoreRemoteMirrorsNow((vars == null) ? null : vars.getServletContext()))
531 {
532 clientLocationQuick = GeoUtils.getRegionByAddress(clientIP, true); // Quick lookup, minimum resources.
533 if(IsDebug.isDebug || LoadBalancingUtils.LOG_BALANCING_DECISIONS) { System.out.println("chooseMirrorHostToBalanceLoad(): considering ignoring mirrors with client proximity 'NONE' for client@"+clientLocationQuick); }
534 // Only filter out mirrors if we have extracted a fairly definite client location.
535 if(GeoUtils.CCTLD.isSyntaticallyValidCcTLD(clientLocationQuick) ||
536 GeoUtils.isSyntaticallyValidRegistryName(clientLocationQuick))
537 {
538 final List<String> toRemove = new ArrayList<String>(orderedList.size());
539 if(IsDebug.isDebug || LoadBalancingUtils.LOG_BALANCING_DECISIONS) { System.out.println("chooseMirrorHostToBalanceLoad(): ignoring mirrors with client proximity 'NONE'"); }
540 for(final String tag : orderedList)
541 {
542 final GeoUtils.CCTLD mirrorCC;
543 try { mirrorCC = new GeoUtils.CCTLD(tag.substring(0, 2)); }
544 catch(final IllegalArgumentException e) { e.printStackTrace(); continue; /* FIXME: shouldn't happen... */ }
545 // Kill this tag if no proximity (or unknown proximity) to client.
546 if(GeoUtils.computeProximity(clientLocationQuick, mirrorCC) == GeoProximity.NONE)
547 { toRemove.add(tag); }
548 }
549 // If we've selected to remove nothing or everything then leave the list intact.
550 // In part this helps ensure that the answer may be cacheable.
551 if(!toRemove.isEmpty() && (toRemove.size() < orderedList.size()))
552 { orderedList.removeAll(toRemove); }
553 }
554 }
555
556
557 // If we always select the fastest mirror,
558 // or if we only have one candidate,
559 // then work through the list fastest-first for speed/simplicity...
560 if(ALWAYS_SELECT_FASTEST_MIRROR || (orderedList.size() == 1))
561 {
562 //System.out.println("chooseMirrorHostToBalanceLoad(): mirrors to try in order: " + orderedList);
563
564 // One at a time,
565 // from first (best) to worst (last)
566 // check if we know the mirror to be alive or dead,
567 // or if we have an expired cache entry,
568 // then check it explicitly and update the cache accordingly.
569 // Return the first (fastest) live mirror found.
570 final String result = LoadBalancingUtils.findFirstWorkingMirror(orderedList);
571 if(result != null)
572 {
573 // File the result and return it...
574 if(clientIP != null)
575 { _cache_cMHTBL_byClient.put(clientIP, new Tuple.Pair<String, Long>(result, new Long(System.currentTimeMillis() + MAX_cache_cMHTBL_byClient_AGE_MS))); }
576 return(result);
577 }
578
579 // Cannot choose a specific mirror so return the fallback host name.
580 // If nothing else, and if this is the generic name,
581 // then this may achieve some DNS-based load balancing.
582 return(fallbackHost); // Cannot find a working mirror.
583 }
584
585 // Compute the client's location as a country code or
586 // (less good) a region/registry
587 // if there is actually a choice of mirrors...
588 // Compute a rough-and-ready quick value,
589 // and sometimes additionally try to compute a more accurate value,
590 // but more slowly.
591 String clientLocationSlow = null;
592 if(clientIP != null)
593 {
594 // Get approximate location first with a quick lookup (if not already done)...
595 // We'll be happy if this gives a country or a region/registry name.
596 // We don't need complete accuracy on this,
597 // just a rough indication for routing.
598 if(null == clientLocationQuick)
599 { clientLocationQuick = GeoUtils.getRegionByAddress(clientIP, true); }
600
601 // If we haven't got a country OR region code
602 // then do a full (slow, expensive) lookup.
603 // We could skip this if all available mirrors are in one country/region.
604 if(!GeoUtils.CCTLD.isSyntaticallyValidCcTLD(clientLocationQuick) &&
605 !GeoUtils.isSyntaticallyValidRegistryName(clientLocationQuick))
606 { clientLocationSlow = GeoUtils.getRegionByAddress(clientIP, false); }
607
608 //System.out.println("chooseMirrorHostToBalanceLoad(): client location: " + clientLocation);
609 }
610
611
612 // If we appear not to have any sort of valid country code or region
613 // then we're not going to be able to do better
614 // than just basing our choice on the available bandwidth.
615 final boolean bandwidthOnly =
616 (!GeoUtils.isSyntaticallyValidRegistryName(clientLocationQuick) &&
617 !GeoUtils.CCTLD.isSyntaticallyValidCcTLD(clientLocationQuick)) &&
618 (!GeoUtils.isSyntaticallyValidRegistryName(clientLocationSlow) &&
619 !GeoUtils.CCTLD.isSyntaticallyValidCcTLD(clientLocationSlow));
620
621 // Choose from available mirrors based on more factors than just bandwidth if possible.
622 // We include a notion of proximity (ie RTT, packet loss, reliability, etc),
623 // and stochastically spread load amongst servers in proportion to their capacity,
624 // though having discarded the worst entries so as never to give too poor a performance.
625 final Map<String,Long> weightings = new HashMap<String, Long>(1 + 2*orderedList.size());
626 // By default, retain just the entries for items still in the ordered list of candidates.
627 for(final String tag : orderedList)
628 { weightings.put(tag, liveMirrors.get(tag)); }
629
630 if(!bandwidthOnly)
631 {
632 // Create a new map of available bandwidth multiplied by proximity
633 // for all the remaining candidates
634 // TODO (possibly filtering out those with least-good proximity to the user)
635 // and use it to re-sort our ordered list.
636 // Compute two versions: one with our quick result and one with our slow result,
637 // and use whichever gives the highest "top" weighting.
638
639 // The best "top" weighting that we found so far.
640 // Initially this is the bandwidth of the fastest mirror.
641 final long bestWeight = liveMirrors.get(orderedList.get(0)).longValue();
642
643 // Compute weightings with quick lookup...
644 if(clientLocationQuick != null)
645 {
646 final Map<String,Long> weightingsQuick = new HashMap<String, Long>(1 + 2*orderedList.size());
647 for(final String tag : orderedList)
648 {
649 final GeoUtils.CCTLD mirrorCC = new GeoUtils.CCTLD(tag.substring(0, 2)); // Extract country of mirror.
650 final int proximity = GeoUtils.computeProximity(clientLocationQuick, mirrorCC).getCloseness();
651 weightingsQuick.put(tag, new Long(liveMirrors.get(tag).longValue() * proximity));
652 }
653 final ArrayList<String> tmp = new ArrayList<String>(orderedList);
654 Collections.sort(tmp, new LoadBalancingUtils.BWOrder(weightingsQuick));
655 final long topWeight = weightingsQuick.get(tmp.get(0)).longValue();
656 // If this is better than before, reorder the main list and save the weightings.
657 if(topWeight > bestWeight)
658 {
659 Collections.sort(orderedList, new LoadBalancingUtils.BWOrder(weightingsQuick));
660 weightings.clear();
661 weightings.putAll(weightingsQuick);
662 }
663 }
664
665 // Compute weightings with slow/accurate lookup...
666 if(clientLocationSlow != null)
667 {
668 final Map<String,Long> weightingsSlow = new HashMap<String, Long>(1 + 2*orderedList.size());
669 for(final String tag : orderedList)
670 {
671 final GeoUtils.CCTLD mirrorCC = new GeoUtils.CCTLD(tag.substring(0, 2)); // Extract country of mirror.
672 final int proximity = GeoUtils.computeProximity(clientLocationSlow, mirrorCC).getCloseness();
673 weightingsSlow.put(tag, new Long(liveMirrors.get(tag).longValue() * proximity));
674 }
675 final ArrayList<String> tmp = new ArrayList<String>(orderedList);
676 Collections.sort(tmp, new LoadBalancingUtils.BWOrder(weightingsSlow));
677 final long topWeight = weightingsSlow.get(tmp.get(0)).longValue();
678 // If this is better than before, reorder the main list and save the weightings.
679 if(topWeight > bestWeight)
680 {
681 Collections.sort(orderedList, new LoadBalancingUtils.BWOrder(weightingsSlow));
682 weightings.clear();
683 weightings.putAll(weightingsSlow);
684 }
685 }
686
687 //System.out.println("chooseMirrorHostToBalanceLoad(): mirror weightings: " + weightings);
688 }
689
690
691 // Sometimes, perturb our list so that the first item in the list
692 // will be picked in proportion to its weighting
693 // (it will be swapped from its usual rank with the top item)
694 // and all other entries will be left as they are.
695 // This should distribute some load to all mirrors.
696 // We don't do this all the time because generally
697 // we want to direct requests to the best (closest*fastest) mirror.
698 // We don't promote/select a host with no proximity to the user.
699 long totalWeight = 0;
700 for(final Long w : weightings.values())
701 { totalWeight += w.longValue(); }
702 if((totalWeight > Integer.MAX_VALUE) || (totalWeight < weightings.size()))
703 { System.err.println("WARNING: weightings overflowed/underflowed in chooseMirrorHostToBalanceLoad(): " + totalWeight); }
704 else if(Rnd.fastRnd.nextInt(REDIST_FRAC) == 0) // Sometimes perturb...
705 {
706 // Pick the mirror to promote...
707 int r = Rnd.fastRnd.nextInt((int) totalWeight);
708 for(int i = 0; i < orderedList.size(); ++i)
709 {
710 final String tag = orderedList.get(i);
711 final int w = weightings.get(tag).intValue();
712 if(r < w)
713 {
714 if(i != 0)
715 {
716 final GeoUtils.CCTLD mirrorCC = new GeoUtils.CCTLD(tag.substring(0, 2)); // Extract country of mirror.
717 final GeoProximity qProx = (clientLocationQuick == null) ? GeoProximity.NONE :
718 GeoUtils.computeProximity(clientLocationQuick, mirrorCC);
719 final GeoProximity sProx = (clientLocationSlow == null) ? GeoProximity.NONE :
720 GeoUtils.computeProximity(clientLocationSlow, mirrorCC);
721 // If we can't be fairly sure that this mirror isn't a long way from the user
722 // then skip over this entry hoping that a later one might be OK.
723 if((qProx == GeoProximity.NONE) && (sProx == GeoProximity.NONE))
724 { continue; }
725
726 // This is the one we have selected to promote,
727 // and it isn't the top slot anyway,
728 // so swap it into position 0,
729 // and stop.
730 final String tmp = orderedList.get(0);
731 orderedList.set(0, tag);
732 orderedList.set(i, tmp);
733 if(LoadBalancingUtils.LOG_BALANCING_DECISIONS) { System.out.println("chooseMirrorHostToBalanceLoad(): perturbed mirror list to: " + orderedList + " at probability " + (w / (float) totalWeight)); }
734 }
735 break;
736 }
737
738 r -= w;
739 }
740 }
741
742
743 // Go through the (possibly-reordered) mirror tag list in order,
744 // using the first available working mirror we find
745 // else drop back to the fallback host.
746 final String result = LoadBalancingUtils.findFirstWorkingMirror(orderedList);
747 if(result != null)
748 {
749 if(LoadBalancingUtils.LOG_BALANCING_DECISIONS) { System.out.println("chooseMirrorHostToBalanceLoad(): *** BALANCING CHOICE MADE *** |clientIP|locationQuick|locationSlow|mirror| = |" + clientIP + "|" + clientLocationQuick + "|" + clientLocationSlow + "|" + result + "|"); }
750
751 // File the result and return it...
752 if(clientIP != null)
753 { _cache_cMHTBL_byClient.put(clientIP, new Tuple.Pair<String, Long>(result, new Long(System.currentTimeMillis() + MAX_cache_cMHTBL_byClient_AGE_MS))); }
754 return(result);
755 }
756
757 // Cannot choose a specific mirror so return the fallback host name.
758 // If nothing else, and if this is the generic name,
759 // then this may achieve some DNS-based load balancing.
760 return(fallbackHost);
761 }
762
763 /**Handler for exhibit voting; holds no strong references to anything important. */
764 private static final class VoteHandler extends StatsSink.AbstractStatsListener
765 {
766 private final ExhibitFull exhibitFullName;
767 private final WeakReference<SimpleVariablePipelineIF> varsWR;
768 private final String dpID;
769 private final InetAddress voterIPAddr;
770 private final long expireBy;
771
772 private VoteHandler(
773 final String uniqueDataPointID,
774 final long expireBy,
775 final ExhibitFull exhibitFullName,
776 final WeakReference<SimpleVariablePipelineIF> varsWR,
777 final InetAddress voterIPAddr)
778 {
779 super(uniqueDataPointID, expireBy);
780 this.exhibitFullName = exhibitFullName;
781 this.varsWR = varsWR;
782 dpID = uniqueDataPointID;
783 this.voterIPAddr = voterIPAddr;
784 this.expireBy = expireBy;
785 }
786
787 @Override public final String handle(final Map<String, String[]> parameters)
788 {
789 // When we've done, redirect back to the exhibit page...
790 // Stick a random parameter value on the end to ensure that
791 // all common/broken browsers (eg IE6, FF1) reload the page.
792 final String result = WebUtils.makeCatPageRRURL(exhibitFullName, WebConsts.F_secondary_generated_HTML_suffix) + "?rnd="+(Rnd.fastRnd.nextLong() >>> 1);
793
794 // If the pipeline has gone away then return immediately...
795 final SimpleVariablePipelineIF vars = varsWR.get();
796 if(null == vars) { return(result); }
797
798 if(parameters != null)
799 {
800 // Allow for GET or POST plain/image style (.x, .y) values.
801 final boolean votePro =
802 (null != parameters.get(VOTE_PRO_PARAM_NAME)) ||
803 (null != parameters.get(VOTE_PRO_PARAM_NAME + ".x"));
804 final boolean voteCon =
805 (null != parameters.get(VOTE_CON_PARAM_NAME)) ||
806 (null != parameters.get(VOTE_CON_PARAM_NAME + ".x"));
807 if(votePro != voteCon) // Exactly one selected...
808 {
809 try
810 {
811 // Post vote event value to correct event stream...
812 /*if(IsDebug.isDebug)*/ { System.out.println("***VOTE registered (for="+votePro+"): " + exhibitFullName + " by " + dpID); }
813 vars.setVariable(new SimpleVariableValue(
814 (votePro ? SystemVariables.VOTE_PRO : SystemVariables.VOTE_CON),
815 exhibitFullName.getShortName().toString()));
816
817 if(voterIPAddr != null)
818 {
819 // Note location of voter (as ccTLD or region).
820 // We are prepared to spend some time on this
821 // as voting is rare and significant
822 // and we've probably already collected/cached
823 // any required data.
824 final String location = GeoUtils.getRegionByAddress(voterIPAddr, false);
825 vars.setVariable(new SimpleVariableValue(
826 VOTER_LOCATION_STATS_EVENT_DEF,
827 VOTER_LOCATION_STATS_EVENT_PREFIX + location));
828 }
829 }
830 catch(final IOException e)
831 { e.printStackTrace(); /* Just absorb errors. */ }
832 }
833 }
834
835 // Compute time before next vote will be requested
836 // if a user does take this opportunity to vote.
837 // A relatively long time after we expect the original voting opportunity to expire
838 // to act as a second-level screen against spiders
839 // and to avoid pestering a human voter too often.
840 // (We also make this time more unpredictable with a good random source.)
841 final long nextVote = expireBy + 11*WebConsts.VOTE_MIN_REQUEST_GAP_MS +
842 Rnd.goodRnd.nextInt(7*WebConsts.VOTE_MIN_REQUEST_GAP_MS);
843
844 // Install a dummy handler to postpone the next time that this user gets asked to vote.
845 // Assumes that the extant listener is removed *before* a call to handle()
846 // so that this new listener will not be removed on return.
847 StatsSink.addListenerForDataPoint(new StatsSink.AbstractStatsListener(dpID, nextVote){
848 /**Dummy handler that should never in fact be invoked. */
849 @Override public final String handle(final Map<String, String[]> parameters) { return(result); }
850 });
851
852 return(result);
853 }
854 }
855
856
857 /**Simple class to allow logging to the given servlet's log().
858 * This holds only a WeakReference to the ServletContext
859 * so as not to obstruct GC when all strong refs go away.
860 * <p>
861 * Stops logging when the referent becomes null.
862 */
863 public static final class ServletLogger implements SimpleLoggerIF
864 {
865 public ServletLogger(final ServletContext ctxt)
866 {
867 if(ctxt == null) { throw new IllegalArgumentException(); }
868 ctxtWR = new WeakReference<ServletContext>(ctxt);
869 }
870
871 /**Weak ref to servlet context; never null but the referent may be. */
872 private volatile WeakReference<ServletContext> ctxtWR;
873
874 /**Log the given message.
875 * If the weak reference to the context has died
876 * the log output is silently discarded.
877 */
878 public void log(final String message)
879 {
880 final ServletContext context = ctxtWR.get();
881 if(null == context) { return; }
882 context.log(message);
883 }
884 }
885
886
887 /**Simple class to allow logging to a given servlet's log() or System.out if none available.
888 * This allows a logger to be created at instance scope
889 * for (say) a Filter, and set with a context when the config is set
890 * and cleared when one is not available,
891 * all the while remaining a valid logger.
892 * <p>
893 * This holds only a WeakReference to the ServletContext
894 * so as not to obstruct GC when all strong refs go away.
895 */
896 public static final class ServletLoggerWithFallback implements SimpleLoggerIF
897 {
898 /**Weak ref to servlet context; may be null or the referent may be. */
899 private volatile WeakReference<ServletContext> ctxtWR;
900
901 /**Set context, or clear/remove it if null. */
902 public void setContext(final ServletContext context)
903 {
904 if(null == context) { ctxtWR = null; }
905 else { ctxtWR = new WeakReference<ServletContext>(context); }
906 }
907
908 /**Log the given message.
909 * Logs to the servlet context logger if available,
910 * else logs to System.out.
911 */
912 public void log(final String message)
913 {
914 final WeakReference<ServletContext> wr = ctxtWR;
915 final ServletContext context = (null == wr) ? null : wr.get();
916 if(context != null) { context.log(message); }
917 else { System.out.println(message); }
918 }
919 }
920
921
922 /**System variables tried, in order, for a "popular" exhibit; private to getPopularExhibit().
923 * We put the download var first to get a decent rate of update
924 * since this value changes quite frequently.
925 * <p />
926 * We don't put the vote var first so as to reduce the temptation
927 * to "throw" a vote to get an exhibit shown on the front page.
928 */
929 private static final SimpleVariableDefinition _gPE_vars[] = {
930 SystemVariables.ACCESSPATTERN_COMPLETED_DOWNLOAD_LOCAL,
931 SystemVariables.ACCESSPATTERN_COMPLETED_DOWNLOAD,
932 SystemVariables.VOTE_PRO,
933 SystemVariables.ACCESSPATTERN_CLICKTHROUGH,
934 SystemVariables.ACCESSPATTERN_CAT_PAGE_VIEW,
935 };
936
937 // /**Gets "popular" exhibit, possibly filtered by type; null if none available.
938 // * Tries to pick a "popular" exhibit by looking at one recently voted for,
939 // * or downloaded, etc, in the system variables,
940 // * and that has both thumbnails available where they are possible.
941 // * <p>
942 // * This rejects exhibits with a below-par (negative) rating.
943 // * <p>
944 // * Using the system variables should mean that this can pick up
945 // * values set from any mirror, etc, fairly quickly.
946 // * <p>
947 // * This cannot guarantee to return a non-null value,
948 // * but any value that it does return is a current, valid exhibit.
949 // *
950 // * @param dsb handle on the system variables and data; never null
951 // * @param type if not null only exhibits of this type are candidates
952 // * @deprecated Use {@link #getPopularExhibit(DataSourceBean,ExhibitMIME.ExhibitTypeParameters,Collection)} instead
953 // */
954 // @Deprecated
955 // public static String getPopularExhibit(final DataSourceBean dsb,
956 // final ExhibitMIME.ExhibitTypeParameters type)
957 // { return getPopularExhibit(dsb, type, null); }
958
959 /**Gets name of "popular" exhibit, possibly filtered by type; null if none available.
960 * Tries to pick a "popular" exhibit by looking at one recently voted for,
961 * or downloaded, etc, in the system variables,
962 * and that has both thumbnails available where they are possible.
963 * <p>
964 * This rejects exhibits with a below-par (negative) rating.
965 * <p>
966 * Using the system variables should mean that this can pick up
967 * values set from any mirror, etc, fairly quickly.
968 * <p>
969 * This cannot guarantee to return a non-null value,
970 * but any value that it does return is a current, valid exhibit.
971 *
972 * @param dsb handle on the system variables and data; never null
973 * @param type if not null only exhibits of this type are candidates
974 * @param excludeFullNames if non-null, any exhibits included by full name
975 * are not candidates to be returned
976 * @param beQuick if true then don't spend too long trying to calculate this
977 * but instead give up quickly if need be
978 * (so as not to block page generation for example)
979 */
980 public static Name.ExhibitFull getPopularExhibit(final DataSourceBean dsb,
981 final ExhibitMIME.ExhibitTypeParameters type,
982 final Collection<String> excludeFullNames,
983 final boolean beQuick)
984 {
985 if(dsb == null) { throw new IllegalArgumentException(); }
986
987 final long start = System.currentTimeMillis();
988
989 for(final SimpleVariableDefinition def : _gPE_vars)
990 {
991 assert(def != null);
992
993 // Half the time skip a "local" definition
994 // so as to get to see global popular items in the mix.
995 if(def.isLocal() && Rnd.fastRnd.nextBoolean())
996 { continue; }
997
998 try
999 {
1000 final SimpleVariableValue svv = dsb.getVariable(def);
1001 if(svv == null) { continue; }
1002
1003 // Specified variable must be of String type.
1004 assert(def.getType() == SimpleVariableDefinition.TYPE_STRING);
1005
1006 final String s = (String) svv.getValue();
1007 if(s == null) { continue; }
1008
1009 // Skip any null values.
1010 if(null == s) { continue; }
1011
1012 final AllExhibitProperties aep = dsb.getAllExhibitProperties(-1);
1013 final Name.ExhibitFull fullName = aep.aeid.getFullName(s);
1014
1015 // If this is in the exclusion list then skip it...
1016 if((excludeFullNames != null) && excludeFullNames.contains(fullName)) { continue; }
1017
1018 // Seems not to be a valid/extant exhibit, so give up...
1019 if(fullName == null) { continue; }
1020 final ExhibitStaticAttr esa = aep.aeid.getStaticAttr(fullName);
1021 if(esa == null) { continue; }
1022
1023 // If the type was specified and does not match, give up!
1024 final ExhibitTypeParameters actualType = (ExhibitMIME.getInputFileType(esa.getCharSequence()));
1025 if(actualType == null)
1026 { continue; /* Reject untyped exhibit. */ }
1027 if((type != null) && !type.equals(actualType))
1028 { continue; /* Reject wrong-type exhibit. */ }
1029
1030 // Skip if this exhibit may be "sensitive" somehow.
1031 final GenProps gp = dsb.getGenProps(-1);
1032 if(GenUtils.isSensitive(fullName, gp)) { continue; }
1033
1034 // If this exhibit type supports thumbnails
1035 // then reject anything without both immediately available.
1036 if(actualType.canPossiblyCreateThumbnailOfSameMIMEType())
1037 {
1038 final ExhibitThumbnails thumbnails = dsb.getThumbnails(fullName, false);
1039 if((thumbnails == null) ||
1040 (thumbnails.getSmall() == null) ||
1041 (thumbnails.getStandard() == null))
1042 { continue; /* Reject this. */ }
1043 }
1044
1045 // We allow use of a stale (and ignore a not-yet-computed) rating so as to be quick.
1046 final ExhibitPropsComputableMutable ePCM =
1047 aep.getExhibitPropsComputableMutable(fullName);
1048 // If we don't actually know (absent/stale rating) how good this exhibit is
1049 // then attempt to find out for next time in the background
1050 // unless the system is overloaded or (temporarily) conserving energy.
1051 if((ePCM == null) || ePCM.isStale())
1052 {
1053 if(!GenUtils.mustConservePower() && !WebUtils.isOverloaded(dsb.getServletContext()))
1054 {
1055 // Use 'discardable' task pool to ensure that we don't block.
1056 ThreadUtils.lowPriorityThreadPoolDiscardable.submit(new Runnable() {
1057 /**Force full non-stale recomputation of EPCM of not-apparently-unpopular exhibit. */
1058 public void run() { aep.getExhibitPropsComputableMutable(fullName, false, gp, dsb, dsb.getScorerCache()); }
1059 });
1060 }
1061 }
1062 // Reject/skip anything with a definite below-par (non-positive) rating.
1063 if((ePCM != null) && (ePCM.getGoodness() <= 0)) { continue; }
1064
1065 return(fullName); // Got one!
1066 }
1067 catch(final IOException e)
1068 {
1069 // Silently ignore a probably-transient problem...
1070 }
1071
1072 // If urged to be quick by our caller
1073 // then abort if we've already taken too long trying
1074 // (a significant fraction of allowed page-generation time).
1075 if(beQuick && ((System.currentTimeMillis() - start) > WebConsts.MAX_PG_DOWNLOAD_MS/2))
1076 { break; }
1077 }
1078
1079 return(null); // Nothing found...
1080 }
1081
/**If true then check for bots/spiders by UA (User-Agent); note that clients can forge their UA.
 * When false the set of known spider UA names is empty,
 * and UA_REGEX and the UA-to-"bot"ness cache are both null.
 */
private static final boolean CHECK_FOR_SPIDERS_BY_UA = true;
1084
1085 /**Immutable Set of known spider/bot UA strings; should probably be moved to a text/properties file.
1086 * This is a set of lower-cased first (space/tab/bracket-delimited) words
1087 * from the UA strings.
1088 * <p>
1089 * The names consist only of non-regex-metacharacters in the set [a-z'_-],
1090 * so are safe to embed in a regex.
1091 */
1092 // @SuppressWarnings("unchecked")
1093 private static final Set<String> spiderUAName1stWordsLC = (!CHECK_FOR_SPIDERS_BY_UA) ? Collections.<String>emptySet() :
1094 Collections.unmodifiableSet(new HashSet<String>(Arrays.asList(new String[]{
1095 "", /* Empty UA string... */
1096 "-",
1097 "alexibot",
1098 "appie",
1099 "aqua_products",
1100 "asterias",
1101 "b2w",
1102 "baiduspider",
1103 "backdoorbot",
1104 "becomebot",
1105 "blowfish",
1106 "bookmark",
1107 "botalot",
1108 "builtbottough",
1109 "bullseye",
1110 "bunnyslippers",
1111 "cheesebot",
1112 "cherrypicker",
1113 "cherrypickerelite",
1114 "cherrypickerse",
1115 "copernic",
1116 "copyrightcheck",
1117 "cosmos",
1118 "crescent",
1119 "curl",
1120 "dittospyder",
1121 "dumbot",
1122 "emailcollector",
1123 "emailsiphon",
1124 "emailwolf",
1125 "enterprise_search",
1126 "erocrawler",
1127 "extractorpro",
1128 "fairad",
1129 "faxobot",
1130 "findlinks",
1131 "flaming",
1132 "foobot",
1133 "freefind",
1134 "gaisbot",
1135 "getright",
1136 "gigabot",
1137 "googlebot-image",
1138 "grub",
1139 "grub-client",
1140 "harvest",
1141 "hatena",
1142 "hloader",
1143 "http",
1144 "httplib",
1145 "humanlinks",
1146 "ia_archiver",
1147 "indy",
1148 "infonavirobot",
1149 "iron33",
1150 "jennybot",
1151 "jetbot",
1152 "kalooga",
1153 "kenjin",
1154 "keyword",
1155 "larbin",
1156 "lexibot",
1157 "libweb",
1158 "libwww-perl",
1159 "linkextractorpro",
1160 "linkscan",
1161 "linkwalker",
1162 "lnspiderguy",
1163 "looksmart",
1164 "lwp-trivial",
1165 "lynx",
1166 "mata",
1167 "miixpc",
1168 "mister",
1169 "moget",
1170 "msiecrawler",
1171 "msnbot",
1172 "naver",
1173 "netants",
1174 "netmechanic",
1175 "nicerspro",
1176 "nutch",
1177 "offline",
1178 "omniexplorer_bot",
1179 "openbot",
1180 "openfind",
1181 "oracle",
1182 "perman",
1183 "port",
1184 "propowerbot",
1185 "prowebwalker",
1186 "psbot",
1187 "python-urllib",
1188 "queryn",
1189 "radiation",
1190 "repomonkey",
1191 "rma",
1192 "searchpreview",
1193 "sitesnagger",
1194 "sootle",
1195 "spankbot",
1196 "spanner",
1197 "stanford",
1198 "suzuran",
1199 "szukacz",
1200 "teleport",
1201 "teleportpro",
1202 "telesoft",
1203 "thenomad",
1204 "tocrawl",
1205 "true_robot",
1206 "turingos",
1207 "url",
1208 "url_spider_pro",
1209 "urly",
1210 "vci",
1211 "wbdbot",
1212 "webauto",
1213 "webbandit",
1214 "webcopier",
1215 "webenhancer",
1216 "websauger",
1217 "website",
1218 "webster",
1219 "webstripper",
1220 "webvac",
1221 "webzip",
1222 "wget",
1223 "www-collector-e",
1224 "xenu's",
1225 "yahooseeker",
1226 "zeus",
1227 })));
1228
/**Set of characters taken as a main-part terminator in a User-Agent header, including whitespace.
 * This should be usable as the separator arg to StringTokenizer,
 * and in a regex when wrapped in "[]" square brackets
 * (which is how the UA_REGEX initialiser uses it).
 * <p>
 * The characters are: space, tab, '/', '(' and ':'.
 */
private static final String UA_TERMINATOR_CHARS = " \t/(:";
1234
/**Extra case-insensitive patterns matched in UA names, "|"-separated, or null if none.
 * Essentially, anything alphanumeric-ish ending in "bot", or
 * anything alphanumeric-ish containing "spider".
 * <p>
 * "Alphanumeric-ish" here means drawn from [a-z0-9._-];
 * the patterns are written in lower case and rely on UA_REGEX
 * being compiled case-insensitively.
 */
private static final String UA_BOT_PATTERNS = "([a-z0-9._-]*bot)|([a-z0-9._-]*spider[a-z0-9._-]*)";
1240
/**Case-insensitive regex matching whole UA strings that start with a known spider/bot name; null if not checking UA names.
 * The alternatives are UA_BOT_PATTERNS (if any) followed by
 * every non-empty name from spiderUAName1stWordsLC.
 * <p>
 * Made public to enable some unit testing.
 * <p>
 * NOTE(review): the terminator character class is optional and is followed by ".*",
 * so a UA that merely starts with one of the names also matches
 * (eg a first word that has a listed name as a prefix) -- confirm that this is intended.
 */
public static final Pattern UA_REGEX;
/**Initialise UA_REGEX. */
static
{
    Pattern compiled = null;
    if(CHECK_FOR_SPIDERS_BY_UA)
    {
        final String opening = "^(";
        final StringBuilder regex = new StringBuilder(32 + (spiderUAName1stWordsLC.size()<<4));
        regex.append(opening);

        // Build the simple compound alternation pattern|name1|name2|...
        // starting with any additional common patterns.
        if(null != UA_BOT_PATTERNS) { regex.append(UA_BOT_PATTERNS); }
        for(final String name : spiderUAName1stWordsLC)
        {
            final boolean usable = (name != null) && (name.length() != 0);
            if(usable)
            {
                // Separator needed only once something follows the opening bracket.
                if(regex.length() > opening.length()) { regex.append('|'); }
                regex.append(name);
            }
        }

        // Close the group and add the terminator and trailing match-anything.
        regex.append(")[").append(UA_TERMINATOR_CHARS).append("]?.*$");

        compiled = Pattern.compile(regex.toString(), Pattern.CASE_INSENSITIVE);
    }
    UA_REGEX = compiled;
}
1268
/**Name of Boolean attribute in request we cache result of requestProbablyFromSpider() by.
 * The attribute is set to Boolean.TRUE or Boolean.FALSE once the answer is known
 * so that repeat calls for the same request return immediately.
 */
private static final String _rPFS_CACHE_PNAME = "org.hd.pg2k._rPFS_CACHE";
1271
/**LRU cache from (common) whole UAs to "bot"ness to save some repeated/slow String parsing; never null when checking for bots by UA.
 * We're prepared to discard all of this under memory stress
 * as we only have to work this out at most once per request for example.
 * <p>
 * Null when CHECK_FOR_SPIDERS_BY_UA is false.
 * <p>
 * NOTE(review): presumably 512 is the cache capacity and the String its diagnostic name;
 * the eviction policy is whatever MemoryTools.SimpleProbabilisticCache implements -- confirm.
 */
private static final MemoryTools.SimpleProbabilisticCache<String,Boolean> _isBot_cache = !CHECK_FOR_SPIDERS_BY_UA ? null :
    MemoryTools.SimpleProbabilisticCache.<String,Boolean>create(512, "_isBot_cache");
1278
1279 /**Attempt to determine quickly if the current request is probably from a spider/bot (ie not a human).
1280 * NB: This does not attempt to distinguish between good spiders (ie bona fide SEs)
1281 * and bad/broken/rude bots/scrapers/spiders.
1282 * <p>
1283 * This tries to distinguish human from non-human,
1284 * at least in part to conserve (prime interactive) resources for humans,
1285 * and does not claim to be perfect.
1286 * <p>
1287 * This returns true if there is no (valid) referring page
1288 * (and this visitor is not new to the site, ie has recently pulled another page),
1289 * though some browsers/firewalls may routinely block this info,
1290 * and some referrals, eg to target="_blank", may show no Referer value.
1291 * <p>
1292 * This should be quick as most or all requests may be tested with this.
1293 * <p>
1294 * TODO: Should expand this to test against well-known IP addresses.
1295 * <p>
1296 * TODO: Should include a behavioural element, eg mindless, rapid, pauseless browsing.
1297 *
1298 * @param request the incoming request; never null
1299 *
1300 * @return true if the requesting client is probably a bot, false if probably a human
1301 */
1302 public static boolean requestProbablyFromSpider(final HttpServletRequest request)
1303 {
1304 // If we already worked this out, return result already computed/cached!
1305 final Object cachedResult = request.getAttribute(_rPFS_CACHE_PNAME);
1306 if(cachedResult instanceof Boolean)
1307 { return(((Boolean) cachedResult).booleanValue()); }
1308
1309 // First check for lack of a valid "Referer" header
1310 // AND the the client not being 'new' to this site.
1311 if((null == request.getAttribute(ThroughputMonitorFilterBase.REQ_ATTR_NAME_NEW_CLIENT_FLAG)) &&
1312 "".equals(requestProbablyReferredFromExternalSite(request)))
1313 {
1314 if(IsDebug.isDebug) { System.out.println("[Client has no/unparsable Referer and is not new: probably a spider/robot.]"); }
1315 request.setAttribute(_rPFS_CACHE_PNAME, Boolean.TRUE);
1316 return(true);
1317 }
1318
1319 // Now check for a known bot User-Agent...
1320 if(CHECK_FOR_SPIDERS_BY_UA)
1321 {
1322 final Enumeration<?> aeEn = request.getHeaders("User-Agent");
1323 final boolean hasNoUA = (aeEn == null) || !aeEn.hasMoreElements();
1324 if(hasNoUA)
1325 {
1326 if(IsDebug.isDebug) { System.out.println("[Client has no UA: assumed to be a spider/robot.]"); }
1327 request.setAttribute(_rPFS_CACHE_PNAME, Boolean.TRUE);
1328 return(true); /* Rude/suspicious. */
1329 }
1330 else
1331 {
1332 // Check all such UA headers if more than one for some reason...
1333 while(aeEn.hasMoreElements())
1334 {
1335 final String wholeUA = (String) aeEn.nextElement();
1336
1337 final int lenUA = wholeUA.length();
1338 if((lenUA < 2) || (lenUA > 512))
1339 {
1340 // Strange-sized UA is suspicious...
1341 // And we don't want our cache/memory full of huge UA strings.
1342 if(IsDebug.isDebug) { System.out.println("[Client has empty/tiny/huge (ie rude/broken/silly/dangerous) UA: assumed to be a spider/robot.]"); }
1343 request.setAttribute(_rPFS_CACHE_PNAME, Boolean.TRUE);
1344 return(true);
1345 }
1346
1347 //final long t1 = System.nanoTime();
1348 final boolean isBot;
1349 final Boolean b = _isBot_cache.get(wholeUA);
1350 if(b != null) { isBot = b.booleanValue(); }
1351 else
1352 {
1353 isBot = UA_REGEX.matcher(wholeUA).matches();
1354 // Cache match result for this whole User-Agent value.
1355 _isBot_cache.put(wholeUA, isBot ? Boolean.TRUE : Boolean.FALSE);
1356
1357 // When we see a (new-ish) bot UA then log it and where it came from...
1358 if(isBot) { System.out.println("[INFO: Probable bot/spider UA seen (full UA='"+TextUtils.sanitiseForXML(wholeUA, 256, false)+"'); client IP: "+request.getRemoteAddr()+".]"); }
1359 }
1360 //final long t2 = System.nanoTime(); System.out.println("[UA match time: "+(t2-t1)+"ns]");
1361
1362 if(isBot)
1363 {
1364 request.setAttribute(_rPFS_CACHE_PNAME, Boolean.TRUE);
1365 return(true);
1366 }
1367 }
1368 }
1369 }
1370
1371 // TODO: check known spider/bot IP addresses...
1372
1373 request.setAttribute(_rPFS_CACHE_PNAME, Boolean.FALSE);
1374 return(false); // Probably human...
1375 }
1376
1377 /**Attempt to detect if the current request has been referred from an external Web site.
1378 * This simply checks if the "Referer" is apparently from any of our
1379 * sites or their aliases.
1380 * <p>
1381 * Note that since this info can be forged,
1382 * or disabled/knobbled for security reasons,
1383 * this is not completely reliable.
1384 * <p>
1385 * A missing "Referer" will be taken as indicating an "external" reference,
1386 * and will catch most well-behaved spiders as a result.
1387 * <p>
1388 * Since we may have to do some string parsing this may not be very fast,
1389 * but it should not be very slow either.
1390 * <p>
1391 * Even if there is more than one "Referer" header we will only look at one.
1392 *
1393 * @param request the incoming request; never null
1394 *
1395 * @return null if referred from one of our sites/hosts,
1396 * "" if unparsable or no referring URL,
1397 * else normalised (lowercase, stripped of common prefixes, etc)
1398 * referring host's name
1399 */
1400 public static String requestProbablyReferredFromExternalSite(final HttpServletRequest request)
1401 {
1402 final Enumeration<?> rEn = request.getHeaders("Referer");
1403 final boolean noRef = ((rEn == null) || !(rEn.hasMoreElements())); // No Referer...
1404 if(noRef) { return(""); }
1405
1406 // Get the referring URL...
1407 final String ref = (String) rEn.nextElement();
1408
1409 try
1410 {
1411 // Attempt to parse it for the hostname.
1412 // If we fail, ie ref is ill-formed, then count this as "external".
1413 final URL url = new URL(ref);
1414
1415 // If we can't find virtual host details for the host
1416 // then assume that this is an external host.
1417 final String rawHostName = url.getHost();
1418 final String normalisedHostName = HostUtils.normaliseVirtualHostName(rawHostName);
1419 if(null == VirtualHosts.getVirtualHostDetails(normalisedHostName, null))
1420 {
1421 final String ourName = request.getServerName();
1422 if((null != ourName) &&
1423 normalisedHostName.equals(HostUtils.normaliseVirtualHostName(ourName)))
1424 {
1425 return(null); // Our host name, even if not expected one, so treat as OK.
1426 }
1427 return(normalisedHostName); // OK, got the referring host, and it's not us.
1428 }
1429 return(null); // This was apparently referred by us, so is OK.
1430 }
1431 catch(final Exception e)
1432 {
1433 return(""); /* Cannot parse referrer, so treat as if external. */
1434 }
1435 }
1436
1437 /**Generate full URL for exhibit given exhibit name; never null.
1438 * This may take account of such factors as which servers
1439 * are up, how heavily loaded, etc,
1440 * in order to perform automatic load balancing
1441 * and give the best possible user experience.
1442 * <p>
1443 * In order to do this well we may need the request details,
1444 * so they should be supplied if possible.
1445 * These should be the client's request to a Gallery site,
1446 * else null.
1447 */
1448 public static URL makeExhibitURL(final CharSequence exhibitName,
1449 final HttpServletRequest request,
1450 final DataSourceBean vars)
1451 throws MalformedURLException
1452 {
1453 return(new URL("http://" +
1454 chooseMirrorHostToBalanceLoad(request, vars) +
1455 makeExhibitRRURL(exhibitName)));
1456 }
1457
1458 /**Generate root-relative URL for exhibit given exhibit name; never null.
1459 */
1460 public static String makeExhibitRRURL(final CharSequence exhibitName)
1461 //throws MalformedURLException
1462 {
1463 return("/" + WebConsts.BASE_PATH_EXHIBITS + "/" + exhibitName);
1464 }
1465
1466 /**Generate full URL for thumbnail/sample given exhibit name and standard/small selector; never null.
1467 */
1468 public static URL makeThumbnailURL(final CharSequence exhibitName, final boolean std)
1469 throws MalformedURLException
1470 {
1471 return(new URL("http", CoreConsts.MAIN_DATA_HOST, makeThumbnailRRURL(exhibitName, std)));
1472 }
1473
1474 /**Generate root-relative URL for thumbnail/sample given exhibit name and standard/small selector; never null.
1475 */
1476 public static String makeThumbnailRRURL(final CharSequence exhibitName, final boolean std)
1477 //throws MalformedURLException
1478 {
1479 return("/" + WebConsts.BASE_PATH_TN + "/" + (std ? WebConsts.PATH_TN_STD : WebConsts.PATH_TN_SML) + "/" + exhibitName);
1480 }
1481
1482 /**Generate full URL for catalogue page given exhibit name; never null.
1483 * This should always generate the canonical/"official" form of the URL,
1484 * eg not including mirrors or aliases.
1485 * <p>
1486 * The media-type suffix (eg ".html" or ".wml") must be supplied.
1487 */
1488 public static URL makeCatPageURL(final CharSequence exhibitName, final String mediaTypeSuffix)
1489 throws MalformedURLException
1490 {
1491 return(new URL("http", CoreConsts.MAIN_DATA_HOST, makeCatPageRRURL(exhibitName, mediaTypeSuffix)));
1492 }
1493
1494 /**Generate root-relative URL for catalogue page given exhibit name; never null.
1495 * This should take account of such factors as which servers
1496 * are up, how heavily loaded, etc.
1497 * <p>
1498 * The media-type suffix (eg ".html" or ".wml") must be supplied.
1499 */
1500 public static String makeCatPageRRURL(final CharSequence exhibitName, final String mediaTypeSuffix)
1501 {
1502 return("/" + WebConsts.BASE_PATH_CATPAGE + "/" + exhibitName + mediaTypeSuffix);
1503 }
1504
1505 /**Returns true if this Web server is overloaded (eg for bandwidth or CPU).
1506 * If this server is positively overloaded then this routine returns true.
1507 * <p>
1508 * If not overloaded or not possible to tell, this returns false.
1509 * <p>
1510 * (If the context is null then this routine may have to return false.)
1511 * <p>
1512 * This may gather information from a number of sources,
1513 * but in the main the ThroughputFilter's data will be used.
1514 * We may explicitly set the overload flag here
1515 * if we detect the system to be overloaded
1516 * to make it quicker for us and others on a subsequent call;
1517 * this will get overwritten by TMF when it next gets to assess load.
1518 * <p>
1519 * If true then the UI and application should attempt to use less bandwidth
1520 * and CPU time than normal, perhaps by using cheaper algorithms than
1521 * usual (eg less comprehensive searches) or a less-graphics-intensive UI.
1522 * <p>
1523 * This may default to true while the server is starting up to ensure that
1524 * as little extra CPU (for example) as possible is consumed while
1525 * the server is compiling JSPs, loading classes, etc.
1526 * <p>
1527 * This is intended to be relatively cheap to call,
1528 * since it may get called frequently,
1529 * for example especially when the system is busy,
1530 * and/or at start-up before JIT compilation (eg HotSpot) has kicked in.
1531 */
1532 public static boolean isOverloaded(final ServletContext ctxt)
1533 {
1534 if(ctxt != null)
1535 {
1536 // If the ThroughputFilter thinks that we are overloaded,
1537 // then we report the system as overloaded.
1538 final Object overloadFlag =
1539 ctxt.getAttribute(WebConsts.BANDWIDTH_OVERLOAD_ATTR_NAME);
1540 // We take anything but the presence of the value Boolean.FALSE
1541 // (eg the absence of any value at all) as indication of a problem,
1542 // eg that the system may be too busy even to clear the flag,
1543 // as well as a short-term cache of overload status computed in this routine
1544 // until the TF computes/sets a new value.
1545 if((!(overloadFlag instanceof Boolean)) || ((Boolean) overloadFlag).booleanValue())
1546 { return(true); }
1547 }
1548
1549 // If the system is known to be at or over our CPU load limit then we're overloaded.
1550 if(ThreadUtils.cpuHeavilyLoaded())
1551 {
1552 // Effectively cache overload status for a while (until the next TF update).
1553 if(ctxt != null) { ctxt.removeAttribute(WebConsts.BANDWIDTH_OVERLOAD_ATTR_NAME); }
1554 return(true);
1555 }
1556
1557 // We can't see any problems, given the available data.
1558 return(false);
1559 }
1560
/**Private flag for isLightlyLoaded() to note time when we were last non lightly loaded.
 * Once we discover that we are not lightly loaded we set this to the current time
 * (System.currentTimeMillis()),
 * and then do not re-test for a little while (about one second)
 * in case the testing itself adds to the load.
 * <p>
 * Initially zero.
 * <p>
 * Marked volatile for thread-safe lock-free access.
 */
private static volatile long _lastNotLightlyLoaded;

/**Time that we last reset the target load average to zero to restart our load ramp-up.
 * Initially 'now' (class-initialisation time) to encourage a gentle start.
 * <p>
 * Marked volatile for thread-safe lock-free access.
 */
private static volatile long _lastResetLA = System.currentTimeMillis();

/**Time over which to ramp up load limit in isLightlyLoaded(), ms; strictly positive.
 * Should be at least the 60s time claimed in the documentation for
 * OperatingSystemMXBean.getSystemLoadAverage()
 * to dampen CPU-load flapping,
 * especially when emerging from power-conservation mode.
 * <p>
 * Computed once, as a fixed 180000ms plus a random extra from Rnd.fastRnd.nextInt(60000)
 * (presumably 0 to just under 60s, as for java.util.Random; confirm against Rnd).
 */
private static final int LOAD_AVERAGE_RAMP_UP_TIME = 180000 + Rnd.fastRnd.nextInt(60000); // 3+ minutes.
1585
1586 /**Returns true if the site seems to be lightly loaded (CPU, bandwidth, etc).
1587 * In case of difficulties/confusion this reports false.
1588 * <p>
1589 * This routine is quite careful and conservative,
1590 * and will only return true if this server and the host system
1591 * both seem to be lightly loaded and stable by all the appropriate metrics.
1592 * <p>
1593 * This never reports the system to be lightly loaded if it is low on power
1594 * (eg on a nearly-expired battery) so as to avoid unnecessary background work.
1595 * <p>
1596 * Where this can check 'uptime' then it tries to enforce a modest CPU ramp-up
1597 * over approximately the 60s-or-so of OperatingSystemMXBean.getSystemLoadAverage()
1598 * to limit rapid fluctuations in CPU load (and, for example, power consumption).
1599 * <p>
1600 * This routine is designed to be called frequently,
1601 * ie is reasonably fast and efficient.
1602 */
1603 public static boolean isLightlyLoaded(final ServletContext ctxt)
1604 {
1605 final long now = System.currentTimeMillis();
1606 // Don't re-test within ~1s of finding the system to be significantly loaded.
1607 if(now - _lastNotLightlyLoaded < 1013) { return(false); }
1608
1609 // Treat the system as stressed and thus NOT lightly loaded if (temporarily) short of power
1610 // so as to discourage unnecessary background/speculative work.
1611 // Also forces load-limit ramp-up to restart from scratch.
1612 if(GenUtils.mustConservePower())
1613 { _lastNotLightlyLoaded = now; _lastResetLA = now; return(false); }
1614
1615 // We check for memory stress; we are not lightly loaded if memory is hurting.
1616 if(MemoryTools.isMemoryStressed())
1617 { _lastNotLightlyLoaded = now; return(false); }
1618
1619 final double loadFrac = ThreadUtils.loadFraction();
1620 if(loadFrac >= 0) // -ve indicates load average not available so we don't test.
1621 {
1622 // Now we check the whole-host-operating-system (time-averaged) load.
1623 // This should be well under full capacity to be considered lightly loaded,
1624 // ie the run-queue length must be much less than the number of available CPUs.
1625 // Note that where this value is not available getSystemLoadAverage() returns a -ve value
1626 // and we then regard the system as not busy by this metric, as a reasonable fallback.
1627 // This does not take into account any stress on I/O nor other subsystems.
1628 final float targetBasicThreshold = LocalProps.getLightLoadMax();
1629 // If the system is well over over our lower load limit, our fault or not,
1630 // then restart our load ramp-up to give the system a chance to recover.
1631 if(loadFrac >= 2 * targetBasicThreshold)
1632 { _lastNotLightlyLoaded = now; _lastResetLA = now; return(false); }
1633 final long timeSinceLoadRampUpStart = (now - _lastResetLA);
1634 // If still in load ramp-up phase then reduce 'uptime' threshold accordingly.
1635 final float targetThreshold = (timeSinceLoadRampUpStart >= LOAD_AVERAGE_RAMP_UP_TIME) ?
1636 targetBasicThreshold :
1637 (targetBasicThreshold * Math.max(0, Math.min(1, timeSinceLoadRampUpStart / (float) LOAD_AVERAGE_RAMP_UP_TIME)));
1638 final boolean aboveThreshold = loadFrac >= targetThreshold;
1639 if(aboveThreshold)
1640 { _lastNotLightlyLoaded = now; return(false); }
1641 }
1642
1643 if(ctxt != null)
1644 {
1645 // If the ThroughputFilter doesn't think that we are lightly loaded,
1646 // then we immediately report the system as not lightly loaded.
1647 final Object underloadFlag =
1648 ctxt.getAttribute(WebConsts.BANDWIDTH_LIGHTLOAD_ATTR_NAME);
1649 // We take the absence of any value as indication of a problem,
1650 // eg that the system may be too busy even to set the flag.
1651 if((!(underloadFlag instanceof Boolean)) ||
1652 (!((Boolean) underloadFlag).booleanValue()))
1653 { _lastNotLightlyLoaded = now; return(false); }
1654
1655 // We double-check that some problem is not making us seem overloaded at the same time...
1656 if(isOverloaded(ctxt)) { _lastNotLightlyLoaded = now; return(false); }
1657
1658 return(true); // Seems lightly loaded...
1659 }
1660
1661 // Site status is unclear, so assume not quiet for now.
1662 // However, this 'false' is not cacheable, ie is not a real result.
1663 return(false);
1664 }
1665
/**If true then exhibitHasThumbnail() will try to cache its results.
 * This should avoid us having to load thumbnails into memory
 * just to see if they exist or not.
 * <p>
 * exhibitsHaveThumbnail() also only attempts its parallel pre-fetch when this is true.
 */
public static final boolean TN_AVAIL_CACHE = true;

/**The target thumbnail availability cache minimum size, strictly positive.
 * Should be large enough that most thumbnail availability requests
 * will be caught by it regardless of exhibit set size,
 * but small enough to limit memory requirements to something reasonable.
 * Should be *much* larger than the full set of thumbnails ever seen
 * on one page and/or that might be in our "popular" page set
 * so as to avoid thrashing fruitlessly.
 * <p>
 * When exhibitHasThumbnail() creates the cache it passes the larger of
 * twice this value and (1001 + the current exhibit count) to the map factory.
 * <p>
 * We are prepared to give up all cache entries rather than cause an OOME.
 */
private static final int TN_AVAIL_CACHE_SIZE_MIN_TARGET = 131 +
    5*WebConsts.SINGLE_PAGE_CONTACT_SHEET_TN_COUNT +
    SystemVariables.MAX_DIFF_EXHIBIT_NAME_VALUES;

/**Private key under which the thumbnail-availability cache map is held as an unlinked value in the data source; never null.
 * Used by exhibitHasThumbnail() (which creates the map on demand)
 * and by exhibitsHaveThumbnail() (which only reads it).
 */
private static final DataSourceBean.UnlinkedKey tnHTMLCacheKey = new DataSourceBean.UnlinkedKey("tnHTMLCacheKey");
1688
1689 /**Test if the given exhibits have thumbnails (locally) available; never null.
1690 * This is suitable for a bulk check, eg before rendering a table,
1691 * and may be internally parallelised to overcome I/O latency, etc.
1692 *
1693 * @param exhibitNames non-null list of full exhibit names;
1694 * not altered by this routine and must not be changed by the caller
1695 * while this routine is running
1696 *
1697 * @return a bit in the result set for each thumbnail that definitely
1698 * exists in the requested size; an unset bit may represent "not known"
1699 */
1700 @SuppressWarnings("unchecked")
1701 public static BitSet exhibitsHaveThumbnail(final DataSourceBean dataSource,
1702 final List<Name.ExhibitFull> exhibitNames,
1703 final boolean standard,
1704 final boolean forceCreation)
1705 {
1706 final int n = exhibitNames.size();
1707
1708 // Only try the parallel fetch if:
1709 // * We have an extant cache of thumbnail state.
1710 // * There is more than one item to check.
1711 // * We're not starved of resources (eg power).
1712 final MemoryTools.SimpleLRUMapAutoSizeForHitRate<Name.ExhibitFull, Byte> tnExistenceMap;
1713 if(TN_AVAIL_CACHE && (n > 1) &&
1714 /* (!GenUtils.mustConservePower()) && */
1715 (null != (tnExistenceMap = (MemoryTools.SimpleLRUMapAutoSizeForHitRate<Name.ExhibitFull, Byte>) dataSource.getUnlinkedValue(tnHTMLCacheKey))))
1716 {
1717 // Find all the exhibits whose thumbnail state is not cached at all,
1718 // and try to start one or more background thread(s) to check their status.
1719 // We expect this activity to be largely I/O bound.
1720
1721 // Tasks to wait for the completion of...
1722 final List<Future<?>> tasks = new LinkedList<Future<?>>();
1723
1724 for(final Name.ExhibitFull en : exhibitNames)
1725 {
1726 // If we have any tn state already cached then skip this entry...
1727 if(null != tnExistenceMap.get(en)) { continue; }
1728
1729 // Try to fetch this missing entry into cache concurrently...
1730 tasks.add(ThreadUtils.nonCPUThreadPool.submit(new Runnable(){
1731 public final void run()
1732 { exhibitHasThumbnail(dataSource, en, standard, forceCreation); }
1733 }));
1734 }
1735
1736 // Wait for any tasks to complete.
1737 for(final Future<?> task : tasks)
1738 {
1739 try { task.get(); }
1740 catch(final Exception e) { e.printStackTrace(); /* Absorb but log any error. */ }
1741 }
1742 }
1743
1744 // Fetch all values through the cache using the caller's thread.
1745 final BitSet result = new BitSet(n);
1746 for(int i = n; --i >= 0; )
1747 {
1748 if(exhibitHasThumbnail(dataSource, exhibitNames.get(i), standard, forceCreation))
1749 { result.set(i); }
1750 }
1751
1752 return(result);
1753 }
1754
1755 /**Test if the given exhibit has a thumbnail (locally) available.
1756 * Does not try to force one to be made if one is not extant (or in cache)
1757 * unless the forceCreation argument is true.
1758 * <p>
1759 * Since this is likely to be testing what is in (local) cache,
1760 * its results can be considered at best a (good) hint.
1761 * <p>
1762 * We do not cache entirely negative answers
1763 * (ie that an exhibit has no thumbnails)
1764 * as this may change if we overcome (eg) a temporary resource issue.
1765 * But we assume the converse,
1766 * ie that once we have a thumbnail then it will always be available.
1767 * <p>
1768 * Always returns false for exhibit types that do not support thumbnails.
1769 *
1770 * @param dataSource the data source; never null
1771 * @param exhibitName the full exhibit name; must be valid (eg non-null)
1772 * @param standard if true, tests for the presence of a standard-size
1773 * @param forceCreation if true, we can try to force creation/fetch
1774 * of the underlying thumbnail if not already available locally
1775 *
1776 * @return true if exhibit definitely has/had one/both thumbnails,
1777 * false if unknown or thumbnails are not currently available
1778 */
1779 @SuppressWarnings("unchecked")
1780 public static boolean exhibitHasThumbnail(final DataSourceBean dataSource,
1781 final Name.ExhibitFull exhibitName,
1782 final boolean standard,
1783 final boolean forceCreation)
1784 {
1785 if((dataSource == null) || (exhibitName == null))
1786 { throw new IllegalArgumentException(); }
1787
1788 // If we are cacheing the availability of thumbnails
1789 // then ensure the cache map exists here.
1790 // This is a thread-safe size-limited LRU map.
1791 // Note that this is *not* linked to the AEP
1792 // so that we may retain a little stale information indefinitely,
1793 // especially though AEP changes when much has to be recomputed.
1794 MemoryTools.CacheMiniMap<Name.ExhibitFull, Byte> tnExistenceMap;
1795 if(TN_AVAIL_CACHE)
1796 {
1797 while((tnExistenceMap = (MemoryTools.CacheMiniMap<Name.ExhibitFull, Byte>) dataSource.getUnlinkedValue(tnHTMLCacheKey)) == null)
1798 {
1799 // Size the table to suit the exhibit set and allow growth of the exhibit set...
1800 int nExhibits = 0;
1801 try { nExhibits = dataSource.getAllExhibitProperties(-1).aeid.length; } catch(final IOException e) { /* Ignore. */ }
1802 dataSource.putIfAbsentUnlinkedValue(tnHTMLCacheKey, MemoryTools.SimpleLRUMapAutoSizeForHitRate.<Name.ExhibitFull, Byte>create(0, Math.max(2*TN_AVAIL_CACHE_SIZE_MIN_TARGET, 1001+nExhibits), "exhibitHasThumbnail"));
1803 }
1804 }
1805
1806 // We store availability as a bit-mask
1807 // * bit 0 is 1 iff the small thumbnail is available
1808 // * bit 1 is 1 iff the std thumbnail is available
1809 byte availability = 0; // Assume none available by default.
1810
1811 // Only use cached positive values.
1812 // A negative cached result has us check again...
1813 // We cache negative results mainly to maintain correct hit-rate stats.
1814 final Byte cachedAvailability;
1815 if(!TN_AVAIL_CACHE || (null == (cachedAvailability = tnExistenceMap.get(exhibitName))) || (cachedAvailability.byteValue() <= 0))
1816 {
1817 // Hauling the thumbnail pair into view can be very expensive,
1818 // eg may involve disc fetches or image decoding or worse,
1819 // and may simply displace other items better kept in cache/memory.
1820 final ExhibitThumbnails tns = dataSource.getThumbnails(exhibitName, forceCreation);
1821
1822 if(tns != null)
1823 {
1824 if(tns.getSmall() != null)
1825 { availability |= 1; }
1826 if(tns.getStandard() != null)
1827 { availability |= 2; }
1828 }
1829
1830 if(TN_AVAIL_CACHE)
1831 {
1832 // Byte.valueOf() avoids creating distinct instances.
1833 tnExistenceMap.put(exhibitName, Byte.valueOf(availability));
1834 if(IsDebug.isDebug && ((tnExistenceMap.size() & 0x3ff) == 0)) { dataSource.log(" tnExistenceMap: " + tnExistenceMap.toString()); }
1835 }
1836 }
1837 // Use the cached (positive) value...
1838 else
1839 { availability = cachedAvailability.byteValue(); }
1840
1841 final boolean tnAvailable = (0 != (availability & (standard ? 2 : 1)));
1842 return(tnAvailable);
1843 }
1844
1845 /**Get thumbnail image URL to embed in HTML page (usually JPEG/GIF/PNG); null if none available.
1846 * By preference uses purpose-built thumbnail, else tries to
1847 * use image itself if small enough.
1848 * <p>
1849 * Has to be passed a dataSource and a full exhibit name.
1850 * <p>
1851 * Returns null if no suitable thumbnail URL can be generated.
1852 * <p>
1853 * This may cache its results,
1854 * in particular assuming that once a particular thumbnail becomes available
1855 * that it does not go away again.
1856 *
1857 * @param tnDim (output argument) if not null and result is not null,
1858 * is filled in with the thumbnail dimensions
1859 * @param dontCreateTn if true, don't force creation of a thumbnail if
1860 * one is not already readily available
1861 */
1862 public static String makeHTMLInlineImageThumbnailURL(final DataSourceBean dataSource,
1863 final Name.ExhibitFull exhibitName,
1864 final boolean std,
1865 final boolean relativeURL,
1866 final java.awt.Dimension tnDim,
1867 final boolean dontCreateTn)
1868 throws IOException
1869 {
1870 final AllExhibitProperties aep = dataSource.getAllExhibitProperties(-1);
1871
1872 // Get full exhibit details...
1873 final ExhibitStaticAttr esa = aep.aeid.getStaticAttr(exhibitName);
1874 // Stop if exhibit has gone away or is invalid.
1875 if(esa == null) { return(null); }
1876
1877 if(WebUtils.canInlineInHTMLPage((ExhibitMIME.getInputFileType(esa.getCharSequence()))))
1878 {
1879 // Get the exhibit immutable computable properties if possible.
1880 final ExhibitPropsComputable epc = aep.getExhibitPropsComputable(exhibitName);
1881 final java.awt.Dimension xyDim = (epc == null) ? null : epc.getXyDimensions();
1882 // Compute what thumbnail dimensions should be...
1883 final java.awt.Dimension thumbnailXyDim = (xyDim == null) ? null : ExhibitThumbnails.computeThumbnailDimensions(xyDim, std);
1884
1885 // Is this in fact a 2D image?
1886 if(thumbnailXyDim != null)
1887 {
1888 // Pass dimensions back to caller.
1889 if(tnDim != null)
1890 {
1891 tnDim.width = thumbnailXyDim.width;
1892 tnDim.height = thumbnailXyDim.height;
1893 }
1894
1895 // Is this a small enough (in bytes) exhibit to be its own thumbnail?
1896 final boolean smallExhibit = (esa.length <=
1897 (std ? ExhibitThumbnails.STD_ABS_MAX_BYTES : ExhibitThumbnails.SML_ABS_MAX_BYTES));
1898 // Can this be its own thumbnail?
1899 // It must be small enough in bytes,
1900 // and possibly small enough in XxY pixels too.
1901 final boolean canBeOwnThumbnail =
1902 (smallExhibit && (WebConsts.ALLOW_BROWSER_IMAGE_SCALE || thumbnailXyDim.equals(xyDim)));
1903
1904 // See if we have thumbnails available
1905 // (if eager, force one to be made,
1906 // else see if one already exists if we can't inline directly).
1907 // If we force them to be created
1908 // then we expect them to remain cached!
1909 // Shall we force creation?
1910 final boolean forceCreation = !dontCreateTn &&
1911 (WebConsts.EAGER_TN_USE || !canBeOwnThumbnail);
1912
1913 // Check for availability of real thumbnail.
1914 final boolean tnAvailable = exhibitHasThumbnail(dataSource,
1915 exhibitName,
1916 std,
1917 forceCreation);
1918
1919 final boolean canShowThumbnail = tnAvailable || canBeOwnThumbnail;
1920
1921 //System.err.println("[sE/cBOT/cST: "+smallExhibit+"/"+canBeOwnThumbnail+"/"+canShowThumbnail+".]");
1922
1923 if(canShowThumbnail)
1924 {
1925 // Chose the (relative) URL to use
1926 // (always serve from same host as this page).
1927 // Show true thumbnail by preference...
1928 if(relativeURL)
1929 { return(tnAvailable ? WebUtils.makeThumbnailRRURL(exhibitName, std) : WebUtils.makeExhibitRRURL(exhibitName)); }
1930 else
1931 { return((tnAvailable ? WebUtils.makeThumbnailURL(exhibitName, std) : WebUtils.makeExhibitURL(exhibitName, null, dataSource)).toString()); }
1932 }
1933 }
1934 }
1935
1936 return(null); // Can't make a thumbnail.
1937 }
1938
1939 /**Find the set of exhibits with the same subject as the indicated one.
1940 * Given a List of String exhibit names sorted by
1941 * ExhibitPropsGlobalImmutable.SMART_ORDER
1942 * (or possibly just by ExhibitPropsGlobalImmutable.SUBJECT_ORDER),
1943 * the SUBJECT_ORDER comparator,
1944 * and the index to a given item in that List,
1945 * finds all the items surrounding that have the same SUBJECT_ORDER,
1946 * ie that differ only in attribute words and are variants on the same
1947 * exhibit. (The result will always contain the input item.)
1948 * <p>
1949 * The List must not contain nulls, all entries must be valid exhibit
1950 * names, and the List must be sorted implicitly with the comparator
1951 * passed in. The input index must be within bounds.
1952 * <p>
1953 * This does not alter the List passed in. The return value is
1954 * an unmodifiable sublist of the input.
1955 * <p>
1956 * This returns the sublist of variants on the indicated exhibit;
1957 * this will degenerate to just containing the input exhibit name if there
1958 * are no other exhibit names for the same exhibit.
1959 * <p>
1960 * This assumes that the number of variants of any given exhibit is
1961 * small, and so a linear search is used.
1962 * <p>
1963 * If the List does not support efficient bidirectional movement and seeks
1964 * then this routine will be very inefficient.
1965 */
1966 public static List<Name.ExhibitFull> getExhibitVariantRange(final List<Name.ExhibitFull> allExhibitNames,
1967 final Comparator<Name.ExhibitFull> comparator,
1968 final int thisExhibitIndex)
1969 {
1970 final Name.ExhibitFull thisExhibit = allExhibitNames.get(thisExhibitIndex);
1971 final int listSize = allExhibitNames.size();
1972
1973 int first, last;
1974
1975 // Search backwards for first matching item.
1976 for(first = thisExhibitIndex; first > 0; --first)
1977 {
1978 if(comparator.compare(thisExhibit, allExhibitNames.get(first-1)) != 0)
1979 { break; } // Stop when we hit an item with a different subject.
1980 }
1981
1982 // Search forwards for last matching item.
1983 for(last = thisExhibitIndex; last < listSize-1; ++last)
1984 {
1985 if(comparator.compare(thisExhibit, allExhibitNames.get(last+1)) != 0)
1986 { break; } // Stop when we hit an item with a different subject.
1987 }
1988
1989 return(Collections.unmodifiableList(allExhibitNames.subList(first, last+1)));
1990 }
1991
1992 /**Computes a minimal human-readable immutable unique prefix of an exhibit short name to distinguish given exhibit from most others.
1993 * Uniqueness is not guaranteed, but this is meant to give a reasonably
1994 * short result that a human can read in the title of a page, for example.
1995 * <p>
1996 * If inputs are bad then this will try to fail gently with an empty-string
1997 * result rather than an exception to make it robust if called directly from
1998 * JSP code, for example.
1999 * <p>
2000 * This is passed a List of exhibits sorted in a "smart" order that
2001 * sorts mainly on the file component of the name,
2002 * probably in a case-insensitive way.
2003 * <p>
2004 * This routine finds a short word prefix that (case-insensitively)
2005 * is different from neighbouring exhibits
2006 * and is thus (depending on the overall sort order)
2007 * probably the shortest unique prefix amongst the entire collection.
2008 * <p>
2009 * (If no unique value is possible, the entire prefix is returned.)
2010 * <p>
2011 * If the List of size 0 we return the empty string and do not use
2012 * the index parameter at all. This simplifies use where the list
2013 * may transiently be empty during start-up.
2014 * <p>
2015 * This routine may run very slowly if the List argument does not support
2016 * efficient random seeking.
2017 * <p>
2018 * The result is designed to be used in headings and other display text
2019 * such as image alt/title attributes.
2020 * <p>
2021 * TODO: fix inefficient double-parsing of main words...
2022 *
2023 * @param exhibits sorted (smart-ish sorted) list of exhibit names
2024 * (String value); must not change while routine is running
2025 * @param ourIndex index (strictly positive) of the exhibit whose
2026 * abbreviated name we wish to produce; must be within the List
2027 * @return "" in case of invalid arguments,
2028 * else some whole-word prefix of the name
2029 */
2030 public static CharSequence minimalUniqueENTitlePrefix(final List<Name.ExhibitFull> exhibits,
2031 final int ourIndex)
2032 {
2033 // Treat bad arguments gently as the outputs are likely to
2034 // be displayed in HTML directly.
2035 if(exhibits.size() == 0) { return(""); }
2036 if((ourIndex < 0) || (ourIndex >= exhibits.size())) { return(""); }
2037
2038 // Set of attribute words we use for parsing names.
2039 // In this case, deliberately empty.
2040 final Set<String> noAttrWords = Collections.emptySet();
2041
2042 // Dummy "empty" enumeration we use for missing neighbours.
2043 final Enumeration<?> emptyEn = Collections.enumeration(Collections.emptyList());
2044
2045 // Get our name and previous/next names
2046 // as word enumerations (or empty enumerations if not present).
2047 // for this purpose we pretend that there are no attribute words...
2048 final ListIterator<Name.ExhibitFull> liFwd = exhibits.listIterator(ourIndex);
2049 final CharSequence ourNameFull = (liFwd.next());
2050 // FIXME: inefficient via full name and tokenizer...
2051 final CharSequence ourNameMainWords = ExhibitName.getMainWordsComponent(
2052 ourNameFull, noAttrWords).toString();
2053 final Enumeration<?> ourNameWords = ExhibitName.getMainWords(
2054 ourNameFull, noAttrWords);
2055
2056 // Default to empty enumerations.
2057 Enumeration<?> nextNameWords = emptyEn;
2058 Enumeration<?> prevNameWords = emptyEn;
2059
2060 // Now search forwards for different name to compare against...
2061 // Note that we implicitly start from just after ourName above.
2062 while(liFwd.hasNext())
2063 {
2064 final CharSequence n = (liFwd.next());
2065 // FIXME: inefficient via full name and tokenizer...
2066 final CharSequence nextNameMainWords =
2067 ExhibitName.getMainWordsComponent(n, noAttrWords);
2068 if(!TextUtils.contentEqualsIgnoreCase(nextNameMainWords, ourNameMainWords))
2069 {
2070 nextNameWords = ExhibitName.getMainWords(n, noAttrWords);
2071 break;
2072 }
2073 }
2074
2075 // Now search backwards for different name to compare against...
2076 final ListIterator<Name.ExhibitFull> liBack = exhibits.listIterator(ourIndex);
2077 while(liBack.hasPrevious())
2078 {
2079 final CharSequence n = (liBack.previous());
2080 // FIXME: inefficient via full name and tokenizer...
2081 final CharSequence prevNameMainWords =
2082 ExhibitName.getMainWordsComponent(n, noAttrWords);
2083 if(!TextUtils.contentEqualsIgnoreCase(prevNameMainWords, ourNameMainWords))
2084 {
2085 prevNameWords = ExhibitName.getMainWords(n, noAttrWords);
2086 break;
2087 }
2088 }
2089
2090 // Result word prefix (preserves case, ends with word separator).
2091 final StringBuilder result = new StringBuilder();
2092
2093 // Iterate until we run out of words or both neighbours have
2094 // (or have different words).
2095 while(ourNameWords.hasMoreElements())
2096 {
2097 final String ourNameWord = (String) ourNameWords.nextElement();
2098 result.append(ourNameWord).append(ExhibitName.WORD_SEP);
2099
2100 // Take next word, if extant, on both prev and next sides.
2101 final String nextNameWord = nextNameWords.hasMoreElements() ?
2102 (String) nextNameWords.nextElement() : "";
2103 final String prevNameWord = prevNameWords.hasMoreElements() ?
2104 (String) prevNameWords.nextElement() : "";
2105
2106 // If neighbour has another word but it is different to ours,
2107 // pretend neighbour was truncated at previous word.
2108 // We ignore case, since users probably will.
2109 if(nextNameWord.equalsIgnoreCase(ourNameWord))
2110 { continue; } // Looks the same so cannot stop yet.
2111 else
2112 { nextNameWords = emptyEn; } // No longer a contender.
2113
2114 if(prevNameWord.equalsIgnoreCase(ourNameWord))
2115 { continue; } // Looks the same so cannot stop yet.
2116 // else
2117 // { prevNameWords = emptyEn; } // No longer a contender.
2118
2119 break; // Done!
2120 }
2121
2122 // Attempt to return as space-saving Name, else a String, either being interned.
2123 return(Name.createOrStringFallback(result, null));
2124 }
2125
2126 /**Get BufferedImage containing expanded image loaded as static resource from WAR; never null.
2127 * Loaded on first use and cached statically,
2128 * optionally via a SoftReference to allow the system to reclaim memory.
2129 * <p>
2130 * There may be an adverse effect on system behaviour if many large images
2131 * are cached in memory; this may be mitigated by storing them via
2132 * a SoftReference so that the memory can be recycled automatically.
2133 * <p>
2134 * This method is internally synchronized to allow only one image load/decode
2135 * to happen at once to conserve CPU and memory (and other) resources.
2136 * <p>
2137 * If the image is indexed and forceToARGBTrueColour is true then
2138 * we expand it to true-colour to make it possible to add markings.
2139 * <p>
2140 * This uses our built-in mediahandler classes to decode the image,
2141 * so the image type must be one that we have a decoder for.
2142 * <p>
2143 * This may not handle alpha correctly in all cases.
2144 * <p>
2145 * Under memory pressure this may release cached resources
2146 * whether hard or soft cached.
2147 * <p>
2148 * <strong>Beware:</strong> since a reference is returned rather than
2149 * a copy, be careful not to adjust the returned image unintentionally.
2150 *
2151 * @param context gives context for WAR from which to load the raw
2152 * base clickable-map image
2153 * @param resourceRRURL root-relative URL (eg "/my/image.gif") of
2154 * source image in WAR; must not be null and must be a type
2155 * (and with a file extension) that we have a MIME mediahandler for
2156 * @param forceToARGBTrueColour if true, and the source image is not
2157 * ARGB true-colour, then we force conversion to ARGB true-colour
2158 * before cacheing to make it easier to draw on the image
2159 * @param cacheViaSoftReference if true then we attempt to cache the
2160 * image via a SoftReference; if at any point this is true when the
2161 * image needs to be (re)cached, the image remains permanently cached
2162 * via a strong reference thereafter
2163 * @param copyResult if true, we force the result to be a copy of
2164 * the cached value to avoid accidentally changing the cached copy;
2165 * this may force a change in colour model and/or discarding properties
2166 *
2167 * @return image, possibly a copy, possibly with a converted colour model
2168 *
2169 * @throws java.lang.IllegalStateException if the image is not loadable
2170 */
2171 public static BufferedImage getAndCacheStaticImage(
2172 final boolean copyResult,
2173 final String resourceRRURL,
2174 final boolean forceToARGBTrueColour,
2175 final ServletContext context,
2176 final boolean cacheViaSoftReference)
2177 throws IllegalStateException
2178 {
2179 if((context == null) ||
2180 (resourceRRURL == null))
2181 { throw new IllegalArgumentException(); }
2182
2183 synchronized(_gACSI_cache)
2184 {
2185 // An item in the cache is one of:
2186 // * null (ie completely absent)
2187 // * BufferedImage
2188 // * SoftReference (possibly cleared) to BufferedImage
2189 final Object rawItem = _gACSI_cache.get(resourceRRURL);
2190 BufferedImage result = (!(rawItem instanceof SoftReference))
2191 ? (BufferedImage) rawItem
2192 : (BufferedImage) (((SoftReference<?>) rawItem).get());
2193
2194 if(result == null) // Needs fetching.
2195 {
2196 final InputStream is =
2197 context.getResourceAsStream(resourceRRURL);
2198 if(is == null)
2199 { throw new IllegalStateException("cannot get InputSteam for image to cache from " + resourceRRURL); }
2200
2201 // Find correct handler given name of file.
2202 final ExhibitMIME.ExhibitTypeParameters etp =
2203 ExhibitMIME.getInputFileType(resourceRRURL);
2204 if(etp == null)
2205 { throw new IllegalStateException("cannot get MIME type for image to cache from " + resourceRRURL); }
2206 if(etp.handler == null)
2207 { throw new IllegalStateException("cannot get handler for image to cache from " + resourceRRURL); }
2208
2209 try {
2210 // Decode the image.
2211 result = etp.handler.decodeImage(is);
2212 if(result == null)
2213 { throw new IllegalStateException("cannot get decode image to cache from " + resourceRRURL); }
2214
2215 // Force to ARGB if required.
2216 if(forceToARGBTrueColour)
2217 { result = ImageUtils.convertToTrueColourARGB(result, false); }
2218
2219 // Cache the result.
2220 if(cacheViaSoftReference)
2221 { _gACSI_cache.put(resourceRRURL, new SoftReference<BufferedImage>(result)); }
2222 else
2223 { _gACSI_cache.put(resourceRRURL, result); }
2224 }
2225 catch(final IOException e)
2226 { throw new IllegalStateException("cannot decode image to cache from " + resourceRRURL + ": IOException: " + e.getMessage()); }
2227 }
2228
2229 // If forced to copy the result, do so.
2230 // TODO: Maybe try to preserve relevant image properties?
2231 if(copyResult)
2232 {
2233 // Coerce data into original colour model.
2234 // Discard any properties of the original.
2235 final ColorModel cm = ImageUtils.extractColorModelOrRGB(result);
2236 final int width = result.getWidth();
2237 final int height = result.getHeight();
2238 final WritableRaster raster = cm.createCompatibleWritableRaster(
2239 width, height);
2240 final BufferedImage copiedResult =
2241 new BufferedImage(cm, raster, false, null);
2242
2243 // Actually copy the pixels...
2244 copiedResult.setRGB(0, 0, width, height,
2245 result.getRGB(0, 0, width, height, null, 0, width),
2246 0, width);
2247
2248 return(copiedResult);
2249 }
2250
2251 return(result);
2252 }
2253 }
2254
/**Private cache for getAndCacheStaticImage(); never null.
 * Maps a root-relative URL to either a BufferedImage
 * or a SoftReference (possibly cleared) to a BufferedImage,
 * which is why the value type is plain Object.
 * <p>
 * Thread-safe LRU-managed limited-size memory-sensitive map.
 * <p>
 * We are prepared to discard everything if very short of memory.
 * <p>
 * All getAndCacheStaticImage() activity is synchronized on this instance.
 * <p>
 * NOTE(review): the create() arguments (0, 1024, name) are presumably
 * size bounds and a diagnostic label; confirm against MemoryTools.
 */
private static final MemoryTools.SimpleLRUMapAutoSizeForHitRate<String,Object> _gACSI_cache =
    MemoryTools.SimpleLRUMapAutoSizeForHitRate.<String,Object>create(0, 1024, "_gACSI_cache");
2267
2268
2269 /**Generate a unique key for the given HTTP request; returns null if not possible to generate.
2270 * For example, we use this to help limit the number of times that
2271 * a given user is asked to vote.
2272 * <p>
2273 * Generates a string starting with the given prefix and some
2274 * leading portion of the client's IP address...
2275 * <p>
2276 * It is not fatal if this conflates users,
2277 * but it is more useful that this never thinks one user is more than one
2278 * to avoid pestering them too much
2279 * (or letting a spider inject too much noise for example).
2280 *
2281 * @param prefix unique prefix to the generated key; non-empty, non-null
2282 * @param request the user's request
2283 */
2284 public static String generateUserDataPointID(final String prefix,
2285 final HttpServletRequest request)
2286 {
2287 if((prefix == null) || (prefix.length() == 0)) { return(null); }
2288 if(request == null) { return(null); }
2289
2290 final StringBuilder sb = new StringBuilder(prefix.length() + 21);
2291 sb.append(prefix);
2292
2293 try
2294 {
2295 // Get the IP address
2296 final InetAddress ia = InetAddress.getByName(request.getRemoteAddr());
2297
2298 // Use all but the final octet to construct the data-point ID.
2299 // Assume that this will distinguish most genuine users.
2300 final byte[] addr = ia.getAddress();
2301 for(int i = 0; i < addr.length-1; ++i)
2302 { sb.append(addr[i] & 0xff).append('.'); }
2303
2304 return(sb.toString());
2305 }
2306 catch(final UnknownHostException e)
2307 {
2308 return(null); // Could not understand the client's address.
2309 }
2310 }
2311
/**Name of the additional parameter recording whether this is a vote "for" ("pro").
 * The value of the parameter is to be "true" or "false".
 */
public static final String VOTE_PRO_PARAM_NAME = "pro";

/**Name of the additional parameter recording whether this is a vote "against" ("con").
 * The value of the parameter is to be "true" or "false".
 */
public static final String VOTE_CON_PARAM_NAME = "con";
2321
2322 /**Create and post the handler for a vote if possible and returns the unique listenerID.
2323 * This replaces any existing listener for this voter.
2324 * <p>
2325 * This returns null if we could not create a listener.
2326 * <p>
2327 * If the handler is invoked,
2328 * then this registers a dummy (inactive) handler
2329 * to delay the next request to the user to vote.
2330 *
2331 * @param exhibitFullName valid exhibit name; never null
2332 * @param request client's HTTP request; never null
2333 * @param vars where stats updates are posted; never null
2334 *
2335 * @return listenerID, or null if one could not be generated
2336 */
2337 public static String createAndFileVoteListener(final Name.ExhibitFull exhibitFullName,
2338 final HttpServletRequest request,
2339 final SimpleVariablePipelineIF vars)
2340 {
2341 // if(!ExhibitName.validNameSyntax(exhibitFullName)) { return(null); }
2342 if(exhibitFullName == null) { return(null); }
2343 if(vars == null) { return(null); }
2344
2345 final String dpID = generateUserDataPointID(WebConsts.VOTER_DATA_POINT_PREFIX, request);
2346 if(dpID == null) { return(null); }
2347
2348 // Compute expiry time (with a random element).
2349 final long expireBy = System.currentTimeMillis() + WebConsts.VOTE_MIN_REQUEST_GAP_MS +
2350 Rnd.fastRnd.nextInt(WebConsts.VOTE_MIN_REQUEST_GAP_MS);
2351
2352 // Capture the visitor's IP address for later (in case they vote).
2353 InetAddress va = null;
2354 try { va = InetAddress.getByName(request.getRemoteAddr()); }
2355 catch(final UnknownHostException e) { } // Ignore errors...
2356 final InetAddress voterIPAddr = va;
2357
2358 // Create new listener.
2359 // Pass in a weak reference to the vars pipeline to avoid blocking GC...
2360 final StatsSink.AbstractStatsListener newAsl =
2361 (new VoteHandler(dpID, expireBy, exhibitFullName, new WeakReference<SimpleVariablePipelineIF>(vars), voterIPAddr));
2362
2363 // Get the listener ID...
2364 final String listenerID = newAsl.uniqueListenerID;
2365
2366 // File the request!
2367 StatsSink.addListenerForDataPoint(newAsl);
2368
2369 return(listenerID);
2370 }
2371
2372
2373 /**Get sorted, hyperlinked HTML i18n text list of exhibit categories; never null.
2374 * The list is sorted:
2375 * <ol>
2376 * <li>By dictionary-order i18n localised title.
2377 * </ol>
2378 * <p>
2379 * We may flag categories as good or bad if significantly so.
2380 *
2381 * @param asList if true, entries are preceded by <li>
2382 * else they are followed by <br />\r\n;
2383 */
2384 public static final String getCategoryListSortedAsHTML(final DataSourceBean dsb,
2385 final LocaleBeanBase localeBean,
2386 final boolean asList)
2387 throws IOException
2388 {
2389 return(getCategoryListSortedAsHTML(dsb, localeBean, asList ? null : "<br />\r\n"));
2390 }
2391
2392 /**Get sorted, hyperlinked HTML i18n text list of exhibit categories; never null.
2393 * The list is sorted:
2394 * <ol>
2395 * <li>By dictionary-order i18n localised title.
2396 * </ol>
2397 * <p>
2398 * We may flag categories as good or bad if significantly so.
2399 *
2400 * @param entrySeparator text (followed by CRLF) to terminate entries;
2401 * null if entries are to be wrapped with li tags
2402 */
2403 public static final String getCategoryListSortedAsHTML(final DataSourceBean dsb,
2404 final LocaleBeanBase localeBean,
2405 final String entrySeparator)
2406 throws IOException
2407 {
2408 if((dsb == null) || (localeBean == null))
2409 { throw new IllegalArgumentException(); }
2410
2411 final boolean asList = (null == entrySeparator);
2412
2413 // Private data on each category, for sorting.
2414 final class PerCat implements Comparable<PerCat>
2415 {
2416 PerCat(final String cat,
2417 final String i18nTitle,
2418 final Boolean isGood,
2419 final int entriesInCat)
2420 {
2421 category = cat;
2422 title = i18nTitle;
2423 good = isGood;
2424 numEntries = entriesInCat;
2425 }
2426
2427 final String category;
2428 final String title;
2429 final Boolean good;
2430 final int numEntries;
2431
2432 /**Compares this object with the specified object for order.
2433 * Order is:
2434 * <ol>
2435 * <li>By dictionary-order i18n localised title.
2436 * </ol>
2437 */
2438 public int compareTo(final PerCat other)
2439 {
2440 // Sort by localised title, case insensitively.
2441 final int tComp = String.CASE_INSENSITIVE_ORDER.compare(title, other.title);
2442 if(tComp != 0) { return(tComp); }
2443
2444 // Officially break ties by underlying canonical category name.
2445 // This should never really be needed.
2446 return(category.compareTo(other.category));
2447 }
2448 }
2449
2450 final AllExhibitProperties aep = dsb.getAllExhibitProperties(-1);
2451
2452 // Get the list of categories.
2453 final Map<String,Integer> categories = aep.getCategoryExhibitCounts();
2454
2455 // Create sorted set of details.
2456 // We don't expend huge effort on getting the category good/bad status.
2457 final SortedSet<PerCat> cs = new TreeSet<PerCat>();
2458 for(final String cat : categories.keySet())
2459 {
2460 cs.add(new PerCat(
2461 cat,
2462 GenUtils.computeSectionTitle(aep, cat, localeBean),
2463 aep.isCategoryGood(cat, dsb, false),
2464 categories.get(cat)
2465 ));
2466 }
2467
2468 // Convert to nicely-formatted HTML.
2469 final StringBuilder sb = new StringBuilder(79 * categories.size());
2470 for(final PerCat pc : cs)
2471 {
2472 if(asList) { sb.append("<li>"); }
2473
2474 // If category significantly good/bad then mark it so.
2475 if(pc.good != null)
2476 {
2477 final String proIcon = "smile.gif";
2478 final String conIcon = "frown.gif";
2479 sb.append("<img src=\"/_static/icon/").
2480 append(pc.good.booleanValue() ? proIcon : conIcon).
2481 append("\" width=15 height=15> ");
2482 }
2483
2484 // Open link to category RRURL.
2485 sb.append("<a href=\"").
2486 append(WebConsts.VIRTUAL_COLLECTIONS_BYCATEGORY_ROOT).
2487 append(pc.category).
2488 append("/\">");
2489 sb.append(pc.title);
2490 // Close link to category.
2491 sb.append("</a>");
2492
2493 // Insert exhibit count.
2494 sb.append(" <i>(").append(pc.numEntries).append(")</i>");
2495
2496 // Finish the point
2497 if(!asList) { sb.append(entrySeparator); }
2498 else { sb.append("</li>\r\n"); }
2499 }
2500
2501 return(sb.toString());
2502 }
2503
2504 /**Returns true if this seems to be a slave disconnected from the master.
2505 * This instance may, for example, not wish to collect votes from users
2506 * if the votes may get discarded without getting to the master.
2507 */
2508 public static final boolean isDisconnectedSlave(final DataSourceBean dsb)
2509 {
2510 if(dsb == null) { throw new IllegalArgumentException(); }
2511
2512 // If definitely a master then this is not 'disconnected' by definition.
2513 if(Boolean.FALSE.equals(dsb.isSlave()))
2514 { return(false); }
2515
2516 // This instance is treated as disconnected if it is a slave with no xfer key
2517 // since that implies that it won't be allowed to send updates (eg votes) home.
2518 if(!LocalProps.hasXferKey())
2519 { return(true); }
2520
2521 // This instance is to be treated as NOT disconnected
2522 // if it can see at least one other system via the system variables,
2523 // ie at least two systems in total.
2524 // This uses the availablity of client-count as a measure of connectivity.
2525 final SimpleVarStats stats = VarTools.generateSimpleStats(dsb,
2526 SystemVariables.ThroughputMonitorFilter_CLIENT_COUNT,
2527 0); /* Minimum possible life. */
2528 return((stats == null) || (stats.getSystemCount() < 2));
2529 }
2530
2531 /**Returns true if this request is apparently a precacheing request, eg from a "Web accelerator".
2532 * This is true if a client (such as FireFox) is "reading ahead"
2533 * but it may be the case that no real human gets to see the content.
2534 * <p>
2535 * See https://developer.mozilla.org/en/Link_prefetching_FAQ
2536 */
2537 public static boolean isPrecacheRequest(final HttpServletRequest request)
2538 {
2539 // "X-Moz: prefetch" header covers FF1--3.5 and Google's Web Accelerator.
2540 return("prefetch".equalsIgnoreCase(request.getHeader("X-Moz")));
2541 }
2542
2543
/**Private key under which getCatPageExhibitMetaDataHTML() files its metadata cache against the DataSourceBean; never null.
 * The value stored under this key is the AEP-linked cache map
 * from full exhibit name to formatted metadata.
 */
private static final AEPLinkedKey metadataCacheKey = new AEPLinkedKey("metadataCacheKey");
2546
2547 /**Static dictionary used by getCatPageExhibitMetaDataHTML() for compression of in-memory data; never null.
2548 * The static dictionary content should be reviewed after any major format changes,
2549 * though this is not a correctness issue, only a matter of compression efficiency.
2550 */
2551 public static final Compact7BitString.StaticDictionary sDictMD = new Compact7BitString.StaticDictionary("getCatPageExhibitMetaDataHTML",
2552 Arrays.asList(new String[]{
2553 "JPEG", /* MANUALLY ADDED: count=16817, saving=50451, meanFirstPos=126 */
2554 "javax_imageio_1", /* count=17761, saving=248654, meanFirstPos=37 */
2555 "ColorSpaceType", /* count=17752, saving=230776, meanFirstPos=67 */
2556 "Compression", /* count=17684, saving=176840, meanFirstPos=106 */
2557 "NumChannels", /* count=16942, saving=169420, meanFirstPos=86 */
2558 "metadata", /* count=17939, saving=125573, meanFirstPos=9 */
2559 "TypeName", /* count=16826, saving=117782, meanFirstPos=120 */
2560 "compact", /* count=18684, saving=112104, meanFirstPos=3 */
2561 "Chroma", /* count=17755, saving=88775, meanFirstPos=53 */
2562 "value", /* count=18684, saving=74736, meanFirstPos=87 */
2563 "image", /* count=16951, saving=67804, meanFirstPos=23 */
2564 "YCbCr", /* count=15240, saving=60960, meanFirstPos=73 */
2565 "name", /* count=17751, saving=53253, meanFirstPos=71 */
2566 "\"</", /* count=18684, saving=37368, meanFirstPos=72 */
2567 "></", /* count=17805, saving=35610, meanFirstPos=95 */
2568 "</", /* count=18684, saving=18684, meanFirstPos=10 */
2569 "=\"", /* count=18684, saving=18684, meanFirstPos=70 */
2570 "> ", /* count=18684, saving=18684, meanFirstPos=68 */
2571 "><", /* count=18684, saving=18684, meanFirstPos=4 */
2572 "dd", /* count=18684, saving=18684, meanFirstPos=13 */
2573 "dl", /* count=18684, saving=18684, meanFirstPos=1 */
2574 "dt", /* count=18684, saving=18684, meanFirstPos=5 */
2575 "BackgroundIndex", /* count=808, saving=11312, meanFirstPos=103 */
2576 "BlackIsZero", /* count=934, saving=9340, meanFirstPos=89 */
2577 "accessionData", /* count=745, saving=8940, meanFirstPos=9 */
2578 "stream", /* count=984, saving=4920, meanFirstPos=23 */
2579 "GRAY", /* count=1157, saving=3471, meanFirstPos=73 */
2580 "CRC32", /* count=745, saving=2980, meanFirstPos=61 */
2581 "RGB", /* count=1353, saving=2706, meanFirstPos=73 */
2582 "TRUE", /* count=837, saving=2511, meanFirstPos=91 */
2583 "date", /* count=745, saving=2235, meanFirstPos=23 */
2584 "hash", /* count=745, saving=2235, meanFirstPos=59 */
2585 "size", /* count=745, saving=2235, meanFirstPos=41 */
2586 "sampleRate", /* count=174, saving=1566, meanFirstPos=88 */
2587 "MD5", /* count=745, saving=1490, meanFirstPos=82 */
2588 "encoding", /* count=174, saving=1218, meanFirstPos=70 */
2589 "frames", /* count=162, saving=810, meanFirstPos=37 */
2590 "audio", /* count=178, saving=712, meanFirstPos=56 */
2591 "57024", /* count=173, saving=692, meanFirstPos=47 */
2592 "PCM_SIGNED", /* count=55, saving=495, meanFirstPos=78 */
2593 "ULAW", /* count=112, saving=336, meanFirstPos=75 */
2594 "PCM_UNSIGNED", /* count=7, saving=77, meanFirstPos=77 */
2595 "BI_RGB", /* count=6, saving=30, meanFirstPos=74 */
2596 "1136915", /* MANUALLY ADDED: count ~ 10 as prefix. */
2597 }));
2598
2599 /**Generates HTML form of exhibit metadata, "" if no such metadata for the specified exhibit; never null.
2600 * The result is keyed to both the DataSourceBean and the exhibitName.
2601 * <p>
2602 * Cached values are discarded when the AEP changes.
2603 * <p>
2604 * The computed HTML is locale-invariant, which is why cacheing is viable.
2605 * <p>
2606 * The toString() method should be called on the result to get the String HTML text,
2607 * which may be a String or some other internal representation.
2608 *
2609 * @param dsb valid non-null DataSourceBean
2610 * @param exhibitName valid non-null exhibit full name
2611 */
2612 @SuppressWarnings("unchecked")
2613 public static Object getCatPageExhibitMetaDataHTML(final DataSourceBean dsb,
2614 final Name.ExhibitFull exhibitName)
2615 {
2616 assert((dsb != null) && ExhibitName.validNameSyntax(exhibitName));
2617
2618 // Get existing cache map, or create new one.
2619 // The cache is a size-limited, thread-safe Map
2620 // from full exhibit name to HTML formatted metadata ("" if none).
2621 // The items can be large and possibly moderately expensive to (re)compute
2622 // though all in-memory (no disc access for example) so a miss is not that bad.
2623 // Races here may result in some wasted work but no errors.
2624 MemoryTools.SimpleProbabilisticCache<Name.ExhibitFull,Object> cachedMetaDataMap;
2625 while((cachedMetaDataMap = (MemoryTools.SimpleProbabilisticCache<Name.ExhibitFull,Object>) dsb.getAEPLinkedValue(metadataCacheKey)) == null)
2626 {
2627 // Limit size to ~10 per 1MB of heap, but minimum a few tens to cover popular pages.
2628 final int maxCacheSize = Math.max(32, (int) Math.min(16384, Runtime.getRuntime().totalMemory() >> 17));
2629 dsb.putIfAbsentAEPLinkedValue(metadataCacheKey, MemoryTools.SimpleProbabilisticCache.<Name.ExhibitFull, Object>create(maxCacheSize, metadataCacheKey.comment));
2630 }
2631 final Object cachedMetaData = cachedMetaDataMap.get(exhibitName);
2632 if(cachedMetaData != null) { return(cachedMetaData); }
2633
2634 // Need to (re)compute metadata for this exhibit
2635 // eg for the first time and/or after an AEP load/change.
2636 try
2637 {
2638 final AllExhibitProperties aep = dsb.getAllExhibitProperties(-1);
2639
2640 final String result = getCatPageExhibitMetaDataHTMLRaw(exhibitName, aep);
2641 assert(result != null);
2642 if(IsDebug.isDebug) { System.out.println("INFO: getCatPageExhibitMetaDataHTML() result size of "+result.length()+" chars"); }
2643
2644 // We do not intern() the result
2645 // since we expect each non-"" metadata value to be unique.
2646 // Providing that the system is not hideously short of memory
2647 // then cache in an a compact form if possible else as a plain String.
2648 if(!MemoryTools.isMemoryStressed())
2649 {
2650 try { cachedMetaDataMap.put(exhibitName, Compact7BitString.convertToCompact7BitString(result, sDictMD)); }
2651 catch(final IllegalArgumentException e) { cachedMetaDataMap.put(exhibitName, result); }
2652 }
2653
2654 // Return the uncompressed result to save the caller a little time...
2655 return(result);
2656 }
2657 catch(final IOException e)
2658 {
2659 // Give up without cacheing anything in case of error.
2660 // e.printStackTrace(); // Absorb the error and don't whinge...
2661 return(""); // Cannot compute the value right now...
2662 }
2663 }
2664
2665 /**Computes the raw data for getCatPageExhibitMetaDataHTML() without cacheing; never null.
2666 * @return "" if there is no metadata for this exhibit
2667 */
2668 public static String getCatPageExhibitMetaDataHTMLRaw(final Name.ExhibitFull exhibitName, final AllExhibitProperties aep)
2669 {
2670 if(null == exhibitName) { throw new IllegalArgumentException(); }
2671 final ExhibitPropsLoadable epl = aep.getExhibitPropsLoadable(exhibitName);
2672 final ExhibitPropsComputable epc = aep.getExhibitPropsComputable(exhibitName);
2673
2674 final Node metadata = (epc == null) ? null : epc.getMetadata();
2675 final AccessionData accessionMetadata = epl.getAccessionMetadata();
2676 if((metadata != null) || (accessionMetadata != null))
2677 {
2678 final StringBuilder sb = new StringBuilder(2048);
2679 if(metadata != null)
2680 { sb.append(TextUtils.toXML(metadata, true, true)); }
2681 if(accessionMetadata != null)
2682 { sb.append(TextUtils.toXML(accessionMetadata.getAsDOM(), true, true)); }
2683 return(sb.toString());
2684 }
2685
2686 // No metadata at all.
2687 return("");
2688 }
2689
2690 /**As for getCatPageExhibitMetaDataHTML(), but will compute a missing value asynchronously; never null.
2691 * If the value is already computed then it is available immediately,
2692 * else this attempts to spin off task compute the value,
2693 * and get() will block until the value is ready/computed.
2694 * <p>
2695 * If the target thread pool is full the computation will be done synchronously,
2696 * ie in this thread blocking this call until complete.
2697 * <p>
2698 * The toString() method should be called on the result to get the String text.
2699 */
2700 @SuppressWarnings("unchecked")
2701 public static Future<?> getCatPageExhibitMetaDataHTMLFuture(
2702 final DataSourceBean dsb,
2703 final Name.ExhibitFull exhibitName)
2704 {
2705 // Try first to return any extant cached value without blocking.
2706 // If present, we need not start any thread at all.
2707 final MemoryTools.SimpleProbabilisticCache<Name.ExhibitFull,Object> cachedMetaDataMap = (MemoryTools.SimpleProbabilisticCache<Name.ExhibitFull,Object>) dsb.getAEPLinkedValue(metadataCacheKey);
2708 if(null != cachedMetaDataMap)
2709 {
2710 final Object cachedMetaData = cachedMetaDataMap.get(exhibitName);
2711 // Return already-finished Future with value, if any.
2712 if(null != cachedMetaData)
2713 { return(ThreadUtils.makeCompletedFuture(cachedMetaData)); }
2714 }
2715
2716 // If we did not find a cached value,
2717 // then set up the task to compute the value asynchronously.
2718 final Callable<?> callable = new Callable<Object>(){
2719 public final Object call() throws Exception
2720 { return(getCatPageExhibitMetaDataHTML(dsb, exhibitName)); }
2721 };
2722 // Start the (CPU-intensive) thread immediately.
2723 final Future<?> result = ThreadUtils.computeIntensiveThreadPool.submit(callable);
2724
2725 // Return the handle for the caller to retrieve the value...
2726 return(result);
2727 }
2728
2729 /**Hyphenate long HTML text (that contains zero or more `-' characters and little or no whitespace).
2730 * Replaces hyhens ('-') with spaces to allow a browser to wrap the text.
2731 * <p>
2732 * Usually used with exhibit names or fragments of such names.
2733 */
2734 public static final String hyphenateHTMLText(final String s)
2735 {
2736 return(s.replace('-', ' '));
2737 }
2738
/**LRU thread-safe private cache mapping from exhibit type and Accept header to acceptability of that MIME type for inlining in XHTML mobile text.
 * The key pairs the exhibit type parameters with one complete Accept header value;
 * the value is TRUE if that header admits the type, FALSE if it does not.
 * <p>
 * We assume that the Accept headers will be more or less constant for a given device,
 * and probably constant between instances of the device,
 * so we have enough entries to cover the likely different <em>types</em> of device
 * using the Gallery at any one time.
 * <p>
 * We take care to avoid using huge Accept values in keys to avoid DoS-style issues.
 * <p>
 * We're prepared to discard this entirely when short of memory.
 * <p>
 * NOTE(review): the 512 passed to create() is presumably the maximum entry count; confirm against MemoryTools.
 */
private static final MemoryTools.SimpleProbabilisticCache<Tuple.Pair<ExhibitMIME.ExhibitTypeParameters,String>, Boolean> _inlineableInXHTML = MemoryTools.SimpleProbabilisticCache.<Tuple.Pair<ExhibitMIME.ExhibitTypeParameters,String>, Boolean>create(512, "_inlineableInXHTML");
2750
2751 /**Returns true if the given MIME-type can always be inlined in an XHTML (mobile) page.
2752 * If the type argument is null, this returns false.
2753 * <p>
2754 * This always allows JPEG and GIF,
2755 * but may also allow other (image) types listed in an incoming "Accept" header.
2756 */
2757 public static boolean canInlineInXHTMLPage(final ExhibitMIME.ExhibitTypeParameters exhibitType,
2758 final HttpServletRequest request)
2759 {
2760 if(exhibitType == null) { return(false); }
2761
2762 // Allow commonly-supported GIF (89a) and JPEG types always.
2763 switch(exhibitType.type)
2764 {
2765 case ExhibitMIME.ET_JPEG:
2766 case ExhibitMIME.ET_GIF: // Should really check GIF version.
2767 return(true);
2768 }
2769 // No header to analyse, so cannot allow more than the basic types.
2770 if(request == null) { return(false); }
2771
2772 // We are prepared to test for a limited selection of other inlineable types.
2773 switch(exhibitType.type)
2774 {
2775 case ExhibitMIME.ET_PNG:
2776 case ExhibitMIME.ET_SWF:
2777 case ExhibitMIME.ET_BMP:
2778 // TODO: WBMP, SVG?
2779 {
2780 // Look for any Accept headers.
2781 final Enumeration<?> headers = request.getHeaders("Accept");
2782 // No Accept headers to analyse, so cannot allow more than the basic types.
2783 if(headers == null) { return(false); }
2784
2785 // Allow for possibility of multiple Accept headers (probably rare though).
2786 while(headers.hasMoreElements())
2787 {
2788 final String h = (String) headers.nextElement();
2789 // We refuse to process gigantic header values at all
2790 // to preserve performance and avoid DoS-style problems.
2791 if(h.length() >= 1024) { continue; }
2792
2793 // Make cache lookup key and attempt cache lookup.
2794 final Tuple.Pair<ExhibitMIME.ExhibitTypeParameters,String> key =
2795 new Tuple.Pair<ExhibitMIME.ExhibitTypeParameters,String>(exhibitType, h);
2796 final Boolean cachedResult = _inlineableInXHTML.get(key);
2797 if(Boolean.TRUE.equals(cachedResult)) { return(true); }
2798 if(Boolean.FALSE.equals(cachedResult)) { continue; /* Try other headers. */ }
2799 System.out.println("INFO: new XHTML Accept header '"+TextUtils.sanitiseForXML(h, 256, true)+"' from User-Agent: "+TextUtils.sanitiseForXML(request.getHeader("User-Agent"), 256, true));
2800
2801 // Do explicit search though header for specified MIME type.
2802 for(final String t : h.split(","))
2803 {
2804 // Allow for direct MIME-type match,
2805 // or with trailing q factor, eg "image/png;q=0.9",
2806 // or just a generic catch-all */*.
2807 final String trimmed = t.trim(); // Remove padding whitespace.
2808 if(trimmed.equals("*/*") ||
2809 trimmed.equals(exhibitType.mimeType) ||
2810 (trimmed.startsWith(exhibitType.mimeType) && trimmed.substring(exhibitType.mimeType.length()).startsWith(";")))
2811 {
2812 _inlineableInXHTML.put(key, Boolean.TRUE); // Cache success.
2813 return(true);
2814 }
2815 }
2816
2817 // Cache failure (against this header).
2818 _inlineableInXHTML.put(key, Boolean.FALSE);
2819 }
2820 }
2821 }
2822 return(false);
2823 }
2824
2825 /**Returns true if the given MIME-type can always be inlined in an HTML page.
2826 * If the argument is null, this returns false.
2827 */
2828 public static boolean canInlineInHTMLPage(final ExhibitMIME.ExhibitTypeParameters exhibitType)
2829 {
2830 if(exhibitType == null) { return(false); }
2831 switch(exhibitType.type)
2832 {
2833 case ExhibitMIME.ET_JPEG:
2834 case ExhibitMIME.ET_GIF:
2835 case ExhibitMIME.ET_PNG: // Most HTML browsers will accept PNG now.
2836 case ExhibitMIME.ET_SWF: // Most browsers have a Flash plug-in now.
2837 case ExhibitMIME.ET_BMP: // Embedded BMP should be widely supported.
2838 case ExhibitMIME.ET_HTMLFRAG:
2839 return(true);
2840 }
2841 return(false);
2842 }
2843
/**Private key under which getCategoryTreeFilterBean() files its per-category map against the DataSourceBean; never null.
 * The value stored under this key is the AEP-linked map
 * from category name to TreeFilterBean.
 */
private static final DataSourceBean.AEPLinkedKey _getCategoryTreeFilterBeanKey = new DataSourceBean.AEPLinkedKey("_getCategoryTreeFilterBeanKey");
2846
2847 /**Get selected by-category TreeFilterBean from entire exhibit set; never null.
2848 * Used for the "by category" exhibit tree view and elsewhere.
2849 * <p>
2850 * This data is cached linked to the DSB
2851 * (which in passing ensures that it can be dropped automatically under extreme memory stress).
2852 * <p>
2853 * The category name is primarily checked for syntactic validity,
2854 * not for actual presence in the AEP.
2855 */
2856 @SuppressWarnings("unchecked")
2857 public static TreeFilterBean getCategoryTreeFilterBean(final DataSourceBean dsb,
2858 final CharSequence category)
2859 {
2860 if((dsb == null) || !ExhibitName.validNameInitialComponentSyntax(category))
2861 { throw new IllegalArgumentException(); }
2862
2863 // Map from category name to TreeFilerBean.
2864 // Created as necessary on first use (after AEP change).
2865 ConcurrentMap<String,TreeFilterBean> trees;
2866 while(null == (trees = (ConcurrentMap<String,TreeFilterBean>) dsb.getAEPLinkedValue(_getCategoryTreeFilterBeanKey)))
2867 { dsb.putIfAbsentAEPLinkedValue(_getCategoryTreeFilterBeanKey, new ConcurrentHashMap<String,TreeFilterBean>()); }
2868
2869 // Atomically ensure that the right tree filter bean for category exists, else create it.
2870 TreeFilterBean tfb;
2871 final String categoryAsString = category.toString();
2872 while(null == (tfb = trees.get(categoryAsString)))
2873 {
2874 // Create the filter bean.
2875 tfb = new TreeFilterBean();
2876 // Give it a name for improved diagnostics.
2877 tfb.setName("category:"+categoryAsString);
2878 // Although this is assumed to be expensive to (re)compute,
2879 // allow it to be dumped unless there's loads of memory free right now
2880 // AND this is a multiprocessor (ie, assumed 'big') host.
2881 // In an unstressed system this may hang around indefinitely
2882 // providing quick access to the pages based on them.
2883 // Note that being DSB-linked allows this to be dumped under extreme memory stress anyway.
2884 tfb.setMemorySensitiveCache((ThreadUtils.AVAILABLE_PROCESSORS == 1) || !MemoryTools.lotsFree());
2885 // Set the filter for the right category.
2886 tfb.setExpr(new FilterExpr(null, new BuiltInFilters.filtByCategory(new String[]{categoryAsString})));
2887 // Store the new bean in the cache, iff no one else got there first.
2888 trees.putIfAbsent(MemoryTools.intern(categoryAsString), tfb);
2889 }
2890
2891 return(tfb);
2892 }
2893
/**Flags for User-Agent pattern matching checking for mobile phones.
 * Matching is case-insensitive (including for non-ASCII characters)
 * and takes canonical equivalence into account.
 */
private static final int MOBILE_REGEX_FLAGS = Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE | Pattern.CANON_EQ;

/**Mobile-browser detection regex 1 c/o detectmobilebrowser.com 2010/06/30.
 * The alternatives are wrapped in leading and trailing ".*"
 * so that a whole-string match succeeds if any of them occurs anywhere in the input.
 */
private static final Pattern MOBILE_REGEX_DMB_1_20100630 = Pattern.compile(".*(android|avantgo|blackberry|blazer|compal|elaine|fennec|hiptop|iemobile|ip(hone|od)|iris|kindle|lge |maemo|midp|mmp|opera m(ob|in)i|palm( os)?|phone|p(ixi|re)\\/|plucker|pocket|psp|symbian|treo|up\\.(browser|link)|vodafone|wap|windows (ce|phone)|xda|xiino).*", MOBILE_REGEX_FLAGS);

/**Mobile-browser detection regex 2 c/o detectmobilebrowser.com 2010/06/30.
 * This has no wildcard wrapping;
 * isBrowserOnMobileDevice() applies it to just the leading four characters of the User-Agent.
 */
private static final Pattern MOBILE_REGEX_DMB_2_20100630 = Pattern.compile("1207|6310|6590|3gso|4thp|50[1-6]i|770s|802s|a wa|abac|ac(er|oo|s\\-)|ai(ko|rn)|al(av|ca|co)|amoi|an(ex|ny|yw)|aptu|ar(ch|go)|as(te|us)|attw|au(di|\\-m|r |s )|avan|be(ck|ll|nq)|bi(lb|rd)|bl(ac|az)|br(e|v)w|bumb|bw\\-(n|u)|c55\\/|capi|ccwa|cdm\\-|cell|chtm|cldc|cmd\\-|co(mp|nd)|craw|da(it|ll|ng)|dbte|dc\\-s|devi|dica|dmob|do(c|p)o|ds(12|\\-d)|el(49|ai)|em(l2|ul)|er(ic|k0)|esl8|ez([4-7]0|os|wa|ze)|fetc|fly(\\-|_)|g1 u|g560|gene|gf\\-5|g\\-mo|go(\\.w|od)|gr(ad|un)|haie|hcit|hd\\-(m|p|t)|hei\\-|hi(pt|ta)|hp( i|ip)|hs\\-c|ht(c(\\-| |_|a|g|p|s|t)|tp)|hu(aw|tc)|i\\-(20|go|ma)|i230|iac( |\\-|\\/)|ibro|idea|ig01|ikom|im1k|inno|ipaq|iris|ja(t|v)a|jbro|jemu|jigs|kddi|keji|kgt( |\\/)|klon|kpt |kwc\\-|kyo(c|k)|le(no|xi)|lg( g|\\/(k|l|u)|50|54|e\\-|e\\/|\\-[a-w])|libw|lynx|m1\\-w|m3ga|m50\\/|ma(te|ui|xo)|mc(01|21|ca)|m\\-cr|me(di|rc|ri)|mi(o8|oa|ts)|mmef|mo(01|02|bi|de|do|t(\\-| |o|v)|zz)|mt(50|p1|v )|mwbp|mywa|n10[0-2]|n20[2-3]|n30(0|2)|n50(0|2|5)|n7(0(0|1)|10)|ne((c|m)\\-|on|tf|wf|wg|wt)|nok(6|i)|nzph|o2im|op(ti|wv)|oran|owg1|p800|pan(a|d|t)|pdxg|pg(13|\\-([1-8]|c))|phil|pire|pl(ay|uc)|pn\\-2|po(ck|rt|se)|prox|psio|pt\\-g|qa\\-a|qc(07|12|21|32|60|\\-[2-7]|i\\-)|qtek|r380|r600|raks|rim9|ro(ve|zo)|s55\\/|sa(ge|ma|mm|ms|ny|va)|sc(01|h\\-|oo|p\\-)|sdk\\/|se(c(\\-|0|1)|47|mc|nd|ri)|sgh\\-|shar|sie(\\-|m)|sk\\-0|sl(45|id)|sm(al|ar|b3|it|t5)|so(ft|ny)|sp(01|h\\-|v\\-|v )|sy(01|mb)|t2(18|50)|t6(00|10|18)|ta(gt|lk)|tcl\\-|tdg\\-|tel(i|m)|tim\\-|t\\-mo|to(pl|sh)|ts(70|m\\-|m3|m5)|tx\\-9|up(\\.b|g1|si)|utst|v400|v750|veri|vi(rg|te)|vk(40|5[0-3]|\\-v)|vm40|voda|vulc|vx(52|53|60|61|70|80|81|83|85|98)|w3c(\\-| )|webc|whit|wi(g |nc|nw)|wmlb|wonu|x700|xda(\\-|2|g)|yas\\-|your|zeto|zte\\-", MOBILE_REGEX_FLAGS);
2902
2903 /**Return true if client appears to be a mobile device (browser sniffing).
2904 * This attempts to detect a small-display, CPU- and bandwidth- constrained device,
2905 * that might benefit from being sent small XHTML pages rather than standard HTML.
2906 * <p>
2907 * This examines the User-Agent and is unlikely to be completely reliable.
2908 * <p>
2909 * Thanks for the regex to http://detectmobilebrowser.com/
2910 */
2911 public static boolean isBrowserOnMobileDevice(final HttpServletRequest request)
2912 {
2913 final String ua = request.getHeader("User-Agent");
2914 return(MOBILE_REGEX_DMB_1_20100630.matcher(ua).matches() ||
2915 MOBILE_REGEX_DMB_2_20100630.matcher(ua.substring(0,4)).matches());
2916 }
2917
2918
2919 /**If true then allow some "sneaky" browser concurrency.
2920 * For browsers that do not always pipeline by default
2921 * but do allow fetches from different-named hosts in parallel
2922 * (eg the biggies such as IE6/IE7, FF1/1.5/2 as of 2006Q4),
2923 * then we can possibly help throughput by fetching some images (etc)
2924 * from a different name for this same host.
2925 * <p>
2926 * The trick is that where the user has arrived at the site
2927 * with a name other than the local mirror name,
2928 * and the URI that we were going to use was a rrURL (root-relative URL),
2929 * then we can prepend the local mirror name to make a new absolute URL
2930 * that the browser may be prepared to fetch from concurrently.
2931 * <p>
2932 * This has to be done completely consistently for any given item,
2933 * eg a static "page-furniture" image, to avoid defeating cacheing.
2934 */
2935 public static final boolean ALLOW_SNEAKY_HTTP_CONCURRENCY = true;
2936
2937 /**Get optional prefix for rrURL for extra "sneaky" browser concurrency.
2938 * This can only apply if:
2939 * <ul>
2940 * <li>ALLOW_SNEAKY_HTTP_CONCURRENCY is true.
2941 * <li>This host has a mirror prefix
2942 * (ie so that there is an "alternate" name for this host.
2943 * <li>The incoming request hostname is not this local mirror hostname.
2944 * <li>We do recognise the form (a vhost for) the URL in the request.
2945 * </ul>
2946 * else this routine always returns "".
2947 * <p>
2948 * This basically only works if the user is using a "generic" URL,
2949 * but that is more likely to be a machine far away (ie with large RTT),
2950 * so extra concurrency to try to overcome latency is especially helpful.
2951 * <p>
2952 * This slightly inflates the HTML that the client will see
2953 * but only if using a generic URL.
2954 * <p>
2955 * May inflate the number of concurrent connections back to this host,
2956 * but usually only by 1 or 2 at most.
2957 * <p>
2958 * Note that this scheme <em>does not</em> rely on any other host being up,
2959 * nor having exactly the same content as us.
2960 * <p>
2961 * This <em>is not</em> a technique for distributing load.
2962 *
2963 * @return "" or the http://mirror-... name (with no trailing slash) for this host
2964 */
2965 public static String getOptionalSneakyConcurrencyRRURLPrefix(final HttpServletRequest request)
2966 {
2967 if(!ALLOW_SNEAKY_HTTP_CONCURRENCY)
2968 { return(""); /* The default, ie no prefix. */}
2969
2970 final String mirrorTag = LocalProps.getMirrorTag();
2971 if(null == mirrorTag)
2972 { return(""); /* No reliable client-visible alternate hostname for us. */ }
2973
2974 // If we don't understand the hostname in the query then don't add the prefix.
2975 // TODO: could cache this against/in the request if too time-consuming...
2976 final String serverName = request.getServerName();
2977 // Our preferred form of the hostname is all-lower-case.
2978 final String serverNameLC = serverName.toLowerCase();
2979 final VirtualHosts.VirtualHost vHost =
2980 VirtualHosts.getVirtualHostDetails(serverNameLC, null);
2981 if(null == vHost)
2982 { return(""); /* Don't recognise the name in this request. */ }
2983
2984 final String localMirrorName = HostUtils.makeMirrorNameGeneric(mirrorTag);
2985 if(localMirrorName.equals(request.getServerName()))
2986 { return(""); /* Client is already using the alternate hostname for us. */ }
2987
2988 // Return suitable prefix to make rrURL into absolute URL.
2989 return("http://" + localMirrorName);
2990 }
2991
2992 /**Name of generic GenProps property for news-flash HTML segment. */
2993 private static final String GEN_NEWSFLASH_HTML = "newsflash.HTML";
2994
2995 /**Get "newsflash" HTML for the main site front page, or "" if none; never null.
2996 * This is retrieved from the GenProps.
2997 */
2998 public static String getNewsflashHTML(final GenProps gp)
2999 {
3000 if(gp == null) { return(""); /* Be kind to the caller. */ }
3001 final String result = gp.getGen().get(GEN_NEWSFLASH_HTML);
3002 if(result == null) { return(""); }
3003 return(result);
3004 }
3005
3006 /**Prefix of all user testimonials in the global properties. */
3007 private static final String TEST_PNAME_PREFIX = "org.hd.org.pg2k.testimonial.";
3008
3009 /**Logically immutable cached lists of testimonal Strings by language (not locale); never null.
3010 * Private to getUserTestimonial().
3011 * <p>
3012 * No null keys, not null/empty values.
3013 * <p>
3014 * Small fixed size.
3015 * <p>
3016 * Thread-safe (and supporting reasonable concurrency if possible).
3017 */
3018 private static final Map<String, List<String>> _gUT_cache = new ConcurrentHashMap<String, List<String>>();
3019
3020 /**Get short user quote/testimonial at random from those available for the given locale; never null but may be "".
3021 * This finds a quote, if any, suitable for the current locale
3022 * (infact, currently just the language is matched),
3023 * picked randomly from those available, or "" if none is available.
3024 * <p>
3025 * The text is HTML/XML safe, and is pure 7-bit printable ASCII,
3026 * with any non-ASCII characters encoded as HTML/XML entities.
3027 *
3028 * @param l the required locale; never null
3029 */
3030 public static String getUserTestimonial(final LocaleBeanBase l)
3031 {
3032 if(l == null) { throw new IllegalArgumentException(); }
3033
3034 // If the cache is completely empty then try to populate it now.
3035 // Other calls while we are builing the cache are safe
3036 // but may see empty entries until we are done.
3037 if(_gUT_cache.isEmpty())
3038 {
3039 try
3040 {
3041 // We build a map of by-language lists,
3042 // which we then wrap up as immutable and post in the cache map (atomically).
3043 final Map<String, ArrayList<String>> m = new HashMap<String, ArrayList<String>>();
3044 final ResourceBundle gp = ResourceBundle.getBundle(CoreConsts.GLOBAL_PROPS_NAME);
3045 // Common prefix for all testimonial properties.
3046 final int prefixLength = TEST_PNAME_PREFIX.length();
3047 // Regex pattern to match the tail of each testimonial property.
3048 final Pattern p = Pattern.compile("^[a-z][a-z][.].*");
3049 // Search the global properties...
3050 for(final String key : gp.keySet())
3051 {
3052 // Skip entries not of interest to us.
3053 if(!key.startsWith(TEST_PNAME_PREFIX)) { continue; }
3054 // We expect to find 2 lower-case letters and then another dot
3055 // and then a unique number/ID (which we don't use).
3056 final String tail = key.substring(prefixLength);
3057 if(!p.matcher(tail).matches())
3058 {
3059 System.err.println("WARNING: bad testimonial property name prefix "+key+" in "+CoreConsts.GLOBAL_PROPS_NAME+" properties");
3060 continue;
3061 }
3062 final String lang = tail.substring(0, 2); // Extract the language code...
3063 // Add this to the end of the appropriate List, or create one if need be.
3064 ArrayList<String> list = m.get(lang);
3065 if(list == null)
3066 {
3067 list = new ArrayList<String>();
3068 m.put(lang, list);
3069 }
3070 list.add(gp.getString(key));
3071 }
3072 // Copy immutable versions of the by-language lists into the cache.
3073 for(final String lang : m.keySet())
3074 {
3075 final ArrayList<String> arrayList = m.get(lang);
3076 arrayList.trimToSize(); // Save space since this won't ever be expanded...
3077 _gUT_cache.put(lang, Collections.unmodifiableList(arrayList)); // Atomically update cache.
3078 }
3079 }
3080 catch(final Throwable t)
3081 {
3082 System.err.println("ERROR: problem retrieving testimonial texts.");
3083 t.printStackTrace();
3084 return(""); // Nothing will be ready yet...
3085 }
3086 }
3087
3088 // Retrieve all quotes for the specified language.
3089 final List<String> quotes = _gUT_cache.get(l.getLocale().getLanguage());
3090 if(quotes != null)
3091 {
3092 assert(!quotes.isEmpty()); // Should be no empty lists.
3093 final int qSize = quotes.size();
3094 if(qSize == 1) { return(quotes.get(0)); }
3095 return(quotes.get(Rnd.fastRnd.nextInt(qSize)));
3096 }
3097
3098 return(""); // Nothing available.
3099 }
3100
3101
3102 /**Private key used by getTrialData(); never null. */
3103 private static final AEPLinkedKey trailDataCacheKey = new AEPLinkedKey("trailDataCacheKey");
3104
3105 /**Get the TrailData for a given trail exhibit (by full name); null if none.
3106 * This caches the result in the DSB, linked to the AEP,
3107 * so never retaining data for expired trails.
3108 * <p>
3109 * (This may negatively cache failure to load TrailData (for a while) for efficiency.)
3110 * <p>
3111 * Safe to apply to an arbitrary/unvetted exhibit name, even an invalid/null value.
3112 */
3113 @SuppressWarnings("unchecked")
3114 public static TrailData getTrailData(final DataSourceBean dsb,
3115 final Name.ExhibitFull trailExhibitFullName)
3116 {
3117 if((dsb == null) || (trailExhibitFullName == null)) { return(null); }
3118
3119 // Get cache (or atomically create on first access, eg after an AEP change).
3120 ConcurrentMap<Name.ExhibitFull,TrailData> cache;
3121 while(null == (cache = (ConcurrentMap<Name.ExhibitFull,TrailData>) dsb.getAEPLinkedValue(trailDataCacheKey)))
3122 { dsb.putIfAbsentAEPLinkedValue(trailDataCacheKey, new ConcurrentHashMap<Name.ExhibitFull,TrailData>()); }
3123
3124 // Return value immediately if already cached...
3125 final TrailData cachedValue = cache.get(trailExhibitFullName);
3126 if(cachedValue != null) { return(cachedValue); }
3127
3128 // // If the exhibit name is invalid (eg from bogus Web/unsafe input)
3129 // // then don't both even trying to compute a value to save time and risk.
3130 // if(!ExhibitName.validNameFinalComponentSyntax(trailExhibitName)) { return(null); }
3131
3132 try
3133 {
3134 // Compute and cache value if possible.
3135 final TrailData value = TrailData.readTrailFromExhibit(dsb,
3136 trailExhibitFullName, dsb.getLogger());
3137 if(value != null) { cache.putIfAbsent(trailExhibitFullName, value); }
3138 return(value);
3139 }
3140 catch(final Exception e)
3141 {
3142 // Report but absorb any error to avoid it propagating upwards...
3143 e.printStackTrace();
3144 return(null);
3145 }
3146 }
3147
3148
3149 /**Private key used by approxWordCount(); never null. */
3150 private static final AEPLinkedKey _awc_CacheKey = new AEPLinkedKey("_awc_CacheKey");
3151
3152 /**Compute (crude) estimate of words in catalogue page for given exhibit; non-negative.
3153 * This is designed to be reasonably fast, though not necessarily amazingly accurate,
3154 * and is intended to help decide how many ad blocks a page may reasonably support.
3155 * <p>
3156 * This may cache its results against the AEP instance.
3157 * <p>
3158 * This counts 'non-furniture' words, ie those originating from the data itself,
3159 * including the exhibit name, exhibit description, tree AKA/description, etc,
3160 * with different constituents possibly weighted differently.
3161 * <p>
3162 * For simplicity, this does its computations based on the default site language,
3163 * even if there may be significant variation in apparent word count
3164 * for other localisations.
3165 *
3166 * @return zero in case of difficulty (eg exhibit does not exist),
3167 * else approximate (positive) word count
3168 */
3169 public static final int approxWordCount(final DataSourceBean dsb,
3170 final Name.ExhibitFull fullExhibitName)
3171 {
3172 if((dsb == null) || (fullExhibitName == null))
3173 { throw new IllegalArgumentException(); }
3174
3175 // Get existing (thread-safe) cache, or atomically create it if necessary.
3176 // The cache is a size-limited thread-safe Map with reasonably-fast get()
3177 // from full exhibit name to (positive) word count.
3178 // Races here may result in some wasted work but no errors.
3179 // Cap relative to heap size: about 8k (2^13) entries for 1GB (2^30) heap.
3180 MemoryTools.CacheMiniMap<Name.ExhibitFull,Integer> cachedCounts;
3181 while((cachedCounts = GenUtils.<MemoryTools.CacheMiniMap<Name.ExhibitFull,Integer>>cast(dsb.getAEPLinkedValue(_awc_CacheKey))) == null)
3182 { dsb.putIfAbsentAEPLinkedValue(_awc_CacheKey, MemoryTools.SimpleProbabilisticCache.<String, Integer>create(Math.max(128, (int) (Runtime.getRuntime().totalMemory() >>> 17)), _awc_CacheKey.comment)); }
3183 // Return the cached word count, if present.
3184 final Integer cachedCount = cachedCounts.get(fullExhibitName);
3185 if(cachedCount != null) { return(cachedCount.intValue()); }
3186
3187 // Running word count for this exhibit.
3188 int wordCount = 0;
3189
3190 // Count words in its name main component, roughly.
3191 wordCount += ExhibitName.getMainWordsCount(fullExhibitName, Collections.<String>emptySet());
3192 assert(wordCount > 0);
3193
3194 // Get a default-locale LocaleBean
3195 // to extract the descriptive/AKA text with.
3196 final LocaleBeanBase lb = new LocaleBean();
3197
3198 try
3199 {
3200 // Count words in any extant per-exhibit description text.
3201 final AllExhibitProperties aep = dsb.getAllExhibitProperties(-1);
3202
3203 // Return (uncached) zero for non-extant exhibit.
3204 if(null == aep.aeid.getStaticAttr(fullExhibitName))
3205 { return(0); }
3206
3207 final ExhibitPropsLoadable epl = aep
3208 .getExhibitPropsLoadable(fullExhibitName);
3209 final String description = epl.getDescription();
3210 // Use the slow-but-sensible (eg for line-end handling) tokeniser.
3211 if(description != null)
3212 { wordCount += (new StringTokenizer(description)).countTokens(); }
3213
3214 // Get AKA/treedesc text with as little markup as we can easily manage.
3215 final String akaText = GenUtils.getLocalisedTreeDesc(aep,
3216 fullExhibitName, lb, true, true, false, false).toString();
3217 // Only count real spaces as word boundaries
3218 // and halve the raw word count to allow for markup, repetition, etc.
3219 // FIXME: use find() or somesuch to avoid redundant construction of substrings.
3220 if(!akaText.isEmpty())
3221 { wordCount += (akaText.split(" +").length >>> 1); }
3222
3223 // Add (at low weighting) word count of section text.
3224 final CharSequence sectionDesc = GenUtils.getLocalisedSectionDesc(aep,
3225 ExhibitName.getCategoryComponent(fullExhibitName),
3226 lb);
3227 // Only count real spaces as word boundaries
3228 // and quarter the raw word count to allow for markup and lack of uniqueness
3229 // ie the fact that descriptive text is shared between related exhibits.
3230 // FIXME: use find() or somesuch to avoid redundant construction of substrings.
3231 if(sectionDesc != null)
3232 { wordCount += (sectionDesc.toString().split(" +").length >>> 2); }
3233 }
3234 catch(final IOException e)
3235 {
3236 // Shouldn't happen, but return what we have without cacheing...
3237 if(IsDebug.isDebug) { System.out.println("[WARNING: partial word count "+wordCount+" on cat page for "+fullExhibitName+".]"); }
3238 return(wordCount);
3239 }
3240
3241 // Cache the (complete) approx word count for next time...
3242 cachedCounts.put(fullExhibitName, Integer.valueOf(wordCount));
3243
3244 if(IsDebug.isDebug) { System.out.println("[Approx word count "+wordCount+" on cat page for "+fullExhibitName+".]"); }
3245 return(wordCount);
3246 }
3247
3248 /**Events to be examined by isPopularCatalogueEntry(); never null.
3249 * These must all have VLONG data stored.
3250 */
3251 private static final List<SimpleVariableDefinition> _iPCE_vars = Arrays.asList(new SimpleVariableDefinition[] {
3252 SystemVariables.ACCESSPATTERN_CAT_PAGE_VIEW,
3253 SystemVariables.ACCESSPATTERN_COMPLETED_DOWNLOAD,
3254 SystemVariables.ACCESSPATTERN_COMPLETED_DOWNLOAD_LOCAL,
3255 });
3256
3257 /**Returns true iff the named exhibit and/or catalogue page is popular (well visited/downloaded).
3258 * Uses the history to decide if a catalogue page and its exhibit
3259 * are frequently visited/downloaded
3260 * (wrt other catalogue pages globally and locally).
3261 *
3262 * @param vars source of event history; never null
3263 * @param exhibitFullName full exhibit name; never null
3264 * @return true if popular, false otherwise
3265 */
3266 public static boolean isPopularCatalogueEntry(final SimpleVariablePipelineIF vars,
3267 final CharSequence exhibitFullName)
3268 {
3269 if((vars == null) || (exhibitFullName == null)) { throw new IllegalArgumentException(); }
3270
3271 final String shortName = ExhibitName.getFileComponent(exhibitFullName).toString();
3272 // TODO: allow for shortened unique-key form in future...
3273
3274 for(final SimpleVariableDefinition var : _iPCE_vars)
3275 {
3276 // Look for activity yesterday and today.
3277 final EventVariableValue eventsToday = vars.getEventValue(var, EventPeriod.VLONG, true);
3278 if(eventsToday.getRank(shortName) < (eventsToday.getTotalDistinctValues()/2))
3279 { return(true); }
3280 final EventVariableValue eventsYesterday = vars.getEventValue(var, EventPeriod.VLONG, false);
3281 if(eventsYesterday.getRank(shortName) < (eventsYesterday.getTotalDistinctValues()/3))
3282 { return(true); }
3283
3284 // Now we look into the full collected history for this value.
3285 final EventVariableValue[] all = vars.getEventValues(var, EventPeriod.VLONG, 0, null);
3286 // If no "all" history at all or not enough to be significant
3287 // then the item may just not be especially popular...
3288 if(all.length < 1) { continue; }
3289 final EventVariableValue allEVV = all[0];
3290 if(allEVV == null) { continue; }
3291 if(allEVV.getRank(shortName) < (allEVV.getTotalDistinctValues()/4))
3292 { return(true); }
3293 }
3294
3295 return(false); // Not popular apparently.
3296 }
3297
3298
3299 /**Private key used by findLatestCodeBundle(); never null. */
3300 private static final AEPLinkedKey findLatestCodeBundleKey = new AEPLinkedKey("findLatestCodeBundleKey");
3301
3302 /**Name of the section/dir in which code/doc bundles are filed. */
3303 private static final String CODE_SECTION_DIR = "code";
3304
3305 /**Returns full exhibit name for latest version of a code bundle, or null if none.
3306 * This locates the latest (with a major-minor-micro versioning) bundle
3307 * in the 'code' section, for the given prefix, or null of none.
3308 * <p>
3309 * The author and extension are ignored for selection purposes.
3310 * <p>
3311 * For example, for the prefix/argument 'javadoc',
3312 * if the code section includes the files
3313 * 'javadoc-1-2-3-DHD.zip' and javadoc-1-10-1-ANON.zip'
3314 * this will return 'code/javadoc-1-10-1-ANON.zip'.
3315 * <p>
3316 * To be found a bundled archive name must be exactly of the form:<br />
3317 * <code>prefix-major-minor-micro-AUTH.XTN</code><br />
3318 * where the major, minor and micro components are (small, non-negative) integers.
3319 * There must be no attributes present.
3320 * <p>
3321 * (Note that a '-' is appended to the supplied prefix.)
3322 * <p>
3323 * This may cache the results against the AEP instance,
3324 * since the lookup may happen may times
3325 * and we may have to search through a fair amount of data for each lookup.
3326 * Note: this does not cache negative results
3327 * in part to bound the amount of space that can be consumed.
3328 *
3329 * @param dsb current data source; never null
3330 * @param prefix legitimate short-name as bundle name; never null nor empty
3331 */
3332 @SuppressWarnings("unchecked")
3333 public static Name.ExhibitFull findLatestCodeBundle(final DataSourceBean dsb, final String prefix)
3334 {
3335 if(dsb == null) { throw new IllegalArgumentException(); }
3336 if((prefix == null) || (prefix.length() < 1)) { throw new IllegalArgumentException(); }
3337
3338 // Look up the prefix in our (thread-safe) cache from prefix to full exhibit name.
3339 // This is AEP-linked, so is automatically discarded when a new AEP appears.
3340 ConcurrentMap<String,Name.ExhibitFull> cache = (ConcurrentMap<String,Name.ExhibitFull>) (dsb.getAEPLinkedValue(findLatestCodeBundleKey));
3341 Name.ExhibitFull result = null;
3342 if(cache != null)
3343 {
3344 result = cache.get(prefix);
3345 if(result != null) { return(result); /* Return cached result! */ }
3346 }
3347
3348 // Do the lookup.
3349 // First quickly filter for only plausible candidates
3350 // with the correct prefix and in the correct section.
3351 final String fullNamePrefix = CODE_SECTION_DIR + '/';
3352 final String shortNamePrefix = prefix + "-";
3353 final AllExhibitProperties aep;
3354 try { aep = dsb.getAllExhibitProperties(-1); } catch(final IOException e) { return(null); /* Shouldn't really happen anyway... */ }
3355 final Name.ExhibitFull candidates[] = aep.select(new AEPFilter() {
3356 /* (non-Javadoc)
3357 * @see org.hd.d.pg2k.svrCore.AllExhibitProperties.AEPFilter#accept(org.hd.d.pg2k.svrCore.AllExhibitProperties, java.lang.String)
3358 */
3359 //@Override
3360 public boolean accept(final AllExhibitProperties aep, final Name.ExhibitFull fullExhibitName)
3361 {
3362 // Quickly filter for the correct section.
3363 // if(!TextUtils.contentEquals(ExhibitName.getCategoryComponent(fullExhibitName), CODE_SECTION_DIR)) { return(false); }
3364 if(!TextUtils.startsWith(fullExhibitName, fullNamePrefix)) { return(false); }
3365 final Name.ExhibitShort shortName = fullExhibitName.getShortName();
3366 // Quickly filter for the correct bundle name.
3367 if(!TextUtils.startsWith(shortName, shortNamePrefix)) { return(false); }
3368 // OK, we can check more thoroughly later..
3369 return(true);
3370 }
3371 }, null, 0);
3372 if(candidates.length == 0)
3373 {
3374 System.err.println("WARNING: no candidate bundles with prefix "+prefix);
3375 return(null); /* No candidates... */
3376 }
3377
3378 // Now find the highest-versioned syntactically-valid candidate, if any...
3379 int bestMajor = -1;
3380 int bestMinor = -1;
3381 int bestMicro = -1;
3382 for(final Name.ExhibitFull c : candidates)
3383 {
3384 final Name.ExhibitShort shortName = c.getShortName();
3385 assert(TextUtils.startsWith(shortName, shortNamePrefix)) : "should have the correct bundle name";
3386 // Rip off prefix and tokenise remainder (and ignore trailing AUTH.XTN part as last token).
3387 final int snpl = shortNamePrefix.length();
3388 final String[] tokens = shortName.subSequence(snpl, shortName.length()).toString().split(ExhibitName.WORD_SEPS);
3389 if(tokens.length != 4)
3390 {
3391 System.err.println("WARNING: badly-named (wrong token count "+tokens.length+") candidate bundle for prefix "+prefix+": "+c);
3392 continue; /* Invalid format. */
3393 }
3394 try
3395 {
3396 final int maj = Integer.parseInt(tokens[0], 10);
3397 assert(maj >= 0);
3398 final int min = Integer.parseInt(tokens[1], 10);
3399 assert(min >= 0);
3400 final int mic = Integer.parseInt(tokens[2], 10);
3401 assert(mic >= 0);
3402 if(maj < bestMajor) { continue; /* Too old. */ }
3403 if(maj > bestMajor)
3404 { result = c; bestMajor = maj; bestMinor = min; bestMicro = mic; continue; /* Best so far! */ }
3405 if(min < bestMinor) { continue; /* Too old. */ }
3406 if(min > bestMinor)
3407 { result = c; bestMajor = maj; bestMinor = min; bestMicro = mic; continue; /* Best so far! */ }
3408 if(mic < bestMicro) { continue; /* Too old. */ }
3409 if(mic > bestMicro)
3410 { result = c; bestMajor = maj; bestMinor = min; bestMicro = mic; continue; /* Best so far! */ }
3411 }
3412 catch(final NumberFormatException e)
3413 {
3414 System.err.println("WARNING: badly-named (NumberFormatException + "+e.getMessage()+") candidate bundle for prefix "+prefix+": "+c);
3415 continue; /* Invalid format. */
3416 }
3417 }
3418
3419 // If the result is positive then cache it,
3420 // creating a new (thread-safe) cache if necessary.
3421 // Negative results are not cached.
3422 if(result != null)
3423 {
3424 while(cache == null)
3425 {
3426 dsb.putIfAbsentAEPLinkedValue(findLatestCodeBundleKey, new ConcurrentHashMap<String, Name.ExhibitFull>());
3427 // Retrieve whatever the current cache now is (there may have been a race).
3428 cache = (ConcurrentMap<String,Name.ExhibitFull>) (dsb.getAEPLinkedValue(findLatestCodeBundleKey));
3429 }
3430 // Update the cached value if not already done...
3431 cache.putIfAbsent(prefix, result);
3432
3433 if(IsDebug.isDebug) { System.out.println("INFO: found bundle for prefix "+prefix+" as "+result); }
3434 }
3435
3436 else { System.err.println("WARNING: found no bundle for prefix "+prefix); }
3437
3438 // Return the result!
3439 return(result);
3440 }
3441 }