001 /*
002 Copyright (c) 1996-2012, Damon Hart-Davis
003 All rights reserved.
004
005 Redistribution and use in source and binary forms, with or without
006 modification, are permitted provided that the following conditions are
007 met:
008
009 * Redistributions of source code must retain the above copyright
010 notice, this list of conditions and the following disclaimer.
011
012 * Redistributions in binary form must reproduce the above copyright
013 notice, this list of conditions and the following disclaimer in the
014 documentation and/or other materials provided with the
015 distribution.
016
017 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
018 IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
019 TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
020 PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
021 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
022 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
023 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
024 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
025 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
026 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
027 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
028 */
029
030 package org.hd.d.pg2k.webSvr.util;
031
032 import java.awt.image.BufferedImage;
033 import java.awt.image.ColorModel;
034 import java.awt.image.WritableRaster;
035 import java.io.IOException;
036 import java.io.InputStream;
037 import java.lang.ref.SoftReference;
038 import java.lang.ref.WeakReference;
039 import java.net.Inet6Address;
040 import java.net.InetAddress;
041 import java.net.MalformedURLException;
042 import java.net.URL;
043 import java.net.UnknownHostException;
044 import java.util.ArrayList;
045 import java.util.Arrays;
046 import java.util.BitSet;
047 import java.util.Collection;
048 import java.util.Collections;
049 import java.util.Comparator;
050 import java.util.Enumeration;
051 import java.util.HashMap;
052 import java.util.HashSet;
053 import java.util.LinkedList;
054 import java.util.List;
055 import java.util.ListIterator;
056 import java.util.Map;
057 import java.util.ResourceBundle;
058 import java.util.Set;
059 import java.util.SortedSet;
060 import java.util.TreeSet;
061 import java.util.concurrent.Callable;
062 import java.util.concurrent.ConcurrentHashMap;
063 import java.util.concurrent.ConcurrentMap;
064 import java.util.concurrent.Future;
065 import java.util.regex.Pattern;
066
067 import javax.servlet.ServletContext;
068 import javax.servlet.http.HttpServletRequest;
069 import javax.servlet.http.HttpServletResponse;
070
071 import org.apache.http.HeaderElement;
072 import org.apache.http.message.BasicHeader;
073 import org.hd.d.pg2k.svrCore.AbstractSimpleLogger;
074 import org.hd.d.pg2k.svrCore.AccessionData;
075 import org.hd.d.pg2k.svrCore.AddrTools;
076 import org.hd.d.pg2k.svrCore.AllExhibitProperties;
077 import org.hd.d.pg2k.svrCore.AllExhibitProperties.AEPFilter;
078 import org.hd.d.pg2k.svrCore.Compact7BitString;
079 import org.hd.d.pg2k.svrCore.CoreConsts;
080 import org.hd.d.pg2k.svrCore.ExhibitName;
081 import org.hd.d.pg2k.svrCore.ExhibitPropsComputable;
082 import org.hd.d.pg2k.svrCore.ExhibitPropsComputableMutable;
083 import org.hd.d.pg2k.svrCore.ExhibitPropsLoadable;
084 import org.hd.d.pg2k.svrCore.ExhibitStaticAttr;
085 import org.hd.d.pg2k.svrCore.ExhibitThumbnails;
086 import org.hd.d.pg2k.svrCore.GenUtils;
087 import org.hd.d.pg2k.svrCore.HostUtils;
088 import org.hd.d.pg2k.svrCore.ImageUtils;
089 import org.hd.d.pg2k.svrCore.LocaleBeanBase;
090 import org.hd.d.pg2k.svrCore.MemoryTools;
091 import org.hd.d.pg2k.svrCore.MemoryTools.CacheMiniMap;
092 import org.hd.d.pg2k.svrCore.Name;
093 import org.hd.d.pg2k.svrCore.Name.ExhibitFull;
094 import org.hd.d.pg2k.svrCore.Rnd;
095 import org.hd.d.pg2k.svrCore.TextUtils;
096 import org.hd.d.pg2k.svrCore.ThreadUtils;
097 import org.hd.d.pg2k.svrCore.Tuple;
098 import org.hd.d.pg2k.svrCore.VarTools;
099 import org.hd.d.pg2k.svrCore.MIME.ExhibitMIME;
100 import org.hd.d.pg2k.svrCore.MIME.ExhibitMIME.ExhibitTypeParameters;
101 import org.hd.d.pg2k.svrCore.collections.LRUMapAutoSizeForHitRate;
102 import org.hd.d.pg2k.svrCore.collections.SimpleLRUMap;
103 import org.hd.d.pg2k.svrCore.collections.SimpleProbabilisticCache;
104 import org.hd.d.pg2k.svrCore.location.GeoUtils;
105 import org.hd.d.pg2k.svrCore.props.GenProps;
106 import org.hd.d.pg2k.svrCore.props.GenPropsGenNames;
107 import org.hd.d.pg2k.svrCore.props.LocalProps;
108 import org.hd.d.pg2k.svrCore.vars.EventPeriod;
109 import org.hd.d.pg2k.svrCore.vars.EventVariableValue;
110 import org.hd.d.pg2k.svrCore.vars.SimpleVarStats;
111 import org.hd.d.pg2k.svrCore.vars.SimpleVariableDefinition;
112 import org.hd.d.pg2k.svrCore.vars.SimpleVariablePipelineIF;
113 import org.hd.d.pg2k.svrCore.vars.SimpleVariableValue;
114 import org.hd.d.pg2k.svrCore.vars.SystemVariables;
115 import org.hd.d.pg2k.webSvr.catalogue.TrailData;
116 import org.hd.d.pg2k.webSvr.exhibit.BuiltInFilters;
117 import org.hd.d.pg2k.webSvr.exhibit.DataSourceBean;
118 import org.hd.d.pg2k.webSvr.exhibit.DataSourceBean.AEPLinkedKey;
119 import org.hd.d.pg2k.webSvr.exhibit.FilterExpr;
120 import org.hd.d.pg2k.webSvr.exhibit.TreeFilterBean;
121 import org.hd.d.pg2k.webSvr.virtualHosts.VirtualHosts;
122 import org.hd.d.tmf.ThroughputMonitorFilterBase;
123 import org.w3c.dom.Node;
124
125 import ORG.hd.d.IsDebug;
126
127
128 /**Web-server-related utility functions.
129 * This is for algorithms only of interest to Web apps, often JSPs.
130 * <p>
131 * One advantage of having code here rather than in-line in a JSP
* is that it is pre-compiled off-line for speed and robustness;
133 * code here is also easier to test.
134 */
135 public final class WebUtils
136 {
/**Private constructor: this class is a holder for static utilities and is never instantiated. */
private WebUtils()
{
    // Deliberately empty.
}
139
140
141 /**Name of event/variable to which we post a voter's approximate geo location. */
142 public static final SimpleVariableDefinition VOTER_LOCATION_STATS_EVENT_DEF = SystemVariables.GENSTATS_STRING_GLOBAL_EVENT;
143
144 /**Prefix of event value for voter's approximate geo location. */
145 public static final String VOTER_LOCATION_STATS_EVENT_PREFIX = "voterLocation=";
146
147
148 /**Handler for exhibit voting; holds no strong references to anything important. */
149 private static final class VoteHandler extends StatsSink.AbstractStatsListener
150 {
151 private final ExhibitFull exhibitFullName;
152 private final WeakReference<SimpleVariablePipelineIF> varsWR;
153 private final String dpID;
154 private final InetAddress voterIPAddr;
155 private final long expireBy;
156
157 private VoteHandler(
158 final String uniqueDataPointID,
159 final long expireBy,
160 final ExhibitFull exhibitFullName,
161 final WeakReference<SimpleVariablePipelineIF> varsWR,
162 final InetAddress voterIPAddr)
163 {
164 super(uniqueDataPointID, expireBy);
165 this.exhibitFullName = exhibitFullName;
166 this.varsWR = varsWR;
167 dpID = uniqueDataPointID;
168 this.voterIPAddr = voterIPAddr;
169 this.expireBy = expireBy;
170 }
171
172 @Override public final String handle(final Map<String, String[]> parameters)
173 {
174 // When we've done, redirect back to the exhibit page...
175 // Stick a random parameter value on the end to ensure that
176 // all common/broken browsers (eg IE6, FF1) reload the page.
177 final String result = WebUtils.makeCatPageRRURL(exhibitFullName, WebConsts.F_secondary_generated_HTML_suffix) + "?rnd="+(Rnd.fastRnd.nextLong() >>> 1);
178
179 // If the pipeline has gone away then return immediately...
180 final SimpleVariablePipelineIF vars = varsWR.get();
181 if(null == vars) { return(result); }
182
183 if(parameters != null)
184 {
185 // Allow for GET or POST plain/image style (.x, .y) values.
186 final boolean votePro =
187 (null != parameters.get(VOTE_PRO_PARAM_NAME)) ||
188 (null != parameters.get(VOTE_PRO_PARAM_NAME + ".x"));
189 final boolean voteCon =
190 (null != parameters.get(VOTE_CON_PARAM_NAME)) ||
191 (null != parameters.get(VOTE_CON_PARAM_NAME + ".x"));
192 if(votePro != voteCon) // Exactly one selected...
193 {
194 try
195 {
196 // Post vote event value to correct event stream...
197 /*if(IsDebug.isDebug)*/ { System.out.println("***VOTE registered (for="+votePro+"): " + exhibitFullName + " by " + dpID); }
198 vars.setVariable(new SimpleVariableValue(
199 (votePro ? SystemVariables.VOTE_PRO : SystemVariables.VOTE_CON),
200 exhibitFullName.getShortName().toString()));
201
202 if(voterIPAddr != null)
203 {
204 // Note location of voter (as ccTLD or region).
205 // We are prepared to spend some time on this
206 // as voting is rare and significant
207 // and we've probably already collected/cached
208 // any required data.
209 final String location = GeoUtils.getRegionByAddress(voterIPAddr, false);
210 vars.setVariable(new SimpleVariableValue(
211 VOTER_LOCATION_STATS_EVENT_DEF,
212 VOTER_LOCATION_STATS_EVENT_PREFIX + location));
213 }
214 }
215 catch(final IOException e)
216 { e.printStackTrace(); /* Just absorb errors. */ }
217 }
218 }
219
220 // Compute time before next vote will be requested
221 // if a user does take this opportunity to vote.
222 // A relatively long time after we expect the original voting opportunity to expire
223 // to act as a second-level screen against spiders
224 // and to avoid pestering a human voter too often.
225 // (We also make this time more unpredictable with a good random source.)
226 final long nextVote = expireBy + 11*WebConsts.VOTE_MIN_REQUEST_GAP_MS +
227 (GenUtils.mustConservePowerExtreme() ? Rnd.fastRnd : Rnd.goodRnd).nextInt(7*WebConsts.VOTE_MIN_REQUEST_GAP_MS);
228
229 // Install a dummy handler to postpone the next time that this user gets asked to vote.
230 // Assumes that the extant listener is removed *before* a call to handle()
231 // so that this new listener will not be removed on return.
232 StatsSink.addListenerForDataPoint(new StatsSink.AbstractStatsListener(dpID, nextVote){
233 /**Dummy handler that should never in fact be invoked. */
234 @Override public final String handle(final Map<String, String[]> parameters) { return(result); }
235 });
236
237 return(result);
238 }
239 }
240
241
242 /**Simple class to allow logging to the given servlet's log().
243 * This holds only a WeakReference to the ServletContext
244 * so as not to obstruct GC when all strong refs go away.
245 * <p>
246 * Stops logging when the referent becomes null.
247 */
248 public static final class ServletLogger extends AbstractSimpleLogger
249 {
250 public ServletLogger(final ServletContext ctxt)
251 {
252 if(ctxt == null) { throw new IllegalArgumentException(); }
253 ctxtWR = new WeakReference<ServletContext>(ctxt);
254 }
255
256 /**Weak ref to servlet context; never null but the referent may be. */
257 private volatile WeakReference<ServletContext> ctxtWR;
258
259 /**Log the given message.
260 * If the weak reference to the context has died
261 * the log output is silently discarded.
262 */
263 public void log(final String message)
264 {
265 final ServletContext context = ctxtWR.get();
266 if(null == context) { return; }
267 context.log(message);
268 }
269 }
270
271
272 /**Simple class to allow logging to a given servlet's log() or System.out if none available.
273 * This allows a logger to be created at instance scope
274 * for (say) a Filter, and set with a context when the config is set
275 * and cleared when one is not available,
276 * all the while remaining a valid logger.
277 * <p>
278 * This holds only a WeakReference to the ServletContext
279 * so as not to obstruct GC when all strong refs go away.
280 */
281 public static final class ServletLoggerWithFallback extends AbstractSimpleLogger
282 {
283 /**Weak ref to servlet context; may be null or the referent may be. */
284 private volatile WeakReference<ServletContext> ctxtWR;
285
286 /**Set context, or clear/remove it if null. */
287 public void setContext(final ServletContext context)
288 {
289 final WeakReference<ServletContext> ctxtWROld = ctxtWR;
290 final ServletContext ctxtOld = (null == ctxtWROld) ? null : ctxtWROld.get();
291 if(null == context) { ctxtWR = null; }
292 else { ctxtWR = new WeakReference<ServletContext>(context); }
293 if(ctxtOld != context)
294 { log("ServletLoggerWithFallback: now logging to " + ((null == context) ? "System.out" : "context.log()")); }
295 }
296
297 /**Log the given message.
298 * Logs to the servlet context logger if available,
299 * else logs to System.out.
300 */
301 public void log(final String message)
302 {
303 final WeakReference<ServletContext> wr = ctxtWR;
304 final ServletContext context = (null == wr) ? null : wr.get();
305 if(context != null) { context.log(message); }
306 else { System.out.println(message); }
307 }
308 }
309
310
311 /**System variables tried, in order, for a "popular" exhibit; private to getPopularExhibit().
312 * We put the download var first to get a decent rate of update
313 * since this value changes quite frequently.
314 * <p />
315 * We don't put the vote var first so as to reduce the temptation
316 * to "throw" a vote to get an exhibit shown on the front page.
317 */
318 private static final SimpleVariableDefinition _gPE_vars[] = {
319 SystemVariables.ACCESSPATTERN_COMPLETED_DOWNLOAD_LOCAL,
320 SystemVariables.ACCESSPATTERN_COMPLETED_DOWNLOAD,
321 SystemVariables.VOTE_PRO,
322 SystemVariables.ACCESSPATTERN_CLICKTHROUGH,
323 SystemVariables.ACCESSPATTERN_CAT_PAGE_VIEW,
324 };
325
326 // /**Gets "popular" exhibit, possibly filtered by type; null if none available.
327 // * Tries to pick a "popular" exhibit by looking at one recently voted for,
328 // * or downloaded, etc, in the system variables,
329 // * and that has both thumbnails available where they are possible.
330 // * <p>
331 // * This rejects exhibits with a below-par (negative) rating.
332 // * <p>
333 // * Using the system variables should mean that this can pick up
334 // * values set from any mirror, etc, fairly quickly.
335 // * <p>
336 // * This cannot guarantee to return a non-null value,
337 // * but any value that it does return is a current, valid exhibit.
338 // *
339 // * @param dsb handle on the system variables and data; never null
340 // * @param type if not null only exhibits of this type are candidates
341 // * @deprecated Use {@link #getPopularExhibit(DataSourceBean,ExhibitMIME.ExhibitTypeParameters,Collection)} instead
342 // */
343 // @Deprecated
344 // public static String getPopularExhibit(final DataSourceBean dsb,
345 // final ExhibitMIME.ExhibitTypeParameters type)
346 // { return getPopularExhibit(dsb, type, null); }
347
348 /**Gets name of "popular" exhibit, possibly filtered by type; null if none available.
349 * Tries to pick a "popular" exhibit by looking at one recently voted for,
350 * or downloaded, etc, in the system variables,
351 * and that has both thumbnails available where they are possible.
352 * <p>
353 * This rejects exhibits with a below-par (negative) rating.
354 * <p>
355 * Using the system variables should mean that this can pick up
356 * values set from any mirror, etc, fairly quickly.
357 * <p>
358 * This cannot guarantee to return a non-null value,
359 * but any value that it does return is a current, valid exhibit.
360 *
361 * @param dsb handle on the system variables and data; never null
362 * @param type if not null only exhibits of this type are candidates
363 * @param excludeFullNames if non-null, any exhibits included by full name
364 * are not candidates to be returned
365 * @param beQuick if true then don't spend too long trying to calculate this
366 * but instead give up quickly if need be
367 * (so as not to block page generation for example)
368 */
369 public static Name.ExhibitFull getPopularExhibit(final DataSourceBean dsb,
370 final ExhibitMIME.ExhibitTypeParameters type,
371 final Collection<String> excludeFullNames,
372 final boolean beQuick)
373 {
374 if(dsb == null) { throw new IllegalArgumentException(); }
375
376 final long start = System.currentTimeMillis();
377
378 for(final SimpleVariableDefinition def : _gPE_vars)
379 {
380 assert(def != null);
381
382 // Half the time skip a "local" definition
383 // so as to get to see global popular items in the mix.
384 if(def.isLocal() && Rnd.fastRnd.nextBoolean())
385 { continue; }
386
387 try
388 {
389 final SimpleVariableValue svv = dsb.getVariable(def);
390 if(svv == null) { continue; }
391
392 // Specified variable must be of String type.
393 assert(def.getType() == SimpleVariableDefinition.TYPE_STRING);
394
395 final String s = (String) svv.getValue();
396 // Skip any null values.
397 if(null == s) { continue; }
398
399 final AllExhibitProperties aep = dsb.getAllExhibitProperties(-1);
400 final Name.ExhibitFull fullName = aep.aeid.getFullName(s);
401
402 // If this is in the exclusion list then skip it...
403 if((excludeFullNames != null) && excludeFullNames.contains(fullName)) { continue; }
404
405 // Seems not to be a valid/extant exhibit, so give up...
406 if(fullName == null) { continue; }
407 final ExhibitStaticAttr esa = aep.aeid.getStaticAttr(fullName);
408 if(esa == null) { continue; }
409
410 // If the type was specified and does not match, give up!
411 final ExhibitTypeParameters actualType = (ExhibitMIME.getInputFileType(esa.getCharSequence()));
412 if(actualType == null)
413 { continue; /* Reject untyped exhibit. */ }
414 if((type != null) && !type.equals(actualType))
415 { continue; /* Reject wrong-type exhibit. */ }
416
417 // Skip if this exhibit may be "sensitive" somehow.
418 final GenProps gp = dsb.getGenProps(-1);
419 if(GenUtils.isSensitive(fullName, gp)) { continue; }
420
421 // If this exhibit type supports thumbnails
422 // then reject anything without both immediately available.
423 if(actualType.canPossiblyCreateThumbnailOfSameMIMEType())
424 {
425 final ExhibitThumbnails thumbnails = dsb.getThumbnails(fullName, false);
426 if((thumbnails == null) ||
427 (thumbnails.getSmall() == null) ||
428 (thumbnails.getStandard() == null))
429 { continue; /* Reject this. */ }
430 }
431
432 // We allow use of a stale (and ignore a not-yet-computed) rating so as to be quick.
433 final ExhibitPropsComputableMutable ePCM =
434 aep.getExhibitPropsComputableMutable(fullName);
435 // If we don't actually know (absent/stale rating) how good this exhibit is
436 // then attempt to find out for next time in the background
437 // unless the system is (temporarily) conserving energy or other than lightly loaded.
438 if((ePCM == null) || ePCM.isStale())
439 {
440 if(!GenUtils.mustConservePower() && WebUtils.isLightlyLoaded(dsb.getServletContext()))
441 {
442 // Use 'discardable' task pool to ensure that we don't block.
443 ThreadUtils.lowPriorityThreadPoolDiscardable.submit(new Runnable() {
444 /**Force full non-stale recomputation of EPCM of not-apparently-unpopular exhibit. */
445 public void run() { aep.getExhibitPropsComputableMutable(fullName, false, gp, dsb, dsb.getScorerCache()); }
446 });
447 }
448 }
449 // Reject/skip anything with a definite below-par (non-positive) rating.
450 if((ePCM != null) && (ePCM.getGoodness() <= 0)) { continue; }
451
452 return(fullName); // Got one!
453 }
454 catch(final IOException e)
455 {
456 // Silently ignore a probably-transient problem...
457 }
458
459 // If urged to be quick by our caller
460 // then abort if we've already taken too long trying
461 // (a significant fraction of allowed page-generation time).
462 if(beQuick && ((System.currentTimeMillis() - start) > WebConsts.MAX_PG_DOWNLOAD_MS/2))
463 { break; }
464 }
465
466 return(null); // Nothing found...
467 }
468
469 /**If true then check for bots/spiders by UA (User-Agent); note that clients can forge their UA. */
470 private static final boolean CHECK_FOR_SPIDERS_BY_UA = true;
471
472 /**Immutable Set of known spider/bot UA strings; should probably be moved to a text/properties file.
473 * This is a set of lower-cased first (space/tab/bracket-delimited) words
474 * from the UA strings.
475 * <p>
476 * The names consist only of non-regex-metacharacters in the set [a-z'_-],
477 * so are safe to embed in a regex.
478 */
479 // @SuppressWarnings("unchecked")
480 private static final Set<String> spiderUAName1stWordsLC = (!CHECK_FOR_SPIDERS_BY_UA) ? Collections.<String>emptySet() :
481 Collections.unmodifiableSet(new HashSet<String>(Arrays.asList(new String[]{
482 "", /* Empty UA string... */
483 "-",
484 "alexibot",
485 "appie",
486 "aqua_products",
487 "asterias",
488 "b2w",
489 "baiduspider",
490 "backdoorbot",
491 "becomebot",
492 "blowfish",
493 "bookmark",
494 "botalot",
495 "builtbottough",
496 "bullseye",
497 "bunnyslippers",
498 "cheesebot",
499 "cherrypicker",
500 "cherrypickerelite",
501 "cherrypickerse",
502 "copernic",
503 "copyrightcheck",
504 "cosmos",
505 "crescent",
506 "curl",
507 "dittospyder",
508 "dumbot",
509 "emailcollector",
510 "emailsiphon",
511 "emailwolf",
512 "enterprise_search",
513 "erocrawler",
514 "extractorpro",
515 "fairad",
516 "faxobot",
517 "findlinks",
518 "flaming",
519 "foobot",
520 "freefind",
521 "gaisbot",
522 "getright",
523 "gigabot",
524 "googlebot-image",
525 "grub",
526 "grub-client",
527 "harvest",
528 "hatena",
529 "hloader",
530 "http",
531 "httplib",
532 "humanlinks",
533 "ia_archiver",
534 "indy",
535 "infonavirobot",
536 "iron33",
537 "jennybot",
538 "jetbot",
539 "kalooga",
540 "kenjin",
541 "keyword",
542 "larbin",
543 "lexibot",
544 "libweb",
545 "libwww-perl",
546 "linkextractorpro",
547 "linkscan",
548 "linkwalker",
549 "lnspiderguy",
550 "looksmart",
551 "lwp-trivial",
552 "lynx",
553 "mata",
554 "miixpc",
555 "mister",
556 "moget",
557 "msiecrawler",
558 "msnbot",
559 "naver",
560 "netants",
561 "netmechanic",
562 "nicerspro",
563 "nutch",
564 "offline",
565 "omniexplorer_bot",
566 "openbot",
567 "openfind",
568 "oracle",
569 "perman",
570 "port",
571 "propowerbot",
572 "prowebwalker",
573 "psbot",
574 "python-urllib",
575 "queryn",
576 "radiation",
577 "repomonkey",
578 "rma",
579 "searchpreview",
580 "sitesnagger",
581 "sootle",
582 "spankbot",
583 "spanner",
584 "stanford",
585 "suzuran",
586 "szukacz",
587 "teleport",
588 "teleportpro",
589 "telesoft",
590 "thenomad",
591 "tocrawl",
592 "true_robot",
593 "turingos",
594 "url",
595 "url_spider_pro",
596 "urly",
597 "vci",
598 "wbdbot",
599 "webauto",
600 "webbandit",
601 "webcopier",
602 "webenhancer",
603 "websauger",
604 "website",
605 "webster",
606 "webstripper",
607 "webvac",
608 "webzip",
609 "wget",
610 "www-collector-e",
611 "xenu's",
612 "yahooseeker",
613 "zeus",
614 })));
615
616 /**Set of characters taken as a main-part terminator in a User-Agent header, including whitespace.
617 * This should be usable as the separator arg to StringTokenizer,
618 * and in a regex when wrapped in "[]" square brackets.
619 */
620 private static final String UA_TERMINATOR_CHARS = " \t/(:";
621
622 /**Extra case-insensitive patterns matched in UA names, "|"-separated, or null if none.
623 * Essentially, anything alphanumeric-ish ending in "bot", or
624 * anything alphanumeric-ish containing "spider".
625 */
626 private static final String UA_BOT_PATTERNS = "([a-z0-9._-]*bot)|([a-z0-9._-]*spider[a-z0-9._-]*)";
627
/**Case-insensitive regex built from UA_BOT_PATTERNS and every non-empty name in spiderUAName1stWordsLC; null if not checking UA names.
 * The pattern is anchored at the start of the UA: one of the names/patterns,
 * then an optional terminator character, then anything at all.
 * <p>
 * Public only so that it can be unit tested.
 */
public static final Pattern UA_REGEX;
/**Builds and compiles UA_REGEX. */
static
{
    if(CHECK_FOR_SPIDERS_BY_UA)
    {
        final StringBuilder regex = new StringBuilder(32 + (spiderUAName1stWordsLC.size() * 16));

        // Open the anchored group; alternatives follow this point.
        regex.append("^(");
        final int alternativesStart = regex.length();

        // Generic patterns first (if any), then each literal name, all joined by '|'.
        if(UA_BOT_PATTERNS != null) { regex.append(UA_BOT_PATTERNS); }
        for(final String name : spiderUAName1stWordsLC)
        {
            if((name == null) || (0 == name.length())) { continue; }
            if(regex.length() > alternativesStart) { regex.append('|'); }
            regex.append(name);
        }

        // Close the group, then allow an optional terminator and any trailing text.
        regex.append(")[");
        regex.append(UA_TERMINATOR_CHARS);
        regex.append("]?.*$");

        UA_REGEX = Pattern.compile(regex.toString(), Pattern.CASE_INSENSITIVE);
    }
    else
    {
        UA_REGEX = null;
    }
}
655
656 /**Name of Boolean attribute in request we cache result of requestProbablyFromSpider() by. */
657 private static final String _rPFS_CACHE_PNAME = "org.hd.pg2k._rPFS_CACHE";
658
659 /**LRU cache from (common) whole UAs to "bot"ness to save some repeated/slow String parsing; never null when checking for bots by UA.
660 * We're prepared to discard all of this under memory stress
661 * as we only have to work this out at most once per request for example.
662 * <p>
663 * TODO: consider some form of incremental auto-expiry even when not full since keys can be relatively large (eg something like SimpleLRUMap + AutoExpirable)
664 */
665 private static final CacheMiniMap<String,Boolean> _isBot_cache = !CHECK_FOR_SPIDERS_BY_UA ? null :
666 SimpleProbabilisticCache.<String,Boolean>create(Math.max(32, (int)(Runtime.getRuntime().totalMemory() >> 20)), "_isBot_cache");
667
668 /**Attempt to determine quickly if the current request is probably from a spider/bot (ie not a human).
669 * NB: This does not attempt to distinguish between good spiders (ie bona fide SEs)
670 * and bad/broken/rude bots/scrapers/spiders.
671 * <p>
672 * This tries to distinguish human from non-human,
673 * at least in part to conserve (prime interactive) resources for humans,
674 * and does not claim to be perfect.
675 * <p>
676 * This returns true if there is no (valid) referring page
677 * (and this visitor is not new to the site, ie has recently pulled another page),
678 * though some browsers/firewalls may routinely block this info,
679 * and some referrals, eg to target="_blank", may show no Referer value.
680 * <p>
681 * This should be quick as most or all requests may be tested with this.
682 * <p>
683 * TODO: Should expand this to test against well-known IP addresses.
684 * <p>
685 * TODO: Should include a behavioural element, eg mindless, rapid, pauseless browsing.
686 *
687 * @param request the incoming request; never null
688 *
689 * @return true if the requesting client is probably a bot, false if probably a human
690 */
691 public static boolean requestProbablyFromSpider(final HttpServletRequest request)
692 {
693 // If we already worked this out, return result already computed/cached!
694 final Object cachedResult = request.getAttribute(_rPFS_CACHE_PNAME);
695 if(cachedResult instanceof Boolean)
696 { return(((Boolean) cachedResult).booleanValue()); }
697
698 // First check for lack of a valid "Referer" header
699 // AND the the client being 'new' to this site.
700 if((null == request.getAttribute(ThroughputMonitorFilterBase.REQ_ATTR_NAME_NEW_CLIENT_FLAG)) &&
701 "".equals(requestProbablyReferredFromExternalSite(request)))
702 {
703 if(IsDebug.isDebug) { System.out.println("[Client has no/unparsable Referer and is not new: probably a spider/robot.]"); }
704 request.setAttribute(_rPFS_CACHE_PNAME, Boolean.TRUE);
705 return(true);
706 }
707
708 // Now check for a known bot User-Agent...
709 if(CHECK_FOR_SPIDERS_BY_UA)
710 {
711 final Enumeration<?> aeEn = request.getHeaders("User-Agent");
712 final boolean hasNoUA = (aeEn == null) || !aeEn.hasMoreElements();
713 if(hasNoUA)
714 {
715 if(IsDebug.isDebug) { System.out.println("[Client has no UA: assumed to be a spider/robot.]"); }
716 request.setAttribute(_rPFS_CACHE_PNAME, Boolean.TRUE);
717 return(true); /* Rude/suspicious. */
718 }
719 else
720 {
721 // Check all such UA headers if more than one for some reason...
722 while(aeEn.hasMoreElements())
723 {
724 final String wholeUA = (String) aeEn.nextElement();
725
726 final int lenUA = wholeUA.length();
727 if((lenUA < 2) || (lenUA > 512))
728 {
729 // Strange-sized UA is suspicious...
730 // And we don't want our cache/memory full of huge UA strings.
731 if(IsDebug.isDebug) { System.out.println("[Client has empty/tiny/huge (ie rude/broken/silly/dangerous) UA: assumed to be a spider/robot; client IP: "+request.getRemoteAddr()+".]"); }
732 request.setAttribute(_rPFS_CACHE_PNAME, Boolean.TRUE);
733 return(true);
734 }
735
736 //final long t1 = System.nanoTime();
737 final boolean isBot;
738 final Boolean b = _isBot_cache.get(wholeUA);
739 if(b != null) { isBot = b.booleanValue(); }
740 else
741 {
742 isBot = UA_REGEX.matcher(wholeUA).matches();
743 // Cache match result for this whole User-Agent value.
744 _isBot_cache.put(wholeUA, isBot ? Boolean.TRUE : Boolean.FALSE);
745
746 // When we see a (new-ish) bot UA then log it and where it came from...
747 if(isBot) { System.out.println("[INFO: Probable bot/spider UA seen (full UA='"+TextUtils.sanitiseForXML(wholeUA, 256, false)+"'); client IP: "+request.getRemoteAddr()+".]"); }
748 }
749 //final long t2 = System.nanoTime(); System.out.println("[UA match time: "+(t2-t1)+"ns]");
750
751 if(isBot)
752 {
753 request.setAttribute(_rPFS_CACHE_PNAME, Boolean.TRUE);
754 return(true);
755 }
756 }
757 }
758 }
759
760 // TODO: check known spider/bot IP addresses...
761
762 request.setAttribute(_rPFS_CACHE_PNAME, Boolean.FALSE);
763 return(false); // Probably human...
764 }
765
766 /**Attempt to detect if the current request has been referred from an external Web site.
767 * This checks if the "Referer" is apparently from gallery mirrors or aliases.
768 * <p>
769 * Note that since this info can be forged,
770 * or disabled/knobbled for security reasons,
771 * this is not completely reliable.
772 * <p>
773 * A missing "Referer" will be taken as indicating an "external" reference,
774 * and will catch most well-behaved spiders as a result.
775 * <p>
776 * Since we may have to do some string parsing this may not be very fast,
777 * but it should not be very slow either.
778 * <p>
779 * Even if there is more than one "Referer" header we will only look at one.
780 *
781 * @param request the incoming request; never null
782 *
783 * @return null if referred from a Gallery site/host/alias,
784 * "" if unparsable or no referring URL,
785 * else normalised (lowercase, stripped of common prefixes, etc)
786 * referring host's name
787 */
788 public static String requestProbablyReferredFromExternalSite(final HttpServletRequest request)
789 {
790 final Enumeration<?> rEn = request.getHeaders("Referer");
791 final boolean noRef = ((rEn == null) || !(rEn.hasMoreElements())); // No Referer...
792 if(noRef) { return(""); }
793
794 // Get the referring URL...
795 final String ref = (String) rEn.nextElement();
796
797 try
798 {
799 // Attempt to parse it for the hostname.
800 // If we fail, ie ref is ill-formed, then count this as "external".
801 final URL url = new URL(ref);
802
803 // If we can't find virtual host details for the host
804 // then assume that this is an external host.
805 final String rawHostName = url.getHost();
806 final String normalisedHostName = HostUtils.normaliseVirtualHostName(rawHostName);
807 if(null == VirtualHosts.getVirtualHostDetails(normalisedHostName, null))
808 {
809 final String ourName = request.getServerName();
810 if((null != ourName) &&
811 normalisedHostName.equals(HostUtils.normaliseVirtualHostName(ourName)))
812 {
813 return(null); // Our host name, even if not expected one, so treat as OK.
814 }
815 return(normalisedHostName); // OK, got the referring host, and it's not us.
816 }
817 return(null); // This was apparently referred by us, so is OK.
818 }
819 catch(final Exception e)
820 {
821 return(""); /* Cannot parse referrer, so treat as if external. */
822 }
823 }
824
825 /**Generate full URL for exhibit given exhibit name; never null.
826 * This may take account of such factors as which servers
827 * are up, how heavily loaded, etc,
828 * in order to give best throughput and perform automatic load balancing,
829 * to give the best possible user experience.
830 * <p>
831 * In order to do this well we may need the request details,
832 * so they should be supplied if possible.
833 * These should be the client's request to a Gallery site,
834 * else null.
835 * <p>
836 * Defaults to generic main host if specific better mirror (etc) cannot be identified.
837 */
838 public static URL makeExhibitURL(final CharSequence exhibitName,
839 final HttpServletRequest request,
840 final DataSourceBean vars)
841 throws MalformedURLException
842 {
843 return(new URL("http://" +
844 MirrorSelectionUtils.chooseMirrorHostForHighBandwidth(request, vars) +
845 makeExhibitRRURL(exhibitName)));
846 }
847
848 /**Generate root-relative URL for exhibit given exhibit name; never null.
849 */
850 public static String makeExhibitRRURL(final CharSequence exhibitName)
851 //throws MalformedURLException
852 {
853 return("/" + WebConsts.BASE_PATH_EXHIBITS + "/" + exhibitName);
854 }
855
856 /**Generate full URL for thumbnail/sample given exhibit name and standard/small selector; never null.
857 */
858 public static URL makeThumbnailURL(final CharSequence exhibitName, final boolean std)
859 throws MalformedURLException
860 {
861 return(new URL("http", CoreConsts.MAIN_DATA_HOST, makeThumbnailRRURL(exhibitName, std)));
862 }
863
864 /**Generate root-relative URL for thumbnail/sample given exhibit name and standard/small selector; never null.
865 */
866 public static String makeThumbnailRRURL(final CharSequence exhibitName, final boolean std)
867 {
868 return("/" + WebConsts.BASE_PATH_TN + "/" + (std ? WebConsts.PATH_TN_STD : WebConsts.PATH_TN_SML) + "/" + exhibitName);
869 }
870
871 /**Generate full URL for catalogue page given exhibit name; never null.
872 * This should always generate the canonical/"official" form of the URL,
873 * eg not including mirrors or aliases.
874 * <p>
875 * The media-type suffix (eg ".html" or ".wml") must be supplied.
876 */
877 public static URL makeCatPageURL(final CharSequence exhibitName, final String mediaTypeSuffix)
878 throws MalformedURLException
879 {
880 return(new URL("http", CoreConsts.MAIN_DATA_HOST, makeCatPageRRURL(exhibitName, mediaTypeSuffix)));
881 }
882
883 /**Generate root-relative URL for catalogue page given exhibit name; never null.
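 * This should take account of such factors as which servers
 * are up, how heavily loaded, etc,
 * though the current implementation simply concatenates
 * the catalogue-page base path, the exhibit name and the suffix.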
886 * <p>
887 * The media-type suffix (eg ".html" or ".wml") must be supplied.
888 */
889 public static String makeCatPageRRURL(final CharSequence exhibitName, final String mediaTypeSuffix)
890 {
891 return("/" + WebConsts.BASE_PATH_CATPAGE + "/" + exhibitName + mediaTypeSuffix);
892 }
893
894 /**Returns true if this Web server is overloaded (eg for bandwidth or CPU).
895 * If this server is clearly overloaded then this routine returns true,
896 * and it is possible to start trimming UI features to reduce load,
897 * eg the number of search results shown.
898 * <p>
899 * If not overloaded or not possible to tell, this returns false.
900 * <p>
901 * (If the context is null then this routine may have to return false.)
902 * <p>
903 * This may gather information from a number of sources,
904 * but in the main the ThroughputFilter's data will be used.
905 * We may explicitly set the overload flag here
906 * if we detect the system to be overloaded
907 * to make it quicker for us and others on a subsequent call;
908 * this will get overwritten by TMF when it next gets to assess load.
909 * <p>
910 * If true then the UI and application should attempt to use less bandwidth
911 * and CPU time than normal, perhaps by using cheaper algorithms than
912 * usual (eg less comprehensive searches) or a less-graphics-intensive UI.
913 * <p>
914 * This may default to true while the server is starting up to ensure that
915 * as little extra CPU (for example) as possible is consumed while
916 * the server is compiling JSPs, loading classes, etc.
917 * <p>
918 * This is intended to be relatively cheap to call,
919 * since it may get called frequently,
920 * for example especially when the system is busy,
921 * and/or at start-up before JIT compilation (eg HotSpot) has kicked in.
922 */
923 public static boolean isOverloaded(final ServletContext ctxt)
924 {
925 if(ctxt != null)
926 {
927 // If the ThroughputFilter thinks that the server is overloaded,
928 // then this reports the system as overloaded too.
929 final Object overloadFlag =
930 ctxt.getAttribute(WebConsts.BANDWIDTH_OVERLOAD_ATTR_NAME);
931 // Take anything but the presence of the value Boolean.FALSE
932 // (eg the absence of any value at all) as indication of a problem,
933 // eg that the system may be too busy even to clear the flag,
934 // as well as a short-term cache of overload status computed in this routine
935 // until the TF computes/sets a new value.
936 if((!(overloadFlag instanceof Boolean)) || ((Boolean) overloadFlag).booleanValue())
937 { return(true); }
938 }
939
940 // If the system is known to be at or over our CPU load limit then report overload.
941 if(ThreadUtils.isCPUHeavilyLoaded())
942 {
943 // Effectively cache overload status for a while (until the next TF update).
944 if(ctxt != null) { ctxt.removeAttribute(WebConsts.BANDWIDTH_OVERLOAD_ATTR_NAME); }
945 return(true);
946 }
947
948 // Can't see any problems, given the available data, so not overloaded.
949 return(false);
950 }
951
/**Private time-stamp for isLightlyLoaded() noting when we were last found to be not lightly loaded.
 * The value is a System.currentTimeMillis() reading (ms).
 * Once we discover that we are not lightly loaded we set this to the current time,
 * and then do not re-test for a little while (about 1s)
 * in case the testing itself adds to the load.
 * <p>
 * Initially zero.
 * <p>
 * Marked volatile for thread-safe lock-free access.
 */
private static volatile long _lastNotLightlyLoaded;
961
/**Time that we last reset the target load average to zero to restart our load ramp-up.
 * The value is a System.currentTimeMillis() reading (ms).
 * Initially 'now' (ie class initialisation time) to encourage a gentle start.
 * <p>
 * Marked volatile for thread-safe lock-free access.
 */
private static volatile long _lastResetLA = System.currentTimeMillis();
968
/**Time over which to ramp up load limit in isLightlyLoaded(), ms; strictly positive.
 * Should be at least the 60s time claimed in the documentation for
 * OperatingSystemMXBean.getSystemLoadAverage()
 * to dampen CPU-load flapping,
 * especially when emerging from power-conservation mode.
 * <p>
 * Computed once at class initialisation as a 180000ms base
 * plus the result of Rnd.fastRnd.nextInt(60000)
 * (presumably in the range [0,60000) as for java.util.Random.nextInt(int); TODO confirm).
 */
private static final int LOAD_AVERAGE_RAMP_UP_TIME = 180000 + Rnd.fastRnd.nextInt(60000); // 3+ minutes.
976
977 /**Returns true if the site seems to be lightly loaded (CPU, bandwidth, etc).
978 * In case of difficulties/confusion this reports false.
979 * <p>
 * When running well, the system should hover around the normal / light-load boundary.
981 * <p>
982 * This routine is quite careful and conservative,
983 * and will only return true if this server and the host system
984 * both seem to be lightly loaded and stable by all the appropriate metrics.
985 * <p>
986 * This never reports the system to be lightly loaded if it is low on power
987 * (eg on a nearly-expired battery) so as to avoid unnecessary background work.
988 * <p>
989 * Where this can check 'uptime' then it tries to enforce a modest CPU ramp-up
990 * over approximately the 60s-or-so of OperatingSystemMXBean.getSystemLoadAverage()
991 * to limit rapid fluctuations in CPU load (and, for example, power consumption).
992 * <p>
993 * This routine is designed to be called frequently,
994 * ie is reasonably fast and efficient.
995 */
996 public static boolean isLightlyLoaded(final ServletContext ctxt)
997 {
998 final long now = System.currentTimeMillis();
999 // Don't re-test within ~1s of finding the system to be significantly loaded.
1000 if(now - _lastNotLightlyLoaded < 1013) { return(false); }
1001
1002 // Treat the system as stressed and thus NOT lightly loaded if (temporarily) short of power
1003 // so as to discourage unnecessary background/speculative work.
1004 // Also forces load-limit ramp-up to restart from scratch.
1005 if(GenUtils.mustConservePower())
1006 { _lastNotLightlyLoaded = now; _lastResetLA = now; return(false); }
1007
1008 // We check for memory stress; we are not lightly loaded if memory is hurting.
1009 if(MemoryTools.isMemoryStressed())
1010 { _lastNotLightlyLoaded = now; return(false); }
1011
1012 final double loadFrac = ThreadUtils.loadFraction();
1013 if(loadFrac >= 0) // -ve indicates load average not available so we don't test.
1014 {
1015 // Now we check the whole-host-operating-system (time-averaged) load.
1016 // This should be well under full capacity to be considered lightly loaded,
1017 // ie the run-queue length must be much less than the number of available CPUs.
1018 // Note that where this value is not available getSystemLoadAverage() returns a -ve value
1019 // and we then regard the system as not busy by this metric, as a reasonable fallback.
1020 // This does not take into account any stress on I/O nor other subsystems.
1021 final float targetBasicThreshold = LocalProps.getLightLoadMax();
1022 // If the system is well over over our lower load limit, our fault or not,
1023 // then restart our load ramp-up to give the system a chance to recover.
1024 if(loadFrac >= 2 * targetBasicThreshold)
1025 { _lastNotLightlyLoaded = now; _lastResetLA = now; return(false); }
1026 final long timeSinceLoadRampUpStart = (now - _lastResetLA);
1027 // If still in load ramp-up phase then reduce 'uptime' threshold accordingly.
1028 final float targetThreshold = (timeSinceLoadRampUpStart >= LOAD_AVERAGE_RAMP_UP_TIME) ?
1029 targetBasicThreshold :
1030 (targetBasicThreshold * Math.max(0, Math.min(1, timeSinceLoadRampUpStart / (float) LOAD_AVERAGE_RAMP_UP_TIME)));
1031 final boolean aboveThreshold = loadFrac >= targetThreshold;
1032 if(aboveThreshold)
1033 { _lastNotLightlyLoaded = now; return(false); }
1034 }
1035
1036 if(ctxt != null)
1037 {
1038 // If the ThroughputFilter doesn't think that we are lightly loaded,
1039 // then we immediately report the system as not lightly loaded.
1040 final Object underloadFlag =
1041 ctxt.getAttribute(WebConsts.BANDWIDTH_LIGHTLOAD_ATTR_NAME);
1042 // We take the absence of any value as indication of a problem,
1043 // eg that the system may be too busy even to set the flag.
1044 if((!(underloadFlag instanceof Boolean)) ||
1045 (!((Boolean) underloadFlag).booleanValue()))
1046 { _lastNotLightlyLoaded = now; return(false); }
1047
1048 // We double-check that some problem is not making us seem overloaded at the same time...
1049 if(isOverloaded(ctxt)) { _lastNotLightlyLoaded = now; return(false); }
1050
1051 return(true); // Seems lightly loaded...
1052 }
1053
1054 // Site status is unclear, so assume not quiet for now.
1055 // However, this 'false' is not cacheable, ie is not a real result.
1056 return(false);
1057 }
1058
/**If true then exhibitHasThumbnail() will try to cache its results.
 * This should avoid us having to load thumbnails into memory
 * just to see if they exist or not.
 * <p>
 * exhibitsHaveThumbnail() also requires this to be true
 * before it attempts its concurrent pre-fetch into the cache.
 */
public static final boolean TN_AVAIL_CACHE = true;
1064
/**The target thumbnail availability cache minimum size, strictly positive.
 * Should be large enough that most thumbnail availability requests
 * will be caught by it regardless of exhibit set size,
 * but small enough to limit memory requirements to something reasonable.
 * Should be *much* larger than the full set of thumbnails ever seen
 * on one page and/or that might be in our "popular" page set
 * so as to avoid thrashing fruitlessly.
 * <p>
 * We are prepared to give up all cache entries rather than cause an OOME.
 * <p>
 * exhibitHasThumbnail() passes the larger of twice this value
 * and one more than the current exhibit count
 * as a sizing argument when it creates the cache.
 */
private static final int TN_AVAIL_CACHE_SIZE_MIN_TARGET = 131 +
    5*WebConsts.SINGLE_PAGE_CONTACT_SHEET_TN_COUNT +
    SystemVariables.MAX_DIFF_EXHIBIT_NAME_VALUES;
1078
/**Private key used by exhibitHasThumbnail() (and exhibitsHaveThumbnail()) to find the availability cache; never null.
 * Made AEP-linked so as to be better able to size the cache accurately for the current AEP,
 * and to ensure that it is periodically rebuilt/refreshed from scratch.
 */
private static final DataSourceBean.AEPLinkedKey tnHTMLCacheKey = new DataSourceBean.AEPLinkedKey("tnHTMLCacheKey");
1084
1085 /**Test if the given exhibits have thumbnails (locally) available; never null.
1086 * This is suitable for a bulk check, eg before rendering a table,
1087 * and may be internally parallelised to overcome I/O latency, etc.
1088 *
1089 * @param exhibitNames non-null list of full exhibit names;
1090 * not altered by this routine and must not be changed by the caller
1091 * while this routine is running
1092 *
1093 * @return a bit in the result set for each thumbnail that definitely
1094 * exists in the requested size; an unset bit may represent "not known"
1095 */
1096 @SuppressWarnings("unchecked")
1097 public static BitSet exhibitsHaveThumbnail(final DataSourceBean dataSource,
1098 final List<Name.ExhibitFull> exhibitNames,
1099 final boolean standard,
1100 final boolean forceCreation)
1101 {
1102 final int n = exhibitNames.size();
1103
1104 // Only try the parallel fetch if:
1105 // * We have an extant cache of thumbnail state.
1106 // * There is more than one item to check.
1107 // * We're not starved of resources (eg power).
1108 final LRUMapAutoSizeForHitRate<Name.ExhibitFull, Byte> tnExistenceMap;
1109 if(TN_AVAIL_CACHE && (n > 1) &&
1110 /* (!GenUtils.mustConservePower()) && */
1111 (null != (tnExistenceMap = (LRUMapAutoSizeForHitRate<Name.ExhibitFull, Byte>) dataSource.getAEPLinkedValue(tnHTMLCacheKey))))
1112 {
1113 // Find all the exhibits whose thumbnail state is not cached at all,
1114 // and try to start one or more background thread(s) to check their status.
1115 // We expect this activity to be largely I/O bound.
1116
1117 // Tasks to wait for the completion of...
1118 final List<Future<?>> tasks = new LinkedList<Future<?>>();
1119
1120 for(final Name.ExhibitFull en : exhibitNames)
1121 {
1122 // If we have any tn state already cached then skip this entry...
1123 if(null != tnExistenceMap.get(en)) { continue; }
1124
1125 // Try to fetch this missing entry into cache concurrently...
1126 tasks.add(ThreadUtils.nonCPUThreadPool.submit(new Runnable(){
1127 public final void run()
1128 { exhibitHasThumbnail(dataSource, en, standard, forceCreation); }
1129 }));
1130 }
1131
1132 // Wait for any tasks to complete.
1133 for(final Future<?> task : tasks)
1134 {
1135 try { task.get(); }
1136 catch(final Exception e) { e.printStackTrace(); /* Absorb but log any error. */ }
1137 }
1138 }
1139
1140 // Fetch all values through the cache using the caller's thread.
1141 final BitSet result = new BitSet(n);
1142 for(int i = n; --i >= 0; )
1143 {
1144 if(exhibitHasThumbnail(dataSource, exhibitNames.get(i), standard, forceCreation))
1145 { result.set(i); }
1146 }
1147
1148 return(result);
1149 }
1150
1151 /**Test if the given exhibit has a thumbnail (locally) available.
1152 * Does not try to force one to be made if one is not extant (or in cache)
1153 * unless the forceCreation argument is true.
1154 * <p>
1155 * Since this is likely to be testing what is in (local) cache,
1156 * its results can be considered at best a (good) hint.
1157 * <p>
1158 * We do not cache entirely negative answers
1159 * (ie that an exhibit has no thumbnails)
1160 * as this may change if we overcome (eg) a temporary resource issue.
1161 * But we assume the converse,
1162 * ie that once we have a thumbnail then it will always be available.
1163 * <p>
1164 * Always returns false for exhibit types that do not support thumbnails.
1165 *
1166 * @param dataSource the data source; never null
1167 * @param exhibitName the full exhibit name; must be valid (eg non-null)
 * @param standard  if true, tests for the presence of a standard-size thumbnail,
 *     else for the presence of a small one
1169 * @param forceCreation if true, we can try to force creation/fetch
1170 * of the underlying thumbnail if not already available locally
1171 *
1172 * @return true if exhibit definitely has/had one/both thumbnails,
1173 * false if unknown or thumbnails are not currently available
1174 */
1175 @SuppressWarnings("unchecked")
1176 public static boolean exhibitHasThumbnail(final DataSourceBean dataSource,
1177 final Name.ExhibitFull exhibitName,
1178 final boolean standard,
1179 final boolean forceCreation)
1180 {
1181 if((dataSource == null) || (exhibitName == null))
1182 { throw new IllegalArgumentException(); }
1183
1184 // If we are cacheing the availability of thumbnails
1185 // then ensure the cache map exists here.
1186 // This is a thread-safe size-limited LRU map.
1187 // Note that this is *not* linked to the AEP
1188 // so that we may retain a little stale information indefinitely,
1189 // especially though AEP changes when much has to be recomputed.
1190 MemoryTools.CacheMiniMap<Name.ExhibitFull, Byte> tnExistenceMap;
1191 if(TN_AVAIL_CACHE)
1192 {
1193 while((tnExistenceMap = (MemoryTools.CacheMiniMap<Name.ExhibitFull, Byte>) dataSource.getAEPLinkedValue(tnHTMLCacheKey)) == null)
1194 {
1195 // Size the table to suit the exhibit set and allow growth of the exhibit set...
1196 final int nExhibits = dataSource.getAllExhibitProperties(-1).aeid.length;
1197 dataSource.putIfAbsentAEPLinkedValue(tnHTMLCacheKey, LRUMapAutoSizeForHitRate.<Name.ExhibitFull, Byte>create(0, Math.max(2*TN_AVAIL_CACHE_SIZE_MIN_TARGET, 1+nExhibits), "exhibitHasThumbnail"));
1198 }
1199 }
1200
1201 // We store availability as a bit-mask
1202 // * bit 0 is 1 iff the small thumbnail is available
1203 // * bit 1 is 1 iff the std thumbnail is available
1204 byte availability = 0; // Assume none available by default.
1205
1206 // Only use cached positive values.
1207 // A negative cached result has us check again...
1208 // We cache negative results mainly to maintain correct hit-rate stats.
1209 final Byte cachedAvailability;
1210 if(!TN_AVAIL_CACHE || (null == (cachedAvailability = tnExistenceMap.get(exhibitName))) || (cachedAvailability.byteValue() <= 0))
1211 {
1212 // Hauling the thumbnail pair into view can be very expensive,
1213 // eg may involve disc fetches or image decoding or worse,
1214 // and may simply displace other items better kept in cache/memory.
1215 final ExhibitThumbnails tns = dataSource.getThumbnails(exhibitName, forceCreation);
1216
1217 if(tns != null)
1218 {
1219 if(tns.getSmall() != null)
1220 { availability |= 1; }
1221 if(tns.getStandard() != null)
1222 { availability |= 2; }
1223 }
1224
1225 if(TN_AVAIL_CACHE)
1226 {
1227 // Byte.valueOf() avoids creating distinct instances.
1228 tnExistenceMap.put(exhibitName, Byte.valueOf(availability));
1229 if(IsDebug.isDebug && ((tnExistenceMap.size() & 0x3ff) == 0)) { dataSource.log(" tnExistenceMap: " + tnExistenceMap.toString()); }
1230 }
1231 }
1232 // Use the cached (positive) value...
1233 else
1234 { availability = cachedAvailability.byteValue(); }
1235
1236 final boolean tnAvailable = (0 != (availability & (standard ? 2 : 1)));
1237 return(tnAvailable);
1238 }
1239
1240 /**Get thumbnail image URL to embed in HTML page (usually JPEG/GIF/PNG); null if none available.
1241 * By preference uses purpose-built thumbnail, else tries to
1242 * use image itself if small enough.
1243 * <p>
1244 * Has to be passed a dataSource and a full exhibit name.
1245 * <p>
1246 * Returns null if no suitable thumbnail URL can be generated.
1247 * <p>
1248 * This may cache its results,
1249 * in particular assuming that once a particular thumbnail becomes available
1250 * that it does not go away again.
1251 *
1252 * @param tnDim (output argument) if not null and result is not null,
1253 * is filled in with the thumbnail dimensions
1254 * @param dontCreateTn if true, don't force creation of a thumbnail if
1255 * one is not already readily available
1256 * @param rrURLOnly if true then only generate a root-relative URL,
1257 * else an absolute URL at a CDN (or with sneaky concurrency) is allowed
1258 * and either form may be returned
1259 */
1260 public static String makeHTMLInlineImageThumbnailURL(final DataSourceBean dataSource,
1261 final Name.ExhibitFull exhibitName,
1262 final boolean std,
1263 final boolean rrURLOnly,
1264 final java.awt.Dimension tnDim,
1265 final boolean dontCreateTn)
1266 throws IOException
1267 {
1268 final AllExhibitProperties aep = dataSource.getAllExhibitProperties(-1);
1269
1270 // Get full exhibit details...
1271 final ExhibitStaticAttr esa = aep.aeid.getStaticAttr(exhibitName);
1272 // Stop if exhibit has gone away or is invalid.
1273 if(esa == null) { return(null); }
1274
1275 if(WebUtils.canInlineInHTMLPage((ExhibitMIME.getInputFileType(esa.getCharSequence()))))
1276 {
1277 // Get the exhibit immutable computable properties if possible.
1278 final ExhibitPropsComputable epc = aep.getExhibitPropsComputable(exhibitName);
1279 final java.awt.Dimension xyDim = (epc == null) ? null : epc.getXyDimensions();
1280 // Compute what thumbnail dimensions should be...
1281 final java.awt.Dimension thumbnailXyDim = (xyDim == null) ? null : ExhibitThumbnails.computeThumbnailDimensions(xyDim, std);
1282
1283 // Is this in fact a 2D image?
1284 if(thumbnailXyDim != null)
1285 {
1286 // Pass dimensions back to caller.
1287 if(tnDim != null)
1288 {
1289 tnDim.width = thumbnailXyDim.width;
1290 tnDim.height = thumbnailXyDim.height;
1291 }
1292
1293 // Is this a small enough (in bytes) exhibit to be its own thumbnail?
1294 final boolean smallExhibit = (esa.length <=
1295 (std ? ExhibitThumbnails.STD_ABS_MAX_BYTES : ExhibitThumbnails.SML_ABS_MAX_BYTES));
1296 // Can this be its own thumbnail?
1297 // It must be small enough in bytes,
1298 // and possibly small enough in XxY pixels too.
1299 final boolean canBeOwnThumbnail =
1300 (smallExhibit && (WebConsts.ALLOW_BROWSER_IMAGE_SCALE || thumbnailXyDim.equals(xyDim)));
1301
1302 // See if we have thumbnails available
1303 // (if eager, force one to be made,
1304 // else see if one already exists if we can't inline directly).
1305 // If we force them to be created
1306 // then we expect them to remain cached!
1307 // Shall we force creation?
1308 final boolean forceCreation = !dontCreateTn &&
1309 (WebConsts.EAGER_TN_USE || !canBeOwnThumbnail);
1310
1311 // Check for availability of real thumbnail.
1312 final boolean tnAvailable = exhibitHasThumbnail(dataSource,
1313 exhibitName,
1314 std,
1315 forceCreation);
1316
1317 final boolean canShowThumbnail = tnAvailable || canBeOwnThumbnail;
1318
1319 //System.err.println("[sE/cBOT/cST: "+smallExhibit+"/"+canBeOwnThumbnail+"/"+canShowThumbnail+".]");
1320
1321 if(canShowThumbnail)
1322 {
1323 // Chose the (root-relative, or CDN/optimised full) URL to use
1324 // (always serve from same host as this page).
1325 // Show true thumbnail by preference...
1326 if(rrURLOnly)
1327 { return(tnAvailable ? WebUtils.makeThumbnailRRURL(exhibitName, std) : WebUtils.makeExhibitRRURL(exhibitName)); }
1328 else
1329 { return(tnAvailable ? CDNUtils.makeThumbnailOptCDNURL(dataSource, exhibitName, std, null) : CDNUtils.makeExhibitOptCDNURL(dataSource, exhibitName, null).toString()); }
1330 }
1331 }
1332 }
1333
1334 return(null); // Can't make a thumbnail.
1335 }
1336
1337 /**Find the set of exhibits with the same subject as the indicated one.
1338 * Given a List of String exhibit names sorted by
1339 * ExhibitPropsGlobalImmutable.SMART_ORDER
1340 * (or possibly just by ExhibitPropsGlobalImmutable.SUBJECT_ORDER),
1341 * the SUBJECT_ORDER comparator,
1342 * and the index to a given item in that List,
1343 * finds all the items surrounding that have the same SUBJECT_ORDER,
1344 * ie that differ only in attribute words and are variants on the same
1345 * exhibit. (The result will always contain the input item.)
1346 * <p>
1347 * The List must not contain nulls, all entries must be valid exhibit
1348 * names, and the List must be sorted implicitly with the comparator
1349 * passed in. The input index must be within bounds.
1350 * <p>
1351 * This does not alter the List passed in. The return value is
1352 * an unmodifiable sublist of the input.
1353 * <p>
1354 * This returns the sublist of variants on the indicated exhibit;
1355 * this will degenerate to just containing the input exhibit name if there
1356 * are no other exhibit names for the same exhibit.
1357 * <p>
1358 * This assumes that the number of variants of any given exhibit is
1359 * small, and so a linear search is used.
1360 * <p>
1361 * If the List does not support efficient bidirectional movement and seeks
1362 * then this routine will be very inefficient.
1363 */
1364 public static List<Name.ExhibitFull> getExhibitVariantRange(final List<Name.ExhibitFull> allExhibitNames,
1365 final Comparator<Name.ExhibitFull> comparator,
1366 final int thisExhibitIndex)
1367 {
1368 final Name.ExhibitFull thisExhibit = allExhibitNames.get(thisExhibitIndex);
1369 final int listSize = allExhibitNames.size();
1370
1371 int first, last;
1372
1373 // Search backwards for first matching item.
1374 for(first = thisExhibitIndex; first > 0; --first)
1375 {
1376 if(comparator.compare(thisExhibit, allExhibitNames.get(first-1)) != 0)
1377 { break; } // Stop when we hit an item with a different subject.
1378 }
1379
1380 // Search forwards for last matching item.
1381 for(last = thisExhibitIndex; last < listSize-1; ++last)
1382 {
1383 if(comparator.compare(thisExhibit, allExhibitNames.get(last+1)) != 0)
1384 { break; } // Stop when we hit an item with a different subject.
1385 }
1386
1387 return(Collections.unmodifiableList(allExhibitNames.subList(first, last+1)));
1388 }
1389
1390 /**Computes a minimal human-readable immutable unique prefix of an exhibit short name to distinguish given exhibit from most others.
1391 * Uniqueness is not guaranteed, but this is meant to give a reasonably
1392 * short result that a human can read in the title of a page, for example.
1393 * <p>
1394 * If inputs are bad then this will try to fail gently with an empty-string
1395 * result rather than an exception to make it robust if called directly from
1396 * JSP code, for example.
1397 * <p>
1398 * This is passed a List of exhibits sorted in a "smart" order that
1399 * sorts mainly on the file component of the name,
1400 * probably in a case-insensitive way.
1401 * <p>
1402 * This routine finds a short word prefix that (case-insensitively)
1403 * is different from neighbouring exhibits
1404 * and is thus (depending on the overall sort order)
1405 * probably the shortest unique prefix amongst the entire collection.
1406 * <p>
1407 * (If no unique value is possible, the entire prefix is returned.)
1408 * <p>
 * If the List is of size 0 we return the empty string and do not use
1410 * the index parameter at all. This simplifies use where the list
1411 * may transiently be empty during start-up.
1412 * <p>
1413 * This routine may run very slowly if the List argument does not support
1414 * efficient random seeking.
1415 * <p>
1416 * The result is designed to be used in headings and other display text
1417 * such as image alt/title attributes.
1418 * <p>
1419 * TODO: fix inefficient double-parsing of main words...
1420 *
1421 * @param exhibits sorted (smart-ish sorted) list of exhibit names
1422 * (String value); must not change while routine is running
 * @param ourIndex  index (non-negative, zero-based) of the exhibit whose
 *     abbreviated name we wish to produce; must be within the List
1425 * @return "" in case of invalid arguments,
1426 * else some whole-word prefix of the name
1427 */
1428 public static CharSequence minimalUniqueENTitlePrefix(final List<Name.ExhibitFull> exhibits,
1429 final int ourIndex)
1430 {
1431 // Treat bad arguments gently as the outputs are likely to
1432 // be displayed in HTML directly.
1433 if(exhibits.size() == 0) { return(""); }
1434 if((ourIndex < 0) || (ourIndex >= exhibits.size())) { return(""); }
1435
1436 // Set of attribute words we use for parsing names.
1437 // In this case, deliberately empty.
1438 final Set<String> noAttrWords = Collections.emptySet();
1439
1440 // Dummy "empty" enumeration we use for missing neighbours.
1441 final Enumeration<?> emptyEn = Collections.enumeration(Collections.emptyList());
1442
1443 // Get our name and previous/next names
1444 // as word enumerations (or empty enumerations if not present).
1445 // for this purpose we pretend that there are no attribute words...
1446 final ListIterator<Name.ExhibitFull> liFwd = exhibits.listIterator(ourIndex);
1447 final CharSequence ourNameFull = (liFwd.next());
1448 // FIXME: inefficient via full name and tokenizer...
1449 final CharSequence ourNameMainWords = ExhibitName.getMainWordsComponent(
1450 ourNameFull, noAttrWords).toString();
1451 final Enumeration<?> ourNameWords = ExhibitName.getMainWords(
1452 ourNameFull, noAttrWords);
1453
1454 // Default to empty enumerations.
1455 Enumeration<?> nextNameWords = emptyEn;
1456 Enumeration<?> prevNameWords = emptyEn;
1457
1458 // Now search forwards for different name to compare against...
1459 // Note that we implicitly start from just after ourName above.
1460 while(liFwd.hasNext())
1461 {
1462 final CharSequence n = (liFwd.next());
1463 // FIXME: inefficient via full name and tokenizer...
1464 final CharSequence nextNameMainWords =
1465 ExhibitName.getMainWordsComponent(n, noAttrWords);
1466 if(!TextUtils.contentEqualsIgnoreCase(nextNameMainWords, ourNameMainWords))
1467 {
1468 nextNameWords = ExhibitName.getMainWords(n, noAttrWords);
1469 break;
1470 }
1471 }
1472
1473 // Now search backwards for different name to compare against...
1474 final ListIterator<Name.ExhibitFull> liBack = exhibits.listIterator(ourIndex);
1475 while(liBack.hasPrevious())
1476 {
1477 final CharSequence n = (liBack.previous());
1478 // FIXME: inefficient via full name and tokenizer...
1479 final CharSequence prevNameMainWords =
1480 ExhibitName.getMainWordsComponent(n, noAttrWords);
1481 if(!TextUtils.contentEqualsIgnoreCase(prevNameMainWords, ourNameMainWords))
1482 {
1483 prevNameWords = ExhibitName.getMainWords(n, noAttrWords);
1484 break;
1485 }
1486 }
1487
1488 // Result word prefix (preserves case, ends with word separator).
1489 final StringBuilder result = new StringBuilder();
1490
1491 // Iterate until we run out of words or both neighbours have
1492 // (or have different words).
1493 while(ourNameWords.hasMoreElements())
1494 {
1495 final String ourNameWord = (String) ourNameWords.nextElement();
1496 result.append(ourNameWord).append(ExhibitName.WORD_SEP);
1497
1498 // Take next word, if extant, on both prev and next sides.
1499 final String nextNameWord = nextNameWords.hasMoreElements() ?
1500 (String) nextNameWords.nextElement() : "";
1501 final String prevNameWord = prevNameWords.hasMoreElements() ?
1502 (String) prevNameWords.nextElement() : "";
1503
1504 // If neighbour has another word but it is different to ours,
1505 // pretend neighbour was truncated at previous word.
1506 // We ignore case, since users probably will.
1507 if(nextNameWord.equalsIgnoreCase(ourNameWord))
1508 { continue; } // Looks the same so cannot stop yet.
1509 else
1510 { nextNameWords = emptyEn; } // No longer a contender.
1511
1512 if(prevNameWord.equalsIgnoreCase(ourNameWord))
1513 { continue; } // Looks the same so cannot stop yet.
1514 // else
1515 // { prevNameWords = emptyEn; } // No longer a contender.
1516
1517 break; // Done!
1518 }
1519
1520 // Attempt to return as space-saving Name, else a String, either being interned.
1521 return(Name.createOrStringFallback(result, null));
1522 }
1523
1524 /**Get BufferedImage containing expanded image loaded as static resource from WAR; never null.
1525 * Loaded on demand and cached statically,
1526 * via a SoftReference to allow the system to reclaim memory rather than OOM.
1527 * <p>
1528 * There may be an adverse effect on system behaviour if many large images
1529 * are cached in memory; this may be mitigated by storing them via
1530 * a SoftReference so that the memory can be recycled automatically.
1531 * <p>
1532 * This method is internally synchronized to allow only one image load/decode
1533 * to happen at once to conserve CPU and memory (and other) resources.
1534 * <p>
1535 * If the image is indexed and forceToARGBTrueColour is true then
1536 * we expand it to true-colour to make it possible to add markings.
1537 * <p>
1538 * This uses our built-in mediahandler classes to decode the image,
1539 * so the image type must be one that we have a decoder for.
1540 * <p>
1541 * This may not handle alpha correctly in all cases.
1542 * <p>
1543 * Under memory pressure this may release cached resources
1544 * whether hard or soft cached.
1545 * <p>
1546 * <strong>Beware:</strong> since a reference is returned rather than
1547 * a copy, be careful not to adjust the returned image unintentionally.
1548 * @param copyResult if true, we force the result to be a copy of
1549 * the cached value to avoid accidentally changing the cached copy;
1550 * this may force a change in colour model and/or discarding properties
1551 * @param resourceRRURL root-relative URL (eg "/my/image.gif") of
1552 * source image in WAR; must not be null and must be a type
1553 * (and with a file extension) that we have a MIME mediahandler for
1554 * @param forceToARGBTrueColour if true, and the source image is not
1555 * ARGB true-colour, then we force conversion to ARGB true-colour
1556 * before cacheing to make it easier to draw on the image
1557 * @param context gives context for WAR from which to load the raw
1558 * base clickable-map image
1559 *
1560 * @return image, possibly a copy, possibly with a converted colour model
1561 *
1562 * @throws java.lang.IllegalStateException if the image is not loadable
1563 */
1564 public static BufferedImage getAndCacheStaticImage(
1565 final boolean copyResult,
1566 final String resourceRRURL,
1567 final boolean forceToARGBTrueColour,
1568 final ServletContext context)
1569 throws IllegalStateException
1570 {
1571 if((context == null) ||
1572 (resourceRRURL == null))
1573 { throw new IllegalArgumentException(); }
1574
1575 synchronized(_gACSI_cache)
1576 {
1577 // An item in the cache is one of:
1578 // * null (ie completely absent)
1579 // * SoftReference (possibly cleared) to BufferedImage
1580 final Object rawItem = _gACSI_cache.get(resourceRRURL);
1581 BufferedImage result = (null == rawItem) ? null :
1582 (BufferedImage) (((SoftReference<?>) rawItem).get());
1583
1584 if(result == null) // Needs fetching.
1585 {
1586 final InputStream is =
1587 context.getResourceAsStream(resourceRRURL);
1588 if(is == null)
1589 { throw new IllegalStateException("cannot get InputSteam for image to cache from " + resourceRRURL); }
1590
1591 // Find correct handler given name of file.
1592 final ExhibitMIME.ExhibitTypeParameters etp =
1593 ExhibitMIME.getInputFileType(resourceRRURL);
1594 if(etp == null)
1595 { throw new IllegalStateException("cannot get MIME type for image to cache from " + resourceRRURL); }
1596 if(etp.handler == null)
1597 { throw new IllegalStateException("cannot get handler for image to cache from " + resourceRRURL); }
1598
1599 try {
1600 // Decode the image.
1601 result = etp.handler.decodeImage(is);
1602 if(result == null)
1603 { throw new IllegalStateException("cannot get decode image to cache from " + resourceRRURL); }
1604
1605 // Force to ARGB if required.
1606 if(forceToARGBTrueColour)
1607 { result = ImageUtils.convertToTrueColourARGB(result, false); }
1608
1609 // Cache the result.
1610 _gACSI_cache.put(resourceRRURL, new SoftReference<BufferedImage>(result));
1611 }
1612 catch(final IOException e)
1613 { throw new IllegalStateException("cannot decode image to cache from " + resourceRRURL + ": IOException: " + e.getMessage()); }
1614 }
1615
1616 // If forced to copy the result, do so.
1617 // TODO: Maybe try to preserve relevant image properties?
1618 if(copyResult)
1619 {
1620 // Coerce data into original colour model.
1621 // Discard any properties of the original.
1622 final ColorModel cm = ImageUtils.extractColorModelOrRGB(result);
1623 final int width = result.getWidth();
1624 final int height = result.getHeight();
1625 final WritableRaster raster = cm.createCompatibleWritableRaster(
1626 width, height);
1627 final BufferedImage copiedResult =
1628 new BufferedImage(cm, raster, false, null);
1629
1630 // Actually copy the pixels...
1631 copiedResult.setRGB(0, 0, width, height,
1632 result.getRGB(0, 0, width, height, null, 0, width),
1633 0, width);
1634
1635 return(copiedResult);
1636 }
1637
1638 return(result);
1639 }
1640 }
1641
1642 /**Private cache for getAndCacheStaticImage(); never null.
1643 * Each value is a SoftReference (possibly cleared) to a decoded BufferedImage,
1644 * filed under a key that getAndCacheStaticImage() derives from the image's root-relative URL.
1645 * <p>
1646 * Thread-safe LRU-managed limited-size memory-sensitive map.
1647 * <p>
1648 * We are prepared to discard everything if very short of memory.
1649 * <p>
1650 * All getAndCacheStaticImage() activity is synchronized on this instance.
1651 */
1652 private static final LRUMapAutoSizeForHitRate<String,Object> _gACSI_cache =
1653 LRUMapAutoSizeForHitRate.<String,Object>create(0, 1024, "_gACSI_cache");
1654
1655
1656 /**Generate a unique key for the given HTTP request; returns null if not possible to generate.
1657 * For example, we use this to help limit the number of times that
1658 * a given user is asked to vote.
1659 * <p>
1660 * Generates a string starting with the given prefix and some
1661 * leading portion of the client's IP address...
1662 * <p>
1663 * It is not fatal if this conflates users,
1664 * but it is more useful that this never thinks one user is more than one
1665 * to avoid pestering them too much
1666 * (or letting a spider inject too much noise for example).
1667 *
1668 * @param prefix unique prefix to the generated key; non-empty, non-null
1669 * @param request the user's request
1670 */
1671 public static String generateUserDataPointID(final String prefix,
1672 final HttpServletRequest request)
1673 {
1674 if((prefix == null) || (prefix.length() == 0)) { return(null); }
1675 if(request == null) { return(null); }
1676
1677 final StringBuilder sb = new StringBuilder(prefix.length() + 21); // 21 for IPv4.
1678 sb.append(prefix);
1679
1680 try
1681 {
1682 // Get the IP address
1683 final InetAddress ia = InetAddress.getByName(request.getRemoteAddr());
1684
1685 // Use all but the final octet to construct the data-point ID for a non-IPv6 address (ie IPv4).
1686 // For IPv6 addresses use hex bytes of the network part for now.
1687 // Assume that this will distinguish most genuine users from one another
1688 // but also not completely identify an individual (for privacy/security reasons).
1689 final byte[] addr = ia.getAddress();
1690 if(ia instanceof Inet6Address) // IPv6
1691 {
1692 sb.append("6:");
1693 for(int i = 0; i < 8; ++i) // Network part only...
1694 { sb.append(Integer.toHexString(addr[i] & 0xff)); }
1695 }
1696 else // IPv4
1697 {
1698 for(int i = 0; i < addr.length-1; ++i)
1699 { sb.append(addr[i] & 0xff).append('.'); }
1700 }
1701
1702 return(sb.toString());
1703 }
1704 catch(final UnknownHostException e)
1705 {
1706 return(null); // Could not understand the client's address.
1707 }
1708 }
1709
1710 /**Name ("pro") of the additional parameter that records whether this is a vote "for".
1711 * The value of the parameter is to be "true" or "false".
1712 */
1713 public static final String VOTE_PRO_PARAM_NAME = "pro";
1714
1715 /**Name ("con") of the additional parameter that records whether this is a vote "against".
1716 * The value of the parameter is to be "true" or "false".
1717 */
1718 public static final String VOTE_CON_PARAM_NAME = "con";
1719
1720 /**Create and post the handler for a vote if possible and returns the unique listenerID.
1721 * This replaces any existing listener for this voter.
1722 * <p>
1723 * This returns null if we could not create a listener.
1724 * <p>
1725 * If the handler is invoked,
1726 * then this registers a dummy (inactive) handler
1727 * to delay the next request to the user to vote.
1728 *
1729 * @param exhibitFullName valid exhibit name; never null
1730 * @param request client's HTTP request; never null
1731 * @param vars where stats updates are posted; never null
1732 *
1733 * @return listenerID, or null if one could not be generated
1734 */
1735 public static String createAndFileVoteListener(final Name.ExhibitFull exhibitFullName,
1736 final HttpServletRequest request,
1737 final SimpleVariablePipelineIF vars)
1738 {
1739 if(exhibitFullName == null) { return(null); }
1740 if(vars == null) { return(null); }
1741
1742 final String dpID = generateUserDataPointID(WebConsts.VOTER_DATA_POINT_PREFIX, request);
1743 if(dpID == null) { return(null); }
1744
1745 // Compute expiry time (with a random element).
1746 final long expireBy = System.currentTimeMillis() + WebConsts.VOTE_MIN_REQUEST_GAP_MS +
1747 Rnd.fastRnd.nextInt(WebConsts.VOTE_MIN_REQUEST_GAP_MS);
1748
1749 // Capture the visitor's IP address for later (in case they vote).
1750 InetAddress va = null;
1751 try { va = InetAddress.getByName(request.getRemoteAddr()); }
1752 catch(final UnknownHostException e) { } // Ignore errors...
1753 final InetAddress voterIPAddr = va;
1754
1755 // Create new listener.
1756 // Pass in a weak reference to the vars pipeline to avoid blocking GC...
1757 final StatsSink.AbstractStatsListener newAsl =
1758 (new VoteHandler(dpID, expireBy, exhibitFullName, new WeakReference<SimpleVariablePipelineIF>(vars), voterIPAddr));
1759
1760 // Get the listener ID...
1761 final String listenerID = newAsl.uniqueListenerID;
1762
1763 // File the request!
1764 StatsSink.addListenerForDataPoint(newAsl);
1765
1766 return(listenerID);
1767 }
1768
1769
1770 /**Get sorted, hyperlinked HTML i18n text list of exhibit categories; never null.
1771 * The list is sorted:
1772 * <ol>
1773 * <li>By dictionary-order i18n localised title.
1774 * </ol>
1775 * <p>
1776 * We may flag categories as good or bad if significantly so.
1777 *
1778 * @param asList if true, entries are preceded by <li>
1779 * else they are followed by <br />\r\n;
1780 */
1781 public static final String getCategoryListSortedAsHTML(final DataSourceBean dsb,
1782 final HttpServletRequest request,
1783 final LocaleBeanBase localeBean,
1784 final boolean asList)
1785 throws IOException
1786 {
1787 return(getCategoryListSortedAsHTML(dsb, request, localeBean, asList ? null : "<br />\r\n"));
1788 }
1789
1790 /**Get sorted, hyperlinked HTML i18n text list of exhibit categories; never null.
1791 * The list is sorted:
1792 * <ol>
1793 * <li>By dictionary-order i18n localised title.
1794 * </ol>
1795 * <p>
1796 * We may flag categories as good or bad if significantly so.
1797 *
1798 * @param entrySeparator text (followed by CRLF) to terminate entries;
1799 * null if entries are to be wrapped with li tags
1800 */
1801 public static final String getCategoryListSortedAsHTML(final DataSourceBean dsb,
1802 final HttpServletRequest request,
1803 final LocaleBeanBase localeBean,
1804 final String entrySeparator)
1805 throws IOException
1806 {
1807 if((dsb == null) || (localeBean == null))
1808 { throw new IllegalArgumentException(); }
1809
1810 final boolean asList = (null == entrySeparator);
1811
1812 // Private data on each category, for sorting.
1813 final class PerCat implements Comparable<PerCat>
1814 {
1815 PerCat(final String cat,
1816 final String i18nTitle,
1817 final Boolean isGood,
1818 final int entriesInCat)
1819 {
1820 category = cat;
1821 title = i18nTitle;
1822 good = isGood;
1823 numEntries = entriesInCat;
1824 }
1825
1826 final String category;
1827 final String title;
1828 final Boolean good;
1829 final int numEntries;
1830
1831 /**Compares this object with the specified object for order.
1832 * Order is:
1833 * <ol>
1834 * <li>By dictionary-order i18n localised title.
1835 * </ol>
1836 */
1837 public int compareTo(final PerCat other)
1838 {
1839 // Sort by localised title, case insensitively.
1840 final int tComp = String.CASE_INSENSITIVE_ORDER.compare(title, other.title);
1841 if(tComp != 0) { return(tComp); }
1842
1843 // Officially break ties by underlying canonical category name.
1844 // This should never really be needed.
1845 return(category.compareTo(other.category));
1846 }
1847 }
1848
1849 final AllExhibitProperties aep = dsb.getAllExhibitProperties(-1);
1850
1851 // Get the list of categories.
1852 final Map<String,Integer> categories = aep.getCategoryExhibitCounts();
1853
1854 // Create sorted set of details.
1855 // We don't expend huge effort on getting the category good/bad status.
1856 final SortedSet<PerCat> cs = new TreeSet<PerCat>();
1857 for(final String cat : categories.keySet())
1858 {
1859 cs.add(new PerCat(
1860 cat,
1861 GenUtils.computeSectionTitle(aep, cat, localeBean),
1862 aep.isCategoryGood(cat, dsb, false),
1863 categories.get(cat)
1864 ));
1865 }
1866
1867 // Convert to nicely-formatted HTML.
1868 final StringBuilder sb = new StringBuilder(79 * categories.size());
1869 for(final PerCat pc : cs)
1870 {
1871 if(asList) { sb.append("<li>"); }
1872
1873 // If category significantly good/bad then mark it so.
1874 if(pc.good != null)
1875 {
1876 final String proIcon = WebConsts.PRO_ICON_15x15_NAME;
1877 final String conIcon = WebConsts.CON_ICON_15x15_NAME;
1878 sb.append("<img src=\"").
1879 append(WebUtils.getOptionalSneakyConcurrencyRRURLPrefix(request)).
1880 append("/_static/icon/").
1881 append(pc.good.booleanValue() ? proIcon : conIcon).
1882 append("\" width=15 height=15> ");
1883 }
1884
1885 // Open link to category RRURL.
1886 sb.append("<a href=\"").
1887 append(WebConsts.VIRTUAL_COLLECTIONS_BYCATEGORY_ROOT).
1888 append(pc.category).
1889 append("/\">");
1890 sb.append(pc.title);
1891 // Close link to category.
1892 sb.append("</a>");
1893
1894 // Insert exhibit count.
1895 sb.append(" <i>(").append(pc.numEntries).append(")</i>");
1896
1897 // Finish the point
1898 if(!asList) { sb.append(entrySeparator); }
1899 else { sb.append("</li>\r\n"); }
1900 }
1901
1902 return(sb.toString());
1903 }
1904
1905 /**Returns true if this seems to be a slave disconnected from the master.
1906 * This instance may, for example, not wish to collect votes from users
1907 * if the votes may get discarded without getting to the master.
1908 */
1909 public static final boolean isDisconnectedSlave(final DataSourceBean dsb)
1910 {
1911 if(dsb == null) { throw new IllegalArgumentException(); }
1912
1913 // If definitely a master then this is not 'disconnected' by definition.
1914 if(Boolean.FALSE.equals(dsb.isSlave()))
1915 { return(false); }
1916
1917 // This instance is treated as disconnected if it is a slave with no xfer key
1918 // since that implies that it won't be allowed to send updates (eg votes) home.
1919 if(!LocalProps.hasXferKey())
1920 { return(true); }
1921
1922 // This instance is to be treated as NOT disconnected
1923 // if it can see at least one other system via the system variables,
1924 // ie at least two systems in total.
1925 // This uses the availability of client-count as a measure of connectivity.
1926 final SimpleVarStats stats = VarTools.generateSimpleStats(dsb,
1927 SystemVariables.ThroughputMonitorFilter_CLIENT_COUNT,
1928 0); /* Minimum possible life. */
1929 return((stats == null) || (stats.getSystemCount() < 2));
1930 }
1931
1932 /**Returns true if this request is apparently a precacheing request, eg from a "Web accelerator".
1933 * This is true if a client (such as FireFox) is "reading ahead"
1934 * but it may be the case that no real human gets to see the content.
1935 * <p>
1936 * See https://developer.mozilla.org/en/Link_prefetching_FAQ
1937 */
1938 public static boolean isPrecacheRequest(final HttpServletRequest request)
1939 {
1940 // "X-Moz: prefetch" header covers FF1--3.5 and Google's Web Accelerator.
1941 return("prefetch".equalsIgnoreCase(request.getHeader("X-Moz")));
1942 }
1943
1944
1945 /**Private key under which getCatPageExhibitMetaDataHTML() (and its Future variant) file the per-exhibit metadata cache against a DataSourceBean; never null. */
1946 private static final AEPLinkedKey metadataCacheKey = new AEPLinkedKey("metadataCacheKey");
1947
1948 /**Static dictionary handed to Compact7BitString by getCatPageExhibitMetaDataHTML() when compacting metadata HTML held in memory; never null.
1949 * Each entry's trailing comment records the count/saving/meanFirstPos figures noted when the list was drawn up (or that it was added by hand).
1950 * The content should be reviewed after any major format changes, though this is not a correctness issue, only a matter of compression efficiency.
1951 */
1952 public static final Compact7BitString.StaticDictionary sDictMD = new Compact7BitString.StaticDictionary("getCatPageExhibitMetaDataHTML",
1953 Arrays.asList(new String[]{
1954 "JPEG", /* MANUALLY ADDED: count=16817, saving=50451, meanFirstPos=126 */
1955 "javax_imageio_1", /* count=17761, saving=248654, meanFirstPos=37 */
1956 "ColorSpaceType", /* count=17752, saving=230776, meanFirstPos=67 */
1957 "Compression", /* count=17684, saving=176840, meanFirstPos=106 */
1958 "NumChannels", /* count=16942, saving=169420, meanFirstPos=86 */
1959 "metadata", /* count=17939, saving=125573, meanFirstPos=9 */
1960 "TypeName", /* count=16826, saving=117782, meanFirstPos=120 */
1961 "compact", /* count=18684, saving=112104, meanFirstPos=3 */
1962 "Chroma", /* count=17755, saving=88775, meanFirstPos=53 */
1963 "value", /* count=18684, saving=74736, meanFirstPos=87 */
1964 "image", /* count=16951, saving=67804, meanFirstPos=23 */
1965 "YCbCr", /* count=15240, saving=60960, meanFirstPos=73 */
1966 "name", /* count=17751, saving=53253, meanFirstPos=71 */
1967 "\"</", /* count=18684, saving=37368, meanFirstPos=72 */
1968 "></", /* count=17805, saving=35610, meanFirstPos=95 */
1969 "</", /* count=18684, saving=18684, meanFirstPos=10 */
1970 "=\"", /* count=18684, saving=18684, meanFirstPos=70 */
1971 "> ", /* count=18684, saving=18684, meanFirstPos=68 */
1972 "><", /* count=18684, saving=18684, meanFirstPos=4 */
1973 "dd", /* count=18684, saving=18684, meanFirstPos=13 */
1974 "dl", /* count=18684, saving=18684, meanFirstPos=1 */
1975 "dt", /* count=18684, saving=18684, meanFirstPos=5 */
1976 "BackgroundIndex", /* count=808, saving=11312, meanFirstPos=103 */
1977 "BlackIsZero", /* count=934, saving=9340, meanFirstPos=89 */
1978 "accessionData", /* count=745, saving=8940, meanFirstPos=9 */
1979 "stream", /* count=984, saving=4920, meanFirstPos=23 */
1980 "GRAY", /* count=1157, saving=3471, meanFirstPos=73 */
1981 "CRC32", /* count=745, saving=2980, meanFirstPos=61 */
1982 "RGB", /* count=1353, saving=2706, meanFirstPos=73 */
1983 "TRUE", /* count=837, saving=2511, meanFirstPos=91 */
1984 "date", /* count=745, saving=2235, meanFirstPos=23 */
1985 "hash", /* count=745, saving=2235, meanFirstPos=59 */
1986 "size", /* count=745, saving=2235, meanFirstPos=41 */
1987 "sampleRate", /* count=174, saving=1566, meanFirstPos=88 */
1988 "MD5", /* count=745, saving=1490, meanFirstPos=82 */
1989 "encoding", /* count=174, saving=1218, meanFirstPos=70 */
1990 "frames", /* count=162, saving=810, meanFirstPos=37 */
1991 "audio", /* count=178, saving=712, meanFirstPos=56 */
1992 "57024", /* count=173, saving=692, meanFirstPos=47 */
1993 "PCM_SIGNED", /* count=55, saving=495, meanFirstPos=78 */
1994 "ULAW", /* count=112, saving=336, meanFirstPos=75 */
1995 "PCM_UNSIGNED", /* count=7, saving=77, meanFirstPos=77 */
1996 "BI_RGB", /* count=6, saving=30, meanFirstPos=74 */
1997 "1136915", /* MANUALLY ADDED: count ~ 10 as prefix. */
1998 }));
1999
2000 /**Generates HTML form of exhibit metadata, "" if no such metadata for the specified exhibit; never null.
2001 * The result is keyed to both the DataSourceBean and the exhibitName.
2002 * <p>
2003 * Cached values are discarded when the AEP changes.
2004 * <p>
2005 * The computed HTML is locale-invariant, which is why cacheing is viable.
2006 * <p>
2007 * The toString() method should be called on the result to get the String HTML text,
2008 * which may be a String or some other internal representation.
2009 *
2010 * @param dsb valid non-null DataSourceBean
2011 * @param exhibitName valid non-null exhibit full name
2012 */
2013 @SuppressWarnings("unchecked")
2014 public static Object getCatPageExhibitMetaDataHTML(final DataSourceBean dsb,
2015 final Name.ExhibitFull exhibitName)
2016 {
2017 if(null == dsb) { throw new IllegalArgumentException(); }
2018 if(null == exhibitName) { throw new IllegalArgumentException(); }
2019
2020 // Get existing cache map, or create new one.
2021 // The cache is a size-limited, thread-safe Map
2022 // from full exhibit name to HTML formatted metadata ("" if none).
2023 // The items can be large and possibly moderately expensive to (re)compute
2024 // though all in-memory (no disc access for example) so a miss is not that bad.
2025 // Races here may result in some wasted work but no errors.
2026 CacheMiniMap<Name.ExhibitFull,Object> cachedMetaDataMap;
2027 while((cachedMetaDataMap = (CacheMiniMap<Name.ExhibitFull,Object>) dsb.getAEPLinkedValue(metadataCacheKey)) == null)
2028 {
2029 // Limit size to ~10 per 1MB of heap, with a minimum of a few tens to cover popular pages.
2030 final int maxCacheSize = Math.max(32, (int) Math.min(16384, Runtime.getRuntime().totalMemory() >> 17));
2031 dsb.putIfAbsentAEPLinkedValue(metadataCacheKey, SimpleLRUMap.<Name.ExhibitFull, Object>create(maxCacheSize, metadataCacheKey.comment));
2032 }
2033 final Object cachedMetaData = cachedMetaDataMap.get(exhibitName);
2034 if(cachedMetaData != null) { return(cachedMetaData); }
2035
2036 // Need to (re)compute metadata for this exhibit
2037 // eg for the first time and/or after an AEP load/change.
2038 final AllExhibitProperties aep = dsb.getAllExhibitProperties(-1);
2039
2040 final String result = getCatPageExhibitMetaDataHTMLRaw(exhibitName, aep);
2041 assert(result != null);
2042 //if(IsDebug.isDebug) { dsb.log("INFO: getCatPageExhibitMetaDataHTML() result size of "+result.length()+" chars"); }
2043
2044 // We do not intern() the result
2045 // since we expect each non-"" metadata value to be unique.
2046 // Providing that the system has plenty of free memory
2047 // then cache in an a compact form if possible else as a plain String.
2048 if(MemoryTools.lotsFree())
2049 {
2050 // Cache the text wrapped as an AutoExpirable with limited lifetime
2051 // to gradually reclaim unused entries automatically,
2052 // in part because these can be quite large.
2053 Object optionallyCompacted = result;
2054 try { optionallyCompacted = Compact7BitString.convertToCompact7BitString(result, sDictMD); }
2055 catch(final IllegalArgumentException e) { /* Not 7-bit text, so leave as full String. */ }
2056 final Object toCache = optionallyCompacted;
2057 // Put in cache, wrapped to expire automatically.
2058 cachedMetaDataMap.put(exhibitName, new MemoryTools.AutoExpirableFixedLifeBase(WebConsts.DEFAULT_PAGE_CACHE_MS)
2059 { @Override public String toString() { return(toCache.toString()); } });
2060 // try { cachedMetaDataMap.put(exhibitName, Compact7BitString.convertToCompact7BitString(result, sDictMD)); }
2061 // catch(final IllegalArgumentException e) { cachedMetaDataMap.put(exhibitName, result); }
2062 }
2063
2064 // Return the uncompressed result to save the caller a little time...
2065 return(result);
2066 }
2067
2068 /**Computes the raw data for getCatPageExhibitMetaDataHTML() without cacheing; never null.
2069 * @return "" if there is no metadata for this exhibit
2070 */
2071 public static String getCatPageExhibitMetaDataHTMLRaw(final Name.ExhibitFull exhibitName, final AllExhibitProperties aep)
2072 {
2073 if(null == exhibitName) { throw new IllegalArgumentException(); }
2074 final ExhibitPropsLoadable epl = aep.getExhibitPropsLoadable(exhibitName);
2075 final ExhibitPropsComputable epc = aep.getExhibitPropsComputable(exhibitName);
2076
2077 final Node metadata = (epc == null) ? null : epc.getMetadata();
2078 final AccessionData accessionMetadata = epl.getAccessionMetadata();
2079 if((metadata != null) || (accessionMetadata != null))
2080 {
2081 final StringBuilder sb = new StringBuilder(2048);
2082 if(metadata != null)
2083 { sb.append(TextUtils.toXML(metadata, true, true)); }
2084 if(accessionMetadata != null)
2085 { sb.append(TextUtils.toXML(accessionMetadata.getAsDOM(), true, true)); }
2086 return(sb.toString());
2087 }
2088
2089 // No metadata at all.
2090 return("");
2091 }
2092
2093 /**As for getCatPageExhibitMetaDataHTML(), but will compute a missing value asynchronously; never null.
2094 * If the value is already computed then it is available immediately,
2095 * else this attempts to spin off task compute the value,
2096 * and get() will block until the value is ready/computed.
2097 * <p>
2098 * If the target thread pool is full the computation will be done synchronously,
2099 * ie in this thread blocking this call until complete.
2100 * <p>
2101 * The toString() method should be called on the result to get the String text.
2102 */
2103 @SuppressWarnings("unchecked")
2104 public static Future<?> getCatPageExhibitMetaDataHTMLFuture(
2105 final DataSourceBean dsb,
2106 final Name.ExhibitFull exhibitName)
2107 {
2108 // Try first to return any extant cached value without blocking.
2109 // If present, we need not start any thread at all.
2110 final CacheMiniMap<Name.ExhibitFull,Object> cachedMetaDataMap = (CacheMiniMap<Name.ExhibitFull,Object>) dsb.getAEPLinkedValue(metadataCacheKey);
2111 if(null != cachedMetaDataMap)
2112 {
2113 final Object cachedMetaData = cachedMetaDataMap.get(exhibitName);
2114 // Return already-finished Future with value, if any.
2115 if(null != cachedMetaData)
2116 { return(ThreadUtils.makeCompletedFuture(cachedMetaData)); }
2117 }
2118
2119 // If we did not find a cached value,
2120 // then set up the task to compute the value asynchronously.
2121 final Callable<?> callable = new Callable<Object>(){
2122 public final Object call() throws Exception
2123 { return(getCatPageExhibitMetaDataHTML(dsb, exhibitName)); }
2124 };
2125 // Start the (CPU-intensive) thread immediately.
2126 final Future<?> result = ThreadUtils.computeIntensiveThreadPool.submit(callable);
2127
2128 // Return the handle for the caller to retrieve the value...
2129 return(result);
2130 }
2131
2132 /**Hyphenate long HTML text (that contains zero or more `-' characters and little or no whitespace).
2133 * Replaces hyphens ('-') with spaces to allow a browser to wrap the text.
2134 * <p>
2135 * Usually used with exhibit names or fragments of such names.
2136 */
2137 public static final String hyphenateHTMLText(final String s)
2138 {
2139 return(s.replace('-', ' '));
2140 }
2141
2142 /**Private cache mapping from (exhibit type, Accept header value) pair to acceptability of that MIME type for inlining in XHTML mobile text.
2143 * We assume that the Accept headers will be more or less constant for a given device,
2144 * and probably constant between instances of the device,
2145 * so we have enough entries (one per MB of heap, minimum 32) to cover the likely different <em>types</em> of device
2146 * using the Gallery at any one time.
2147 * <p>
2148 * We take care to avoid using huge Accept values in keys to avoid DoS-style issues.
2149 * <p>
2150 * We're prepared to discard this entirely when short of memory.
2151 * NOTE(review): originally described as "LRU thread-safe" but it is created via SimpleProbabilisticCache; confirm the eviction policy and thread-safety.
2152 * TODO: consider some form of incremental auto-expiry even when not full since keys can be relatively large (eg something like SimpleLRUMap + AutoExpirable)
2153 */
2154 private static final CacheMiniMap<Tuple.Pair<ExhibitMIME.ExhibitTypeParameters,String>, Boolean> _inlineableInXHTML =
2155 SimpleProbabilisticCache.<Tuple.Pair<ExhibitMIME.ExhibitTypeParameters,String>, Boolean>create(Math.max(32, (int)(Runtime.getRuntime().totalMemory() >> 20)), "_inlineableInXHTML");
2156
2157 /**Returns true if the given MIME-type can always be inlined in an XHTML (mobile) page.
2158 * If the type argument is null, this returns false.
2159 * <p>
2160 * This always allows JPEG and GIF,
2161 * but may also allow other (image) types listed in an incoming "Accept" header.
2162 */
2163 public static boolean canInlineInXHTMLPage(final ExhibitMIME.ExhibitTypeParameters exhibitType,
2164 final HttpServletRequest request)
2165 {
2166 if(exhibitType == null) { return(false); }
2167
2168 // Allow commonly-supported GIF (89a) and JPEG types always.
2169 switch(exhibitType.type)
2170 {
2171 case ExhibitMIME.ET_JPEG:
2172 case ExhibitMIME.ET_GIF: // Should really check GIF version.
2173 return(true);
2174 }
2175 // No header to analyse, so cannot allow more than the basic types.
2176 if(request == null) { return(false); }
2177
2178 // We are prepared to test for a limited selection of other inlineable types.
2179 switch(exhibitType.type)
2180 {
2181 case ExhibitMIME.ET_PNG:
2182 case ExhibitMIME.ET_SWF:
2183 case ExhibitMIME.ET_BMP:
2184 // TODO: WBMP, SVG?
2185 {
2186 // Look for any Accept headers.
2187 final Enumeration<?> headers = request.getHeaders("Accept");
2188 // No Accept headers to analyse, so cannot allow more than the basic types.
2189 if(headers == null) { return(false); }
2190
2191 // Allow for possibility of multiple Accept headers (probably rare though).
2192 while(headers.hasMoreElements())
2193 {
2194 final String h = (String) headers.nextElement();
2195 // We refuse to process gigantic header values at all
2196 // to preserve performance and to avoid DoS-style problems.
2197 if(h.length() >= 1024) { continue; }
2198
2199 // Make cache lookup key and attempt cache lookup.
2200 final Tuple.Pair<ExhibitMIME.ExhibitTypeParameters,String> key =
2201 new Tuple.Pair<ExhibitMIME.ExhibitTypeParameters,String>(exhibitType, h);
2202 final Boolean cachedResult = _inlineableInXHTML.get(key);
2203 if(Boolean.TRUE.equals(cachedResult)) { return(true); }
2204 if(Boolean.FALSE.equals(cachedResult)) { continue; /* Try other headers. */ }
2205 if(IsDebug.isDebug) { System.out.println("INFO: new XHTML Accept header '"+TextUtils.sanitiseForXML(h, 256, true)+"' from User-Agent: "+TextUtils.sanitiseForXML(request.getHeader("User-Agent"), 256, true)); }
2206
2207 // Do explicit search though header for specified MIME type.
2208 for(final String t : h.split(","))
2209 {
2210 // Allow for direct MIME-type match,
2211 // or with trailing q factor, eg "image/png;q=0.9",
2212 // or just a generic catch-all */*.
2213 final String trimmed = t.trim(); // Remove padding whitespace.
2214 if(trimmed.equals("*/*") ||
2215 trimmed.equals(exhibitType.mimeType) ||
2216 (trimmed.startsWith(exhibitType.mimeType) && trimmed.substring(exhibitType.mimeType.length()).startsWith(";")))
2217 {
2218 _inlineableInXHTML.put(key, Boolean.TRUE); // Cache success.
2219 return(true);
2220 }
2221 }
2222
2223 // Cache failure (against this header).
2224 _inlineableInXHTML.put(key, Boolean.FALSE);
2225 }
2226 }
2227 }
2228 return(false);
2229 }
2230
2231 /**Returns true if the given MIME-type can always be inlined in an HTML page.
2232 * If the argument is null, this returns false.
2233 */
2234 public static boolean canInlineInHTMLPage(final ExhibitMIME.ExhibitTypeParameters exhibitType)
2235 {
2236 if(exhibitType == null) { return(false); }
2237 switch(exhibitType.type)
2238 {
2239 case ExhibitMIME.ET_JPEG:
2240 case ExhibitMIME.ET_GIF:
2241 case ExhibitMIME.ET_PNG: // Most HTML browsers will accept PNG now.
2242 case ExhibitMIME.ET_SWF: // Most browsers have a Flash plug-in now.
2243 case ExhibitMIME.ET_BMP: // Embedded BMP should be widely supported.
2244 case ExhibitMIME.ET_HTMLFRAG:
2245 return(true);
2246 }
2247 return(false);
2248 }
2249
2250 /**Private key under which getCategoryTreeFilterBean() files its category-name-to-TreeFilterBean map against a DataSourceBean; never null. */
2251 private static final DataSourceBean.AEPLinkedKey _getCategoryTreeFilterBeanKey = new DataSourceBean.AEPLinkedKey("_getCategoryTreeFilterBeanKey");
2252
2253 /**Get selected by-category TreeFilterBean from entire exhibit set; never null.
2254 * Used for the "by category" exhibit tree view and elsewhere.
2255 * <p>
2256 * This data is cached linked to the DSB
2257 * (which in passing ensures that it can be dropped automatically under extreme memory stress).
2258 * <p>
2259 * The category name is primarily checked for syntactic validity,
2260 * not for actual presence in the AEP.
2261 */
2262 @SuppressWarnings("unchecked")
2263 public static TreeFilterBean getCategoryTreeFilterBean(final DataSourceBean dsb,
2264 final CharSequence category)
2265 {
2266 if((dsb == null) || !ExhibitName.validNameInitialComponentSyntax(category))
2267 { throw new IllegalArgumentException(); }
2268
2269 // Map from category name to TreeFilerBean.
2270 // Created as necessary on first use (after AEP change).
2271 ConcurrentMap<String,TreeFilterBean> trees;
2272 while(null == (trees = (ConcurrentMap<String,TreeFilterBean>) dsb.getAEPLinkedValue(_getCategoryTreeFilterBeanKey)))
2273 { dsb.putIfAbsentAEPLinkedValue(_getCategoryTreeFilterBeanKey, new ConcurrentHashMap<String,TreeFilterBean>()); }
2274
2275 // Atomically ensure that the right tree filter bean for category exists, else create it.
2276 TreeFilterBean tfb;
2277 final String categoryAsString = category.toString();
2278 while(null == (tfb = trees.get(categoryAsString)))
2279 {
2280 // Create the filter bean.
2281 tfb = new TreeFilterBean();
2282 // Give it a name for improved diagnostics.
2283 tfb.setName("category:"+categoryAsString);
2284 // Although this is assumed to be expensive to (re)compute,
2285 // allow it to be dumped unless there's loads of memory free right now
2286 // AND this is a multiprocessor (ie, assumed 'big') host.
2287 // In an unstressed system this may hang around indefinitely
2288 // providing quick access to the pages based on them.
2289 // Note that being DSB-linked allows this to be dumped under extreme memory stress anyway.
2290 tfb.setMemorySensitiveCache((ThreadUtils.AVAILABLE_PROCESSORS == 1) || !MemoryTools.lotsFree());
2291 // Set the filter for the right category.
2292 tfb.setExpr(new FilterExpr(null, new BuiltInFilters.filtByCategory(new String[]{categoryAsString})));
2293 // Store the new bean in the cache, iff no one else got there first.
2294 trees.putIfAbsent(MemoryTools.intern(categoryAsString), tfb);
2295 }
2296
2297 return(tfb);
2298 }
2299
2300 /**Flags for User-Agent pattern matching checking for mobile phones. */
2301 private static final int MOBILE_REGEX_FLAGS = Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE | Pattern.CANON_EQ;
2302
2303 // /**Mobile-browser detection regex 1 c/o detectmobilebrowser.com 2010/06/30. */
2304 // private static final Pattern MOBILE_REGEX_DMB_1_20100630 = Pattern.compile(".*(android|avantgo|blackberry|blazer|compal|elaine|fennec|hiptop|iemobile|ip(hone|od)|iris|kindle|lge |maemo|midp|mmp|opera m(ob|in)i|palm( os)?|phone|p(ixi|re)\\/|plucker|pocket|psp|symbian|treo|up\\.(browser|link)|vodafone|wap|windows (ce|phone)|xda|xiino).*", MOBILE_REGEX_FLAGS);
2305 //
2306 // /**Mobile-browser detection regex 2 c/o detectmobilebrowser.com 2010/06/30. */
2307 // private static final Pattern MOBILE_REGEX_DMB_2_20100630 = Pattern.compile("1207|6310|6590|3gso|4thp|50[1-6]i|770s|802s|a wa|abac|ac(er|oo|s\\-)|ai(ko|rn)|al(av|ca|co)|amoi|an(ex|ny|yw)|aptu|ar(ch|go)|as(te|us)|attw|au(di|\\-m|r |s )|avan|be(ck|ll|nq)|bi(lb|rd)|bl(ac|az)|br(e|v)w|bumb|bw\\-(n|u)|c55\\/|capi|ccwa|cdm\\-|cell|chtm|cldc|cmd\\-|co(mp|nd)|craw|da(it|ll|ng)|dbte|dc\\-s|devi|dica|dmob|do(c|p)o|ds(12|\\-d)|el(49|ai)|em(l2|ul)|er(ic|k0)|esl8|ez([4-7]0|os|wa|ze)|fetc|fly(\\-|_)|g1 u|g560|gene|gf\\-5|g\\-mo|go(\\.w|od)|gr(ad|un)|haie|hcit|hd\\-(m|p|t)|hei\\-|hi(pt|ta)|hp( i|ip)|hs\\-c|ht(c(\\-| |_|a|g|p|s|t)|tp)|hu(aw|tc)|i\\-(20|go|ma)|i230|iac( |\\-|\\/)|ibro|idea|ig01|ikom|im1k|inno|ipaq|iris|ja(t|v)a|jbro|jemu|jigs|kddi|keji|kgt( |\\/)|klon|kpt |kwc\\-|kyo(c|k)|le(no|xi)|lg( g|\\/(k|l|u)|50|54|e\\-|e\\/|\\-[a-w])|libw|lynx|m1\\-w|m3ga|m50\\/|ma(te|ui|xo)|mc(01|21|ca)|m\\-cr|me(di|rc|ri)|mi(o8|oa|ts)|mmef|mo(01|02|bi|de|do|t(\\-| |o|v)|zz)|mt(50|p1|v )|mwbp|mywa|n10[0-2]|n20[2-3]|n30(0|2)|n50(0|2|5)|n7(0(0|1)|10)|ne((c|m)\\-|on|tf|wf|wg|wt)|nok(6|i)|nzph|o2im|op(ti|wv)|oran|owg1|p800|pan(a|d|t)|pdxg|pg(13|\\-([1-8]|c))|phil|pire|pl(ay|uc)|pn\\-2|po(ck|rt|se)|prox|psio|pt\\-g|qa\\-a|qc(07|12|21|32|60|\\-[2-7]|i\\-)|qtek|r380|r600|raks|rim9|ro(ve|zo)|s55\\/|sa(ge|ma|mm|ms|ny|va)|sc(01|h\\-|oo|p\\-)|sdk\\/|se(c(\\-|0|1)|47|mc|nd|ri)|sgh\\-|shar|sie(\\-|m)|sk\\-0|sl(45|id)|sm(al|ar|b3|it|t5)|so(ft|ny)|sp(01|h\\-|v\\-|v )|sy(01|mb)|t2(18|50)|t6(00|10|18)|ta(gt|lk)|tcl\\-|tdg\\-|tel(i|m)|tim\\-|t\\-mo|to(pl|sh)|ts(70|m\\-|m3|m5)|tx\\-9|up(\\.b|g1|si)|utst|v400|v750|veri|vi(rg|te)|vk(40|5[0-3]|\\-v)|vm40|voda|vulc|vx(52|53|60|61|70|80|81|83|85|98)|w3c(\\-| )|webc|whit|wi(g |nc|nw)|wmlb|wonu|x700|xda(\\-|2|g)|yas\\-|your|zeto|zte\\-", MOBILE_REGEX_FLAGS);
2308
2309 /**Mobile-browser detection regex 1 c/o detectmobilebrowser.com 2011/08/12. */
2310 private static final Pattern MOBILE_REGEX_DMB_1_20110812 = Pattern.compile(".*(android.+mobile|avantgo|blackberry|blazer|compal|elaine|fennec|hiptop|iemobile|ip(hone|od)|iris|kindle|lge |maemo|midp|mmp|opera m(ob|in)i|palm( os)?|phone|p(ixi|re)\\/|plucker|pocket|psp|symbian|treo|up\\.(browser|link)|vodafone|wap|windows (ce|phone)|xda|xiino).*", MOBILE_REGEX_FLAGS);
2311
2312 /**Mobile-browser detection regex 2 c/o detectmobilebrowser.com 2011/08/12. */
2313 private static final Pattern MOBILE_REGEX_DMB_2_20110812 = Pattern.compile("1207|6310|6590|3gso|4thp|50[1-6]i|770s|802s|a wa|abac|ac(er|oo|s\\-)|ai(ko|rn)|al(av|ca|co)|amoi|an(ex|ny|yw)|aptu|ar(ch|go)|as(te|us)|attw|au(di|\\-m|r |s )|avan|be(ck|ll|nq)|bi(lb|rd)|bl(ac|az)|br(e|v)w|bumb|bw\\-(n|u)|c55\\/|capi|ccwa|cdm\\-|cell|chtm|cldc|cmd\\-|co(mp|nd)|craw|da(it|ll|ng)|dbte|dc\\-s|devi|dica|dmob|do(c|p)o|ds(12|\\-d)|el(49|ai)|em(l2|ul)|er(ic|k0)|esl8|ez([4-7]0|os|wa|ze)|fetc|fly(\\-|_)|g1 u|g560|gene|gf\\-5|g\\-mo|go(\\.w|od)|gr(ad|un)|haie|hcit|hd\\-(m|p|t)|hei\\-|hi(pt|ta)|hp( i|ip)|hs\\-c|ht(c(\\-| |_|a|g|p|s|t)|tp)|hu(aw|tc)|i\\-(20|go|ma)|i230|iac( |\\-|\\/)|ibro|idea|ig01|ikom|im1k|inno|ipaq|iris|ja(t|v)a|jbro|jemu|jigs|kddi|keji|kgt( |\\/)|klon|kpt |kwc\\-|kyo(c|k)|le(no|xi)|lg( g|\\/(k|l|u)|50|54|e\\-|e\\/|\\-[a-w])|libw|lynx|m1\\-w|m3ga|m50\\/|ma(te|ui|xo)|mc(01|21|ca)|m\\-cr|me(di|rc|ri)|mi(o8|oa|ts)|mmef|mo(01|02|bi|de|do|t(\\-| |o|v)|zz)|mt(50|p1|v )|mwbp|mywa|n10[0-2]|n20[2-3]|n30(0|2)|n50(0|2|5)|n7(0(0|1)|10)|ne((c|m)\\-|on|tf|wf|wg|wt)|nok(6|i)|nzph|o2im|op(ti|wv)|oran|owg1|p800|pan(a|d|t)|pdxg|pg(13|\\-([1-8]|c))|phil|pire|pl(ay|uc)|pn\\-2|po(ck|rt|se)|prox|psio|pt\\-g|qa\\-a|qc(07|12|21|32|60|\\-[2-7]|i\\-)|qtek|r380|r600|raks|rim9|ro(ve|zo)|s55\\/|sa(ge|ma|mm|ms|ny|va)|sc(01|h\\-|oo|p\\-)|sdk\\/|se(c(\\-|0|1)|47|mc|nd|ri)|sgh\\-|shar|sie(\\-|m)|sk\\-0|sl(45|id)|sm(al|ar|b3|it|t5)|so(ft|ny)|sp(01|h\\-|v\\-|v )|sy(01|mb)|t2(18|50)|t6(00|10|18)|ta(gt|lk)|tcl\\-|tdg\\-|tel(i|m)|tim\\-|t\\-mo|to(pl|sh)|ts(70|m\\-|m3|m5)|tx\\-9|up(\\.b|g1|si)|utst|v400|v750|veri|vi(rg|te)|vk(40|5[0-3]|\\-v)|vm40|voda|vulc|vx(52|53|60|61|70|80|81|83|85|98)|w3c(\\-| )|webc|whit|wi(g |nc|nw)|wmlb|wonu|x700|xda(\\-|2|g)|yas\\-|your|zeto|zte\\-", MOBILE_REGEX_FLAGS);
2314
2315 /**Return true if client appears likely to be a mobile device (browser sniffing).
2316 * This attempts to detect a small-display, CPU- and bandwidth- constrained device,
2317 * that might benefit from being sent small XHTML pages rather than standard HTML.
2318 * <p>
2319 * This examines the User-Agent and is unlikely to be completely reliable.
2320 * <p>
2321 * Thanks for the regex to http://detectmobilebrowser.com/
2322 */
2323 public static boolean isBrowserOnMobileDevice(final HttpServletRequest request)
2324 {
2325 final String ua = request.getHeader("User-Agent");
2326 if(null == ua) { return(false); } // No UA so assume not a mobile device.
2327 return(MOBILE_REGEX_DMB_1_20110812.matcher(ua).matches() ||
2328 MOBILE_REGEX_DMB_2_20110812.matcher(ua.substring(0,4)).matches());
2329 }
2330
2331
2332 /**If true then allow some "sneaky" browser concurrency.
2333 * For browsers that do not always pipeline by default
2334 * but do allow fetches from different-named hosts in parallel
2335 * (eg the biggies such as IE6/IE7, FF1/1.5/2 as of 2006Q4),
2336 * then we can possibly help throughput by fetching some images (etc)
2337 * from a different name or the literal IP address for this same host.
2338 * (The literal IP address has the benefit of needing no DNS lookup.)
2339 * <p>
2340 * The trick is that where the user has arrived at the site
2341 * with a name other than the local mirror name (or literal IP),
2342 * and the URI that we were going to use was a rrURL (root-relative URL),
2343 * then we can prepend the local mirror name/IP to make a new absolute URL
2344 * that the browser may be prepared to fetch from concurrently.
2345 * <p>
2346 * This has to be done completely consistently for any given item,
2347 * eg a static "page-furniture" image, to avoid defeating cacheing.
2348 */
2349 public static final boolean ALLOW_SNEAKY_HTTP_CONCURRENCY = true;
2350
2351 /**Iff true, do sneaky concurrency with a literal IP address.
2352 * This is instead of using the local mirror name,
2353 * and avoids any extra DNS lookup by the client,
2354 * and can be used even when the client is visiting a mirror explicitly.
2355 */
2356 private static final boolean LITERAL_IP_SNEAKY_HTTP_CONCURRENCY = true;
2357
2358 /**Request attribute to cache getOptionalSneakyConcurrencyRRURLPrefix() response.
2359 * Useful if sneaky concurrency is attempted several times in one response.
2360 * If a non-null value is stored against the attribute, it's used.
2361 */
2362 private static final String SNEAKY_HTTP_CONCURRENCY_REQ_ATTR_CACHE = "org.hd.pg2k.SNEAKY.CACHE";
2363
2364 /**Get optional prefix for rrURL for extra "sneaky" browser concurrency.
2365 * This can only apply if:
2366 * <ul>
2367 * <li>ALLOW_SNEAKY_HTTP_CONCURRENCY is true.
2368 * <li>This host has a mirror prefix,
2369 * ie so that there is an "alternate" name/IP for this host.
2370 * <li>The incoming request hostname is not this local mirror hostname.
2371 * <li>We do recognise the form (a vhost for) the URL in the request.
2372 * </ul>
2373 * else this routine always returns "".
2374 * <p>
2375 * This basically only works if the user is using a "generic" URL,
2376 * but that is more likely to be a machine far away (ie with large RTT),
2377 * so extra concurrency to try to overcome latency is especially helpful.
2378 * <p>
2379 * This slightly inflates the HTML that the client will see
2380 * but only if using a generic URL.
2381 * <p>
2382 * May inflate the number of concurrent connections back to this host,
2383 * but usually only by 1 or 2 at most.
2384 * <p>
2385 * Note that this scheme <em>does not</em> rely on any other host being up,
2386 * nor having exactly the same content as us.
2387 * <p>
2388 * This <em>is not</em> a technique for distributing load.
2389 *
2390 * @return "" or the http://mirror-... name (with no trailing slash) for this host
2391 */
2392 public static String getOptionalSneakyConcurrencyRRURLPrefix(final HttpServletRequest request)
2393 {
2394 if(!ALLOW_SNEAKY_HTTP_CONCURRENCY)
2395 { return(""); /* The default, ie no prefix. */}
2396
2397 final String cached = (String) request.getAttribute(SNEAKY_HTTP_CONCURRENCY_REQ_ATTR_CACHE);
2398 if(null != cached) { return(cached); }
2399
2400 final String mirrorTag = LocalProps.getMirrorTag();
2401 if(null == mirrorTag)
2402 {
2403 request.setAttribute(SNEAKY_HTTP_CONCURRENCY_REQ_ATTR_CACHE, "");
2404 return(""); /* No reliable client-visible alternate hostname for us. */
2405 }
2406
2407 // If we don't understand the hostname in the query then don't add the prefix.
2408 // (This also covers the case where the hostname is already an IP literal.)
2409 final String serverName = request.getServerName();
2410 // Our preferred form of the hostname is all-lower-case.
2411 final String serverNameLC = serverName.toLowerCase();
2412 final VirtualHosts.VirtualHost vHost =
2413 VirtualHosts.getVirtualHostDetails(serverNameLC, null);
2414 if(null == vHost)
2415 {
2416 request.setAttribute(SNEAKY_HTTP_CONCURRENCY_REQ_ATTR_CACHE, "");
2417 return(""); /* Don't recognise the name in this request or already IP literal. */
2418 }
2419
2420 final String localMirrorName = HostUtils.makeMirrorNameGeneric(mirrorTag);
2421
2422 final String sneakyName;
2423 if(LITERAL_IP_SNEAKY_HTTP_CONCURRENCY)
2424 {
2425 try {
2426 // TODO: allow IPv6 addresses too.
2427 // Attempt non-blocking fast lookup at the cost of losing sneaky concurrency for this page.
2428 final InetAddress[] v4Addresses = AddrTools.lookupARecords(localMirrorName, true);
2429 // Always use first IP address returned, assuming that all are public/routable...
2430 sneakyName = v4Addresses[0].getHostAddress();
2431 }
2432 catch(final Exception e)
2433 {
2434 request.setAttribute(SNEAKY_HTTP_CONCURRENCY_REQ_ATTR_CACHE, "");
2435 return(""); /* Cannot find an address for us. */
2436 }
2437 }
2438 else
2439 {
2440 if(localMirrorName.equals(request.getServerName()))
2441 {
2442 request.setAttribute(SNEAKY_HTTP_CONCURRENCY_REQ_ATTR_CACHE, "");
2443 return(""); /* Client is already using the mirror/alternate hostname for us. */
2444 }
2445 sneakyName = localMirrorName;
2446 }
2447
2448 // Return suitable prefix to make rrURL into absolute URL.
2449 final String sneakyPrefix = "http://" + sneakyName;
2450 request.setAttribute(SNEAKY_HTTP_CONCURRENCY_REQ_ATTR_CACHE, sneakyPrefix);
2451 return(sneakyPrefix);
2452 }
2453
2454 /**Prefix of all user testimonials in the global properties. */
2455 private static final String TEST_PNAME_PREFIX = "org.hd.org.pg2k.testimonial.";
2456
2457 /**Logically immutable cached lists of testimonal Strings by language (not locale); never null.
2458 * Private to getUserTestimonial().
2459 * <p>
2460 * No null keys, not null/empty values.
2461 * <p>
2462 * Small fixed size.
2463 * <p>
2464 * Thread-safe (and supporting reasonable concurrency if possible).
2465 */
2466 private static final Map<String, List<String>> _gUT_cache = new ConcurrentHashMap<String, List<String>>();
2467
2468 /**Get short user quote/testimonial at random from those available for the given locale; never null but may be "".
2469 * This finds a quote, if any, suitable for the current locale
2470 * (infact, currently just the language is matched),
2471 * picked randomly from those available, or "" if none is available.
2472 * <p>
2473 * The text is HTML/XML safe, and is pure 7-bit printable ASCII,
2474 * with any non-ASCII characters encoded as HTML/XML entities.
2475 *
2476 * @param l the required locale; never null
2477 */
2478 public static String getUserTestimonial(final LocaleBeanBase l)
2479 {
2480 if(l == null) { throw new IllegalArgumentException(); }
2481
2482 // If the cache is completely empty then try to populate it now.
2483 // Other calls while we are builing the cache are safe
2484 // but may see empty entries until we are done.
2485 if(_gUT_cache.isEmpty())
2486 {
2487 try
2488 {
2489 // We build a map of by-language lists,
2490 // which we then wrap up as immutable and post in the cache map (atomically).
2491 final Map<String, ArrayList<String>> m = new HashMap<String, ArrayList<String>>();
2492 final ResourceBundle gp = ResourceBundle.getBundle(CoreConsts.GLOBAL_PROPS_NAME);
2493 // Common prefix for all testimonial properties.
2494 final int prefixLength = TEST_PNAME_PREFIX.length();
2495 // Regex pattern to match the tail of each testimonial property.
2496 final Pattern p = Pattern.compile("^[a-z][a-z][.].*");
2497 // Search the global properties...
2498 for(final String key : gp.keySet())
2499 {
2500 // Skip entries not of interest to us.
2501 if(!key.startsWith(TEST_PNAME_PREFIX)) { continue; }
2502 // We expect to find 2 lower-case letters and then another dot
2503 // and then a unique number/ID (which we don't use).
2504 final String tail = key.substring(prefixLength);
2505 if(!p.matcher(tail).matches())
2506 {
2507 System.err.println("WARNING: bad testimonial property name prefix "+key+" in "+CoreConsts.GLOBAL_PROPS_NAME+" properties");
2508 continue;
2509 }
2510 final String lang = tail.substring(0, 2); // Extract the language code...
2511 // Add this to the end of the appropriate List, or create one if need be.
2512 ArrayList<String> list = m.get(lang);
2513 if(list == null)
2514 {
2515 list = new ArrayList<String>();
2516 m.put(lang, list);
2517 }
2518 list.add(gp.getString(key));
2519 }
2520 // Copy immutable versions of the by-language lists into the cache.
2521 for(final String lang : m.keySet())
2522 {
2523 final ArrayList<String> arrayList = m.get(lang);
2524 arrayList.trimToSize(); // Save space since this won't ever be expanded...
2525 _gUT_cache.put(lang, Collections.unmodifiableList(arrayList)); // Atomically update cache.
2526 }
2527 }
2528 catch(final Throwable t)
2529 {
2530 System.err.println("ERROR: problem retrieving testimonial texts.");
2531 t.printStackTrace();
2532 return(""); // Nothing will be ready yet...
2533 }
2534 }
2535
2536 // Retrieve all quotes for the specified language.
2537 final List<String> quotes = _gUT_cache.get(l.getLocale().getLanguage());
2538 if(quotes != null)
2539 {
2540 assert(!quotes.isEmpty()); // Should be no empty lists.
2541 final int qSize = quotes.size();
2542 if(qSize == 1) { return(quotes.get(0)); }
2543 return(quotes.get(Rnd.fastRnd.nextInt(qSize)));
2544 }
2545
2546 return(""); // Nothing available.
2547 }
2548
2549
2550 /**Private key used by getTrialData(); never null. */
2551 private static final AEPLinkedKey trailDataCacheKey = new AEPLinkedKey("trailDataCacheKey");
2552
2553 /**Get the TrailData for a given trail exhibit (by full name); null if none.
2554 * This caches the result in the DSB, linked to the AEP,
2555 * so never retaining data for expired trails.
2556 * <p>
2557 * (This may negatively cache failure to load TrailData (for a while) for efficiency.)
2558 * <p>
2559 * Safe to apply to an arbitrary/unvetted exhibit name, even an invalid/null value.
2560 */
2561 @SuppressWarnings("unchecked")
2562 public static TrailData getTrailData(final DataSourceBean dsb,
2563 final Name.ExhibitFull trailExhibitFullName)
2564 {
2565 if((dsb == null) || (trailExhibitFullName == null)) { return(null); }
2566
2567 // Get cache (or atomically create on first access, eg after an AEP change).
2568 ConcurrentMap<Name.ExhibitFull,TrailData> cache;
2569 while(null == (cache = (ConcurrentMap<Name.ExhibitFull,TrailData>) dsb.getAEPLinkedValue(trailDataCacheKey)))
2570 { dsb.putIfAbsentAEPLinkedValue(trailDataCacheKey, new ConcurrentHashMap<Name.ExhibitFull,TrailData>()); }
2571
2572 // Return value immediately if already cached...
2573 final TrailData cachedValue = cache.get(trailExhibitFullName);
2574 if(cachedValue != null) { return(cachedValue); }
2575
2576 // // If the exhibit name is invalid (eg from bogus Web/unsafe input)
2577 // // then don't both even trying to compute a value to save time and risk.
2578 // if(!ExhibitName.validNameFinalComponentSyntax(trailExhibitName)) { return(null); }
2579
2580 try
2581 {
2582 // Compute and cache value if possible.
2583 final TrailData value = TrailData.readTrailFromExhibit(dsb,
2584 trailExhibitFullName, dsb.getLogger());
2585 if(value != null) { cache.putIfAbsent(trailExhibitFullName, value); }
2586 return(value);
2587 }
2588 catch(final Exception e)
2589 {
2590 // Report but absorb any error to avoid it propagating upwards...
2591 e.printStackTrace();
2592 return(null);
2593 }
2594 }
2595
2596
2597 /**Private key used by approxWordCount(); never null. */
2598 private static final AEPLinkedKey _awc_CacheKey = new AEPLinkedKey("_awc_CacheKey");
2599
2600 /**Compute (crude) estimate of words in catalogue page for given exhibit; non-negative.
2601 * This is designed to be reasonably fast, though not necessarily amazingly accurate,
2602 * and is intended to help decide how many ad blocks a page may reasonably support.
2603 * <p>
2604 * This may cache its results against the AEP instance.
2605 * <p>
2606 * This counts 'non-furniture' words, ie those originating from the data itself,
2607 * including the exhibit name, exhibit description, tree AKA/description, etc,
2608 * with different constituents possibly weighted differently.
2609 * <p>
2610 * For simplicity, this does its computations based on the default site language,
2611 * even if there may be significant variation in apparent word count
2612 * for other localisations.
2613 *
2614 * @return zero in case of difficulty (eg exhibit does not exist),
2615 * else approximate (positive) word count
2616 */
2617 public static final int approxWordCount(final DataSourceBean dsb,
2618 final Name.ExhibitFull fullExhibitName)
2619 {
2620 if((dsb == null) || (fullExhibitName == null))
2621 { throw new IllegalArgumentException(); }
2622
2623 // Get existing (thread-safe) cache, or atomically create it if necessary.
2624 // The cache is a size-limited thread-safe Map with reasonably-fast get()
2625 // from full exhibit name to (positive) word count.
2626 // Races here may result in some wasted work but no errors.
2627 // Cap relative to heap size: about 8k (2^13) entries for 1GB (2^30) heap.
2628 MemoryTools.CacheMiniMap<Name.ExhibitFull,Integer> cachedCounts;
2629 while((cachedCounts = GenUtils.<MemoryTools.CacheMiniMap<Name.ExhibitFull,Integer>>cast(dsb.getAEPLinkedValue(_awc_CacheKey))) == null)
2630 { dsb.putIfAbsentAEPLinkedValue(_awc_CacheKey, SimpleProbabilisticCache.<Name.ExhibitFull,Integer>create(Math.max(128, (int) (Runtime.getRuntime().totalMemory() >>> 17)), _awc_CacheKey.comment)); }
2631 // Return the cached word count, if present.
2632 final Integer cachedCount = cachedCounts.get(fullExhibitName);
2633 if(cachedCount != null) { return(cachedCount.intValue()); }
2634
2635 // Get a default-locale LocaleBean
2636 // to extract the descriptive/AKA text with.
2637 final LocaleBeanBase lb = new LocaleBean();
2638
2639 // Count words in any extant per-exhibit description text.
2640 final AllExhibitProperties aep = dsb.getAllExhibitProperties(-1);
2641
2642 // Return (uncached) zero for non-extant exhibit.
2643 if(null == aep.aeid.getStaticAttr(fullExhibitName))
2644 { return(0); }
2645
2646 // Running word count for this exhibit.
2647 int wordCount = 0;
2648
2649 // Count words in its name main component, including modifiers.
2650 wordCount += ExhibitName.getMainWordsCount(fullExhibitName, Collections.<String>emptySet());
2651 assert(wordCount > 0);
2652
2653 final ExhibitPropsLoadable epl = aep
2654 .getExhibitPropsLoadable(fullExhibitName);
2655 final String description = epl.getDescription();
2656 // Use the slow-but-sensible (eg for line-end handling) tokeniser.
2657 if(description != null)
2658 { wordCount += TextUtils.quickWordCount(description); }
2659
2660 // Get AKA/treedesc text with as little markup as we can easily manage.
2661 final String akaText = GenUtils.getLocalisedTreeDesc(aep,
2662 fullExhibitName, lb, true, true, false, false).toString();
2663 // Only count real spaces as word boundaries
2664 // and halve the raw word count to allow for markup, repetition, etc.
2665 // FIXME: use find() or somesuch to avoid redundant construction of substrings.
2666 if(!akaText.isEmpty())
2667 { wordCount += (TextUtils.quickWordCount(akaText) >>> 1); }
2668
2669 // Add (at low weighting) word count of section text.
2670 final CharSequence sectionDesc = GenUtils.getLocalisedSectionDesc(aep,
2671 ExhibitName.getCategoryComponent(fullExhibitName),
2672 lb);
2673 // Only count real spaces as word boundaries
2674 // and quarter the raw word count to allow for markup and lack of uniqueness
2675 // ie the fact that descriptive text is shared between related exhibits.
2676 // FIXME: use find() or somesuch to avoid redundant construction of substrings.
2677 if(sectionDesc != null)
2678 { wordCount += (TextUtils.quickWordCount(sectionDesc) >>> 2); }
2679
2680 // Cache the (complete) approx word count for next time...
2681 cachedCounts.put(fullExhibitName, Integer.valueOf(wordCount));
2682
2683 if(IsDebug.isDebug) { System.out.println("[Approx word count "+wordCount+" on cat page for "+fullExhibitName+".]"); }
2684 return(wordCount);
2685 }
2686
2687 /**Events to be examined by isPopularCatalogueEntry(); never null.
2688 * These must all have VLONG data stored.
2689 */
2690 private static final List<SimpleVariableDefinition> _iPCE_vars = Arrays.asList(new SimpleVariableDefinition[] {
2691 SystemVariables.ACCESSPATTERN_CAT_PAGE_VIEW,
2692 SystemVariables.ACCESSPATTERN_COMPLETED_DOWNLOAD,
2693 SystemVariables.ACCESSPATTERN_COMPLETED_DOWNLOAD_LOCAL,
2694 });
2695
2696 /**Returns true iff the named exhibit and/or catalogue page is popular (well visited/downloaded).
2697 * Uses the history to decide if a catalogue page and its exhibit
2698 * are frequently visited/downloaded
2699 * (wrt other catalogue pages globally and locally).
2700 *
2701 * @param vars source of event history; never null
2702 * @param exhibitFullName full exhibit name; never null
2703 * @return true if popular, false otherwise
2704 */
2705 public static boolean isPopularCatalogueEntry(final SimpleVariablePipelineIF vars,
2706 final CharSequence exhibitFullName)
2707 {
2708 if((vars == null) || (exhibitFullName == null)) { throw new IllegalArgumentException(); }
2709
2710 final String shortName = ExhibitName.getFileComponent(exhibitFullName).toString();
2711 // TODO: allow for shortened unique-key form in future...
2712
2713 for(final SimpleVariableDefinition var : _iPCE_vars)
2714 {
2715 // Look for activity yesterday and today.
2716 final EventVariableValue eventsToday = vars.getEventValue(var, EventPeriod.VLONG, true);
2717 if(eventsToday.getRank(shortName) < (eventsToday.getTotalDistinctValues()/2))
2718 { return(true); }
2719 final EventVariableValue eventsYesterday = vars.getEventValue(var, EventPeriod.VLONG, false);
2720 if(eventsYesterday.getRank(shortName) < (eventsYesterday.getTotalDistinctValues()/3))
2721 { return(true); }
2722
2723 // Now we look into the full collected history for this value.
2724 final EventVariableValue[] all = vars.getEventValues(var, EventPeriod.VLONG, 0, null);
2725 // If no "all" history at all or not enough to be significant
2726 // then the item may just not be especially popular...
2727 if(all.length < 1) { continue; }
2728 final EventVariableValue allEVV = all[0];
2729 if(allEVV == null) { continue; }
2730 if(allEVV.getRank(shortName) < (allEVV.getTotalDistinctValues()/4))
2731 { return(true); }
2732 }
2733
2734 return(false); // Not popular apparently.
2735 }
2736
2737
2738 /**Private key used by findLatestCodeBundle(); never null. */
2739 private static final AEPLinkedKey findLatestCodeBundleKey = new AEPLinkedKey("findLatestCodeBundleKey");
2740
2741 /**Name of the section/dir in which code/doc bundles are filed. */
2742 private static final String CODE_SECTION_DIR = "code";
2743
2744 /**Returns full exhibit name for latest version of a code bundle, or null if none.
2745 * This locates the latest (with a major-minor-micro versioning) bundle
2746 * in the 'code' section, for the given prefix, or null of none.
2747 * <p>
2748 * The author and extension are ignored for selection purposes.
2749 * <p>
2750 * For example, for the prefix/argument 'javadoc',
2751 * if the code section includes the files
2752 * 'javadoc-1-2-3-DHD.zip' and javadoc-1-10-1-ANON.zip'
2753 * this will return 'code/javadoc-1-10-1-ANON.zip'.
2754 * <p>
2755 * To be found a bundled archive name must be exactly of the form:<br />
2756 * <code>prefix-major-minor-micro-AUTH.XTN</code><br />
2757 * where the major, minor and micro components are (small, non-negative) integers.
2758 * There must be no attributes present.
2759 * <p>
2760 * (Note that a '-' is appended to the supplied prefix.)
2761 * <p>
2762 * This may cache the results against the AEP instance,
2763 * since the lookup may happen may times
2764 * and we may have to search through a fair amount of data for each lookup.
2765 * Note: this does not cache negative results
2766 * in part to bound the amount of space that can be consumed.
2767 *
2768 * @param dsb current data source; never null
2769 * @param prefix legitimate short-name as bundle name; never null nor empty
2770 */
2771 @SuppressWarnings("unchecked")
2772 public static Name.ExhibitFull findLatestCodeBundle(final DataSourceBean dsb, final String prefix)
2773 {
2774 if(dsb == null) { throw new IllegalArgumentException(); }
2775 if((prefix == null) || (prefix.length() < 1)) { throw new IllegalArgumentException(); }
2776
2777 final ServletContext context = dsb.getServletContext();
2778 if(context == null) { throw new IllegalStateException(); }
2779
2780 // Look up the prefix in our (thread-safe) cache from prefix to full exhibit name.
2781 // This is AEP-linked, so is automatically discarded when a new AEP appears.
2782 ConcurrentMap<String,Name.ExhibitFull> cache = (ConcurrentMap<String,Name.ExhibitFull>) (dsb.getAEPLinkedValue(findLatestCodeBundleKey));
2783 Name.ExhibitFull result = null;
2784 if(cache != null)
2785 {
2786 result = cache.get(prefix);
2787 if(result != null) { return(result); /* Return cached result! */ }
2788 }
2789
2790 // Do the lookup.
2791 // First quickly filter for only plausible candidates
2792 // with the correct prefix and in the correct section.
2793 final String fullNamePrefix = CODE_SECTION_DIR + '/';
2794 final String shortNamePrefix = prefix + "-";
2795 final AllExhibitProperties aep= dsb.getAllExhibitProperties(-1);
2796 final Name.ExhibitFull candidates[] = aep.select(new AEPFilter() {
2797 /* (non-Javadoc)
2798 * @see org.hd.d.pg2k.svrCore.AllExhibitProperties.AEPFilter#accept(org.hd.d.pg2k.svrCore.AllExhibitProperties, java.lang.String)
2799 */
2800 //@Override
2801 public boolean accept(final AllExhibitProperties aep, final Name.ExhibitFull fullExhibitName)
2802 {
2803 // Quickly filter for the correct section.
2804 // if(!TextUtils.contentEquals(ExhibitName.getCategoryComponent(fullExhibitName), CODE_SECTION_DIR)) { return(false); }
2805 if(!TextUtils.startsWith(fullExhibitName, fullNamePrefix)) { return(false); }
2806 final Name.ExhibitShort shortName = fullExhibitName.getShortName();
2807 // Quickly filter for the correct bundle name.
2808 if(!TextUtils.startsWith(shortName, shortNamePrefix)) { return(false); }
2809 // OK, we can check more thoroughly later..
2810 return(true);
2811 }
2812 }, null, 0);
2813 if(candidates.length == 0)
2814 {
2815 context.log("WARNING: no candidate bundles with prefix "+prefix);
2816 return(null); /* No candidates... */
2817 }
2818
2819 // Now find the highest-versioned syntactically-valid candidate, if any...
2820 int bestMajor = -1;
2821 int bestMinor = -1;
2822 int bestMicro = -1;
2823 for(final Name.ExhibitFull c : candidates)
2824 {
2825 final Name.ExhibitShort shortName = c.getShortName();
2826 assert(TextUtils.startsWith(shortName, shortNamePrefix)) : "should have the correct bundle name";
2827 // Rip off prefix and tokenise remainder (and ignore trailing AUTH.XTN part as last token).
2828 final int snpl = shortNamePrefix.length();
2829 final String[] tokens = shortName.subSequence(snpl, shortName.length()).toString().split(ExhibitName.WORD_SEPS);
2830 if(tokens.length != 4)
2831 {
2832 context.log("WARNING: badly-named (wrong token count "+tokens.length+") candidate bundle for prefix "+prefix+": "+c);
2833 continue; /* Invalid format. */
2834 }
2835 try
2836 {
2837 final int maj = Integer.parseInt(tokens[0], 10);
2838 assert(maj >= 0);
2839 final int min = Integer.parseInt(tokens[1], 10);
2840 assert(min >= 0);
2841 final int mic = Integer.parseInt(tokens[2], 10);
2842 assert(mic >= 0);
2843 if(maj < bestMajor) { continue; /* Too old. */ }
2844 if(maj > bestMajor)
2845 { result = c; bestMajor = maj; bestMinor = min; bestMicro = mic; continue; /* Best so far! */ }
2846 if(min < bestMinor) { continue; /* Too old. */ }
2847 if(min > bestMinor)
2848 { result = c; bestMajor = maj; bestMinor = min; bestMicro = mic; continue; /* Best so far! */ }
2849 if(mic < bestMicro) { continue; /* Too old. */ }
2850 if(mic > bestMicro)
2851 { result = c; bestMajor = maj; bestMinor = min; bestMicro = mic; continue; /* Best so far! */ }
2852 }
2853 catch(final NumberFormatException e)
2854 {
2855 context.log("WARNING: badly-named (NumberFormatException + "+e.getMessage()+") candidate bundle for prefix "+prefix+": "+c);
2856 continue; /* Invalid format. */
2857 }
2858 }
2859
2860 // If the result is positive then cache it,
2861 // creating a new (thread-safe) cache if necessary.
2862 // Negative results are not cached.
2863 if(result != null)
2864 {
2865 while(cache == null)
2866 {
2867 dsb.putIfAbsentAEPLinkedValue(findLatestCodeBundleKey, new ConcurrentHashMap<String, Name.ExhibitFull>());
2868 // Retrieve whatever the current cache now is (there may have been a race).
2869 cache = (ConcurrentMap<String,Name.ExhibitFull>) (dsb.getAEPLinkedValue(findLatestCodeBundleKey));
2870 }
2871 // Update the cached value if not already done...
2872 cache.putIfAbsent(prefix, result);
2873
2874 if(IsDebug.isDebug) { context.log("INFO: found bundle for prefix "+prefix+" as "+result); }
2875 }
2876
2877 else { context.log("WARNING: found no bundle for prefix "+prefix); }
2878
2879 // Return the result!
2880 return(result);
2881 }
2882
2883 /**Returns true (and sets SC_NOT_MODIFIED status) iff the caller should avoid sending a GET response body.
2884 * Intended to be be called by a servlet handling a GET/HEAD operation
2885 * before most headers are set or any response body is sent/commited.
2886 *
2887 * @param lastModified last time this entity changed, or -1 if not known / not applicable
2888 * @param request never null (unless lastModified == -1)
2889 * @param response never null (unless lastModified == -1)
2890 * @return true if SC_NOT_MODIFIED has been set and servlet should return immediately
2891 * without sending a body,
2892 * false if no status set and body may still have to be sent.
2893 */
2894 public static boolean abortIfNotModifiedSince(final long lastModified,
2895 final HttpServletRequest request,
2896 final HttpServletResponse response)
2897 {
2898 if(lastModified != -1)
2899 {
2900 if(null == request) { throw new IllegalArgumentException(); }
2901 // Handle any If-Modified-Since if not already done by the container.
2902 final long ifModifiedSince = request.getDateHeader("If-Modified-Since");
2903 if((ifModifiedSince > -1) && (lastModified/1000 <= ifModifiedSince/1000))
2904 {
2905 if(null == response) { throw new IllegalArgumentException(); }
2906 // Don't send body because content has not been modified
2907 // (since client last requested it).
2908 response.setStatus(HttpServletResponse.SC_NOT_MODIFIED);
2909 return(true);
2910 }
2911 }
2912
2913 // Content (may have) changed; do send response body.
2914 return(false);
2915 }
2916
2917 /**Returns true (and sets SC_NOT_MODIFIED status) iff the caller should avoid sending a GET response body.
2918 * Intended to be be called by a servlet handling a GET/HEAD operation
2919 * before most headers are set or any response body is sent/commited.
2920 *
2921 * @param eTag valid single ETag token, strong or weak, for the page; null if not known / not applicable
2922 * @param lastModified last time this entity changed; -1 if not known / not applicable
2923 * @param request never null (unless eTag == null and lastModified == -1)
2924 * @param response never null (unless eTag == null and lastModified == -1)
2925 * @return true if SC_NOT_MODIFIED has been set and servlet should return immediately
2926 * without sending a body,
2927 * false if no status set and body may still have to be sent.
2928 */
2929 public static boolean abortIfETagMatchOrNotModifiedSince(final String eTag,
2930 final long lastModified,
2931 final HttpServletRequest request,
2932 final HttpServletResponse response)
2933 {
2934 if(null != eTag)
2935 {
2936 if(!eTag.endsWith("\"")) { throw new IllegalArgumentException(); }
2937 if(null == request) { throw new IllegalArgumentException(); }
2938 if(null == response) { throw new IllegalArgumentException(); }
2939 final String inm = request.getHeader("If-None-Match");
2940 if(null != inm)
2941 {
2942 if("*".equals(inm))
2943 {
2944 // Since this entity exists (else this call wouldn't be being made),
2945 // abort the body with 'NOT MODIFIED'.
2946 response.setStatus(HttpServletResponse.SC_NOT_MODIFIED);
2947 return(true);
2948 }
2949
2950 // Parse the tokens from the header value
2951 // and check for any matches, weak or strong.
2952 final BasicHeader bh = new BasicHeader("If-None-Match", inm);
2953 for(final HeaderElement he : bh.getElements())
2954 {
2955 final String token = he.toString();
2956 if(eTag.equals(token))
2957 {
2958 // Since one of the tokens matches our ETag
2959 // abort the body with 'NOT MODIFIED'.
2960 response.setStatus(HttpServletResponse.SC_NOT_MODIFIED);
2961 return(true);
2962 }
2963 }
2964
2965 // If INM header is present then must not fall back to LM/IMS.
2966 // Content (may have) changed; do send response body.
2967 return(false);
2968 }
2969 }
2970
2971 // Fall back to use of If-Modified-Since in absence of ETag/If-None-Match.
2972 return(abortIfNotModifiedSince(lastModified, request, response));
2973 }
2974
2975 /**Compute a suitable cache expiry time for a usually slowly-changing object (ms); non-negative.
2976 * Treats the item as if almost static in terms of rate of change
2977 * (but constrains the result to be no longer than the minimum for static objects).
2978 * <p>
2979 * Makes the cache time usually a significant multiple of
2980 * the interval between rechecks of exhibit immutable data
2981 * as this is expected to change relatively slowly.
2982 * <p>
2983 * Extend to a reasonable fraction of the underlying item's time since last change
2984 * capped to the maximum allowed for static content,
2985 * essentially replicating a common heuristic from browsers.
2986 * <p>
2987 * Increase it if the system is conserving/busy so as to reduce future server load.
2988 */
2989 public static long computeCacheMaxAgeMSFromTimestamp(
2990 final long timestamp,
2991 final ServletContext ctxt, final org.hd.d.pg2k.svrCore.props.GenProps gp)
2992 {
2993 final boolean conserve = GenUtils.mustConservePower() || WebUtils.isOverloaded(ctxt);
2994 final long basicCacheLifetime = Math.max((System.currentTimeMillis() - timestamp) >> 2,
2995 (Math.max(CoreConsts.DEFAULT_TEMPORAL_SLACKNESS_S * 1000,
2996 gp.getWEBSVR_MIN_EX_IMATTR_RECHECK_MS()) << (conserve ? 5 : 3)));
2997 // Constrain to be never more than the minimum lifetime of static/furniture items.
2998 return(Math.min(WebConsts.MIN_STATIC_WEBITEMS_CACHE_MS, basicCacheLifetime));
2999 }
3000
3001 /**Get "newsflash" HTML for the main site front page, or "" if none; never null.
3002 * This is retrieved from the GenProps.
3003 */
3004 public static String getNewsflashHTML(final GenProps gp)
3005 {
3006 if(gp == null) { return(""); /* Be kind to the caller. */ }
3007 final String result = gp.getGen().get(GenPropsGenNames.GEN_NEWSFLASH_HTML);
3008 if(result == null) { return(""); }
3009 return(result);
3010 }
3011
3012
3013
3014 /**Target traffic cycle time for recentTrafficLowForLatestCycle() in milliseconds; strictly positive.
3015 * This is typically a week or multiple thereof to allow for the usual major cycle in traffic flows,
3016 * and to detect weekends and holiday dips for example.
3017 */
3018 private static final int WEB_TRAFFIC_CYCLE_MS = 7 * 24 * 3600 * 1000; // 1W (604,800,000ms, which fits in an int).
3019
3020 /**Sample interval into which WEB_TRAFFIC_CYCLE_MS is divided; not null.
3021 * A smaller interval gives a finer-grained response at the cost of more work.
3022 * Typically MEDIUM or LONG would be used to have a response time around the hour mark.
3023 */
3024 private static final EventPeriod WEB_TRAFFIC_SAMPLE_PERIOD = EventPeriod.MEDIUM;
3025
3026 /**Minimum number of samples to trust when deciding traffic levels; strictly positive.
3027 * A value between about 4 and whatever a whole day is (inclusive) probably makes sense.
3028 */
3029 private static final int WEB_TRAFFIC_MIN_SAMPLES = 4; // Math.max(4, (24*3600*1000)/WEB_TRAFFIC_SAMPLE_PERIOD.getIntervalMs());
3030
3031 /**Integer number of WEB_TRAFFIC_SAMPLE_PERIOD sample periods used to cover WEB_TRAFFIC_CYCLE_MS, at least WEB_TRAFFIC_MIN_SAMPLES but capped at SystemVariables.EVENT_SAMPLES_RETAINED; strictly positive. */
3032 private static final int WEB_TRAFFIC_SAMPLE_PERIODS = Math.min(
3033 Math.max(WEB_TRAFFIC_CYCLE_MS / WEB_TRAFFIC_SAMPLE_PERIOD.getIntervalMs(), WEB_TRAFFIC_MIN_SAMPLES),
3034 SystemVariables.EVENT_SAMPLES_RETAINED);
3035
3036 /**If true then ignore any leading run of (oldest) zero traffic counts to give initial faster response for new server. */
3037 private static final boolean WEB_TRAFFIC_TRIM_LEADING_ZEROS = true;
3038
3039 /**Non-AEP-linked cache key for recentTrafficLowForLatestCycle() result; non-null.
3040 * Stored value against key is an (immutable) pair of interval number and boolean 'traffic low' flag.
3041 * The interval number is usually that of the current WEB_TRAFFIC_SAMPLE_PERIOD interval,
3042 * but a more cautious shorter-term key with disjoint values can be used instead
3043 * to avoid repeated recomputation in the face of possibly-transient problems such as with start-up and I/O.
3044 */
3045 private static final DataSourceBean.UnlinkedKey rTLFLC_resultKey = new DataSourceBean.UnlinkedKey("rTLFLC_resultKey");
3046
3047 /**Returns true if the most recent complete sample period had low traffic compared with the other periods sampled over the cycle.
3048 * By default true if the previous period's catalogue-page hit count ranks in the bottom quartile of the sampled counts (a different percentage threshold can be supplied via GenProps),
3049 * typically for a recent hour or thereabouts (to allow a nimble response) compared to a weekly cycle.
3050 * Can be used to stabilise ad revenue for example by switching in extra ads
3051 * when traffic is low, for example at weekends.
3052 * <p>
3053 * The calculation result is cached (against the dsb) to reduce CPU effort; a 'cannot compute now' outcome is cached as false under a short-term key so that it is retried soon.
3054 *
3055 * @param dsb data source bean from which to retrieve stats; never null (else IllegalArgumentException is thrown)
3056 * @return false unless recent traffic was clearly below normal over a typical cycle; also false if the result cannot currently be computed
3057 */
3058 public static boolean recentTrafficLowForLatestCycle(final DataSourceBean dsb)
3059 {
3060 if(null == dsb) { throw new IllegalArgumentException(); }
3061
3062 // DEBUG MODE ONLY: additional ability to wiggle status on or off from LocalProps.
3063 if(IsDebug.isDebug)
3064 {
3065 final String f = LocalProps.getGen().get(GenPropsGenNames.GPGEN_AD_FORCE_LOWTRAFFIC_MODE);
3066 if(f != null) { return(Boolean.parseBoolean(f)); }
3067 }
3068
3069 // Compute current period for which result is good (and usually cached).
3070 // Data will be sampled up to and including the previous period.
3071 final long now = System.currentTimeMillis();
3072 final long currentInterval = WEB_TRAFFIC_SAMPLE_PERIOD.getIntervalNumber(now);
3073
3074 // Return cached value if present and valid for the current period.
3075 final Tuple.Pair<Long, Boolean> cached = (Tuple.Pair<Long, Boolean>)dsb.getUnlinkedValue(rTLFLC_resultKey);
3076 if((null != cached) && (currentInterval == cached.first.longValue()))
3077 { return(cached.second); }
3078
3079 // Don't spend time (re)computing this if short of energy; indicate 'don't know' immediately.
3080 if(GenUtils.mustConservePowerExtreme()) { return(false); }
3081
3082 // Try cache with short-term key (lasting a minute or so), disjoint with the main key.
3083 // We use this for presumed-transient results only,
3084 // and store it only if no value or an old one is present
3085 // so as to avoid overwriting a concurrently-computed value.
3086 final long shortTermKey = now >> 17; // ~2 minute lifetime.
3087 if((null != cached) && (shortTermKey == cached.first.longValue()))
3088 { return(cached.second); }
3089 // Create result for caching short-term transient/holding result if necessary.
3090 final Tuple.Pair<Long, Boolean> shortTermFalse = new Tuple.Pair<Long, Boolean>(shortTermKey, false);
3091
3092 // Need to (re)compute value.
3093 // Try to minimise time from here to caching a result
3094 // to minimise probability of races where two or more threads redundantly compute the result concurrently.
3095 try
3096 {
3097 if(IsDebug.isDebug) { dsb.log("recentTrafficLowForLatestCycle(): computing for periods: " + WEB_TRAFFIC_SAMPLE_PERIODS); }
3098
3099 // The metric used is the number of local catalogue page hits.
3100 // This local stat may work best for tuning ad behaviour to local needs and traffic
3101 // and may be more robust than any global estimate in the face of comms problems, etc.
3102 // Will generally require a full cycle to pass for any new mirror/instance to participate.
3103 final SimpleVariableDefinition stat = SystemVariables.GENSTATS_STRING_LOCAL_EVENT;
3104 final BitSet whichValues = new BitSet(WEB_TRAFFIC_SAMPLE_PERIODS);
3105 whichValues.set(0, WEB_TRAFFIC_SAMPLE_PERIODS); // Request every one of the sample periods.
3106 final EventVariableValue[] evvs = dsb.getEventValues(stat,
3107 WEB_TRAFFIC_SAMPLE_PERIOD,
3108 currentInterval-1,
3109 whichValues);
3110 assert(null != evvs);
3111 // Get values for slot just before the current one.
3112 final EventVariableValue evvPrev;
3113 if((0 == evvs.length) || (null == (evvPrev = evvs[0])))
3114 {
3115 if(null != cached) { dsb.replaceUnlinkedValue(rTLFLC_resultKey, cached, shortTermFalse); } else { dsb.putIfAbsentUnlinkedValue(rTLFLC_resultKey, shortTermFalse); }
3116 dsb.log("recentTrafficLowForLatestCycle(): no evv data for previous slot, cannot compute now, will retry");
3117 return(false); // May be transient problem so result cached only briefly.
3118 }
3119 final int totalPrevCount = evvPrev.getCount(ThroughputMonitorFilterPG2K.THRFNAME_HIT_CAT_PAGE);
3120 if(IsDebug.isDebug) { dsb.log("recentTrafficLowForLatestCycle(): prev interval count: " + totalPrevCount); }
3121 // Collect and filter, and (below) sort, all interval counts.
3122 final ArrayList<Integer> counts = new ArrayList<Integer>(evvs.length);
3123 // Examine evvs, oldest first.
3124 for(int i = evvs.length; --i >= 0; )
3125 {
3126 final EventVariableValue evv = evvs[i];
3127 if(null == evv) { continue; } // Exclude outages.
3128 final int count = evv.getCount(ThroughputMonitorFilterPG2K.THRFNAME_HIT_CAT_PAGE);
3129 if(WEB_TRAFFIC_TRIM_LEADING_ZEROS && (0 == count) && counts.isEmpty()) { continue; } // Skip zero counts until the first non-zero count has been collected.
3130 counts.add(count);
3131 }
3132 final int intervalCount = counts.size();
3133 if(intervalCount < WEB_TRAFFIC_MIN_SAMPLES)
3134 {
3135 if(null != cached) { dsb.replaceUnlinkedValue(rTLFLC_resultKey, cached, shortTermFalse); } else { dsb.putIfAbsentUnlinkedValue(rTLFLC_resultKey, shortTermFalse); }
3136 if(IsDebug.isDebug) { dsb.log("recentTrafficLowForLatestCycle(): too few samples to be reliable now, will retry; got " + intervalCount + "/" + WEB_TRAFFIC_MIN_SAMPLES + " " + counts); }
3137 return(false); // Too few samples to be reliable; may be transient problem so result cached only briefly.
3138 }
3139 if(IsDebug.isDebug) { dsb.log("recentTrafficLowForLatestCycle(): counts per slot, oldest first (omitting null entries): " + counts); }
3140 Collections.sort(counts);
3141 final int index = Collections.binarySearch(counts, totalPrevCount); // NOTE(review): if several slots share this count then binarySearch() may return the index of any of them, so the rank is not uniquely defined for ties.
3142 assert(index >= 0); // We know the totalPrevCount must be present in the array.
3143
3144 // DEFAULT: result is true if cat page views are in the bottom quartile...
3145 int threshold = (intervalCount >> 2);
3146
3147 // If a specific (valid) percentage has been supplied then use it.
3148 // But if GenProps is still 'empty' then don't cache the result for long, in the hope that a real GP value arrives soon.
3149 final GenProps gp = dsb.getGenProps(-1);
3150 final boolean gpNotLoadedYet = (0 == gp.timestamp);
3151 final String tpcS = gp.getGen().get(GenPropsGenNames.GPGEN_LOW_TRAFFIC_THRESHOLD_PERCENT);
3152 if(null != tpcS)
3153 {
3154 try
3155 {
3156 final int percent = Integer.parseInt(tpcS, 10);
3157 if((percent >= 0) && (percent <= 100))
3158 {
3159 if(IsDebug.isDebug) { dsb.log("recentTrafficLowForLatestCycle(): threshold percentage from GP: " + percent); }
3160 // Measure the index against the specified percentage threshold.
3161 threshold = (intervalCount * percent) / 100;
3162 }
3163 }
3164 catch(final Exception e)
3165 {
3166 // Absorb/log error and continue with the default threshold.
3167 dsb.log("Could not parse GP gen value "+GenPropsGenNames.GPGEN_LOW_TRAFFIC_THRESHOLD_PERCENT, e);
3168 }
3169 }
3170
3171 // Compute if traffic is below threshold, ie if the previous slot's position in the sorted counts is below the threshold index...
3172 final boolean result = (index < threshold);
3173
3174 // Cache the result... (Only briefly if GenProps value not loaded yet.)
3175 dsb.putUnlinkedValue(rTLFLC_resultKey, new Tuple.Pair<Long, Boolean>(gpNotLoadedYet ? shortTermKey : currentInterval, result));
3176 /*if(IsDebug.isDebug)*/ { dsb.log("recentTrafficLowForLatestCycle(): (temp="+gpNotLoadedYet+" cached, "+(System.currentTimeMillis()-now)+"ms) result="+result+", prev interval count="+totalPrevCount+", index=" + index + "/"+(intervalCount-1)+", threshold="+threshold+", from " + counts); }
3177 return(result); // Result has already been cached above.
3178 }
3179 catch(final Exception e)
3180 {
3181 if(null != cached) { dsb.replaceUnlinkedValue(rTLFLC_resultKey, cached, shortTermFalse); } else { dsb.putIfAbsentUnlinkedValue(rTLFLC_resultKey, shortTermFalse); }
3182 dsb.log("recentTrafficLowForLatestCycle(): unexpected error trying to compute if previous period was low-traffic", e);
3183 return(false); // May be a transient problem so result cached only briefly.
3184 }
3185 }
3186 }