001 /*
002 Copyright (c) 1996-2011, Damon Hart-Davis
003 All rights reserved.
004
005 Redistribution and use in source and binary forms, with or without
006 modification, are permitted provided that the following conditions are
007 met:
008
009 * Redistributions of source code must retain the above copyright
010 notice, this list of conditions and the following disclaimer.
011
012 * Redistributions in binary form must reproduce the above copyright
013 notice, this list of conditions and the following disclaimer in the
014 documentation and/or other materials provided with the
015 distribution.
016
017 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
018 IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
019 TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
020 PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
021 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
022 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
023 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
024 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
025 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
026 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
027 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
028 */
029 package org.hd.d.pg2k.svrCore.props;
030
031 import java.io.IOException;
032 import java.io.InvalidObjectException;
033 import java.io.ObjectInputStream;
034 import java.io.ObjectInputValidation;
035 import java.io.Serializable;
036 import java.net.InetAddress;
037 import java.net.MalformedURLException;
038 import java.net.URI;
039 import java.net.URL;
040 import java.util.ArrayList;
041 import java.util.Arrays;
042 import java.util.Calendar;
043 import java.util.Collections;
044 import java.util.Date;
045 import java.util.GregorianCalendar;
046 import java.util.HashMap;
047 import java.util.HashSet;
048 import java.util.Iterator;
049 import java.util.List;
050 import java.util.Locale;
051 import java.util.Map;
052 import java.util.Properties;
053 import java.util.Set;
054 import java.util.SortedMap;
055 import java.util.SortedSet;
056 import java.util.StringTokenizer;
057 import java.util.TimeZone;
058 import java.util.TreeMap;
059 import java.util.TreeSet;
060 import java.util.regex.Pattern;
061 import java.util.regex.PatternSyntaxException;
062
063 import org.hd.d.pg2k.svrCore.CoreConsts;
064 import org.hd.d.pg2k.svrCore.ExhibitName;
065 import org.hd.d.pg2k.svrCore.GenUtils;
066 import org.hd.d.pg2k.svrCore.HostUtils;
067 import org.hd.d.pg2k.svrCore.MemoryTools;
068 import org.hd.d.pg2k.svrCore.Name;
069 import org.hd.d.pg2k.svrCore.Rnd;
070 import org.hd.d.pg2k.svrCore.SimpleLoggerIF;
071 import org.hd.d.pg2k.svrCore.MIME.ExhibitMIME;
072 import org.hd.d.pg2k.svrCore.location.GeoUtils;
073 import org.hd.d.pg2k.svrCore.location.GeoUtils.CCTLD;
074
075 import ORG.hd.d.IsDebug;
076
077 /**This holds run-time settable properties for the master and mirrors.
078 * It is immutable, and can be persisted or sent over RMI-IIOP.
079 * <p>
080 * All values are accessed through getXXX() methods
081 * (this object can be used as a bean)
082 * to allow us to always constrain values to sensible limits
 * (and, for example, to patch up any values that have `gone funny'
 * in serialization because (say) the class version changed
 * and fields were added)
086 * and to allow for local overrides from properties.
087 * This means that some of the getXXXX() methods have to be synchronized.
088 * <p>
089 * We also extensively check object state at construction and
090 * deserialisation; and we try to impose explicit or implicit limits
091 * on the amount of space that an instance of this object can consume,
092 * especially because at a transition between an old version and a new one
093 * there may be multiple instances floating about in memory.
094 * <p>
095 * We don't retain the original raw properties internally, but parse them at
096 * construction time, to do most of the expensive work once if possible,
097 * and to keep the serialised form of the object small.
 * Defaults are set at parse/construction time,
099 * whereas limits are imposed at getXXX() time which makes for
100 * a lot of robustness if the object is damaged in transit or if
101 * the class details change slightly.
102 * <p>
 * The version manufactured with a default constructor has a zero timestamp.
104 */
105 public final class GenProps implements Serializable, ObjectInputValidation
106 {
107 /**Our serialisation version. */
108 private static final long serialVersionUID = 8715644348905699663L;
109
110 /**Deserialise. */
111 private void readObject(final ObjectInputStream in)
112 throws IOException, ClassNotFoundException
113 {
114 in.defaultReadObject();
115
116 // Take defensive immutable copy of gen, if present, else make it empty.
117 if(gen == null)
118 { gen = Collections.emptyMap(); }
119 else
120 { gen = Collections.unmodifiableMap(new HashMap<String,String>(gen)); }
121
122 // Take defensive copy of simpleAds[] if present, and re-sort.
123 if(simpleAds != null)
124 {
125 simpleAds = simpleAds.clone();
126 Arrays.sort(simpleAds);
127 }
128 // Take defensive copy of classifiedAds[] if present.
129 if(classifiedAds != null)
130 { classifiedAds = classifiedAds.clone(); }
131
132 // Take defensive copy of authDB if present.
133 if(authDB != null)
134 { authDB = Collections.unmodifiableSortedMap(new TreeMap<String, AuthData>(authDB)); }
135
136 // Take defensive copy of popWeights if present.
137 if(popWeights != null)
138 { popWeights = Collections.unmodifiableSortedMap(new TreeMap<String, Byte>(popWeights)); }
139
140 // Take defensive copies of hotlink hot/cold sets and DNSBLs,
141 // but renormalise hostnames and truncate to size if need be.
142 if(hotLinkAllowHosts != null)
143 { hotLinkAllowHosts = _normaliseHostList(hotLinkAllowHosts, MAX_ALLOW_DISALLOW_HOTLINK_HOSTS); }
144 if(hotLinkDisallowHosts != null)
145 { hotLinkDisallowHosts = _normaliseHostList(hotLinkDisallowHosts, MAX_ALLOW_DISALLOW_HOTLINK_HOSTS); }
146 if(DNSBLs != null)
147 { DNSBLs = _normaliseHostList(DNSBLs, MAX_DNSBLS); }
148
149
150 validateObject(); // Validate state immediately.
151 }
152
153 /**Validate fields/state.
154 * Called in the constructor and possibly after de-serialising.
155 * <p>
156 * Barf if something bad is found.
157 * (Maybe allow some extra info in debug version.)
158 */
159 public void validateObject()
160 throws InvalidObjectException
161 {
162 // Check that all components are sane and safe.
163 if(timestamp < 0)
164 { throw new InvalidObjectException("bad object: timestamp < 0"); }
165
166 // Verify generic properties.
167 if(gen == null)
168 { throw new InvalidObjectException("bad object: gen == null"); }
169 if(gen.size() > MAX_GEN_PROPS)
170 { throw new InvalidObjectException("bad object: gen too large"); }
171 for(final Object kO : gen.keySet())
172 {
173 if(!(kO instanceof String))
174 { throw new InvalidObjectException("bad object: gen key not a String"); }
175 final String k = (String) kO;
176 if(!isSafeGenPropValue(k))
177 { throw new InvalidObjectException("bad object: gen key not valid"); }
178
179 final Object vO = gen.get(k);
180 if(!(vO instanceof String))
181 { throw new InvalidObjectException("bad object: gen value not a String"); }
182 final String v = (String) vO;
183 if(!isSafeGenPropValue(v))
184 { throw new InvalidObjectException("bad object: gen value not valid"); }
185 }
186
187 // Check that HTML meta-header text is still printable ASCII excluding ".
188 if(!isSafeHTMLMetaHeaderString(WEBSVR_META_KEYWORDS))
189 { throw new InvalidObjectException("bad object: unsafe " + PNAME_WEBSVR_META_KEYWORDS); }
190 if(!isSafeHTMLMetaHeaderString(WEBSVR_META_DESCRIPTION))
191 { throw new InvalidObjectException("bad object: unsafe " + PNAME_WEBSVR_META_DESCRIPTION); }
192 // Check that background image name, if set, is syntactically valid.
193 // Check that it is a CharSequence or String.
194 // We also check that it is a JPEG image.
195 if(WEBSVR_BG_IMAGE != null)
196 {
197 final boolean isString = (WEBSVR_BG_IMAGE instanceof String);
198 final boolean isExhibitFull = (!isString) && (WEBSVR_BG_IMAGE instanceof Name.ExhibitFull);
199 if((!isString) && (!isExhibitFull))
200 { throw new InvalidObjectException("bad object: unsafe type " + WEBSVR_BG_IMAGE.getClass().getName()); }
201 if((!isExhibitFull) && (!ExhibitName.validNameSyntax(WEBSVR_BG_IMAGE)))
202 { throw new InvalidObjectException("bad object: unsafe name " + PNAME_WEBSVR_BG_IMAGE); }
203 final ExhibitMIME.ExhibitTypeParameters et =
204 ExhibitMIME.getInputFileType(WEBSVR_BG_IMAGE);
205 if((et == null) || (et.type != ExhibitMIME.ET_JPEG))
206 { throw new InvalidObjectException("bad object: unsafe exhibit type " + PNAME_WEBSVR_BG_IMAGE); }
207 }
208
209 // Check authDB for consistency.
210 if(authDB != null)
211 {
212 if((authDB.size() == 0) || (authDB.size() > MAX_AUTH_ENTRIES))
213 { throw new InvalidObjectException("bad object: bad-length authDB"); }
214
215 // Check all entries are of the correct type (AuthData).
216 for(final Iterator it = authDB.keySet().iterator(); it.hasNext(); )
217 {
218 final Object key = it.next();
219 if(!(key instanceof String))
220 { throw new InvalidObjectException("bad object: bad key in authDB"); }
221 if(!(authDB.get(key) instanceof AuthData))
222 { throw new InvalidObjectException("bad object: bad value in authDB"); }
223 }
224 }
225
226 // Check "goodness"/popularity weights for consistency.
227 if(popWeights != null)
228 {
229 if((popWeights.size() == 0) || (popWeights.size() > MAX_POPWT_ENTRIES))
230 { throw new InvalidObjectException("bad object: bad-length popWeights"); }
231
232 // Check all entries are of the correct type (AuthData).
233 for(final Iterator<String> it = popWeights.keySet().iterator(); it.hasNext(); )
234 {
235 final String key = it.next();
236 if(!ExhibitName.validAuthorSyntax(key) &&
237 !ExhibitName.validAttributeWord(key) &&
238 (ExhibitMIME.isValidInputExhibitNameExtension(key) == null))
239 { throw new InvalidObjectException("bad object: bad key in popWeights"); }
240 final Byte val = popWeights.get(key);
241 if((val == null) ||
242 (val.byteValue() < MIN_POPWT_VAL) ||
243 (val.byteValue() > MAX_POPWT_VAL))
244 { throw new InvalidObjectException("bad object: bad value in popWeights"); }
245 }
246 }
247
248 // Check ads for consistency.
249 // Don't waste space with zero-length simpleAds.
250 if((simpleAds != null) &&
251 ((simpleAds.length == 0) || (simpleAds.length > MAX_SIMPLE_ADS)))
252 { throw new InvalidObjectException("bad object: bad-length simpleAds[]"); }
253 // Ensure no nulls in simpleAds,
254 // and that ads are sorted.
255 if(simpleAds != null)
256 {
257 for(int i = simpleAds.length; --i >= 0; )
258 {
259 if(simpleAds[i] == null)
260 { throw new InvalidObjectException("bad object: simpleAds[] contains null"); }
261 // Check pair-wise sort order...
262 // The ordering should be total,
263 // but we'll live with monotonic.
264 if(i > 0)
265 {
266 if(simpleAds[i-1].compareTo(simpleAds[i]) > 0)
267 { throw new InvalidObjectException("bad object: simpleAds[] not ordered"); }
268 }
269 }
270 }
271 // Check correct total for simpleAds.
272 if(totalSimpleAdWeight != _compute_totalSimpleAdWeight(simpleAds))
273 { throw new InvalidObjectException("bad object: totalSimpleAdWeight inconsistent"); }
274 if(totalSimpleAdWeight < 0)
275 { throw new InvalidObjectException("bad object: totalSimpleAdWeight < 0"); }
276 // Don't waste space with zero-length classifiedAds.
277 if((classifiedAds != null) &&
278 ((classifiedAds.length == 0) || (classifiedAds.length > MAX_CLASSIFIED_ADS)))
279 { throw new InvalidObjectException("bad object: bad-length classifiedAds[]"); }
280 // Ensure no nulls in classifiedAds,
281 // TODO: and that ads are unique.
282 if(classifiedAds != null)
283 {
284 for(int i = classifiedAds.length; --i >= 0; )
285 {
286 if(classifiedAds[i] == null)
287 { throw new InvalidObjectException("bad object: classifiedAds[] contains null"); }
288 }
289 }
290 // There must be no ads held internally if ads are switched off.
291 if(!WEBSVR_SIMPLEAD_ALLOW &&
292 ((totalSimpleAdWeight != 0) || (simpleAds != null) || (classifiedAds != null)))
293 { throw new InvalidObjectException("bad object: simple ads loaded though disabled"); }
294
295 // Verify hotlinker diversion URL is valid (http) URL.
296 if(WEBSVR_EX_HOTLINK_DIVERT_URL != null)
297 {
298 try {
299 if(!"http".equals((new URL(WEBSVR_EX_HOTLINK_DIVERT_URL)).getProtocol()))
300 { throw new InvalidObjectException("bad object: invalid non-HTTP hotlink divert URL"); }
301 }
302 catch(final MalformedURLException e)
303 {
304 throw new InvalidObjectException("bad object: invalid unparseable hotlink divert URL");
305 }
306 }
307
308 // FIXME: Should check content to be normalised host names too...
309 if(hotLinkAllowHosts != null)
310 {
311 if(hotLinkAllowHosts.size() > MAX_ALLOW_DISALLOW_HOTLINK_HOSTS)
312 { throw new InvalidObjectException("bad object: too many allow hosts"); }
313 }
314 if(hotLinkDisallowHosts != null)
315 {
316 if(hotLinkDisallowHosts.size() > MAX_ALLOW_DISALLOW_HOTLINK_HOSTS)
317 { throw new InvalidObjectException("bad object: too many disallow hosts"); }
318 }
319 if(DNSBLs != null)
320 {
321 if(DNSBLs.size() > MAX_DNSBLS)
322 { throw new InvalidObjectException("bad object: too many DNSBLs"); }
323 }
324 }
325
326 /**Check if String is save as generic property key or value.
327 * Null and zero-length values are not permitted.
328 * <p>
329 * Only ASCII values in the range 32 to 126 are allowed.
330 *
331 * @param s the putative generic property key or value
332 * @return true iff the argument is a non-null, non-empty, not-too-long
333 * pure-ASCII value
334 */
335 private static final boolean isSafeGenPropValue(final String s)
336 {
337 // Check basic constraints.
338 if(s == null) { return(false); }
339 final int length = s.length();
340 if(length == 0) { return(false); }
341 if(length > MAX_GEN_LEN) { return(false); }
342
343 for(int i = length; --i >= 0; )
344 {
345 final char c = s.charAt(i);
346 if((c < 32) || (c > 126)) { return(false); }
347 }
348
349 return(true); // Benign string.
350 }
351
352 /**Check if String is safe to use as HTML meta-header; return false if not.
353 * Null and empty string are regarded as OK.
354 */
355 private static final boolean isSafeHTMLMetaHeaderString(final String s)
356 {
357 if(s == null) { return(true); }
358 for(int i = s.length(); --i >= 0; )
359 {
360 if(!isSafeHTMLMetaHeaderChar(s.charAt(i)))
361 { return(false); }
362 }
363 return(true); // All seems OK!
364 }
365
366 // /**Flags for User-Agent pattern matching checking for mobile phones. */
367 // private static final int REGEX_FLAGS = Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE | Pattern.CANON_EQ;
368 //
369 // /**Regex expression for one character safe for an HTML meta-header.
370 // * Basically such a character must be printable ASCII, and
371 // * one of:
372 // * <ul>
373 // * <li>a letter
374 // * <li>a digit
375 // * <li>a space
376 // * <li>one of the punctuation characters -+?!()[]{}/_.;:,@
377 // * </ul>
378 // */
379 // private static final String SAFE_HTML_META_HEADER_CHAR = "[a-zA-Z0-9+?!()\\[\\]{}/_.;:,@-]";
380 //
381 // /**Single 'safe meta header character' match. */
382 // private static final Pattern SAFE_HTML_META_HEADER_CHAR_PATTERN = Pattern.compile(SAFE_HTML_META_HEADER_CHAR, REGEX_FLAGS);
383
384 /**Check that character is safe for an HTML meta-header; return false if not.
385 * Basically such a character must be printable ASCII, and
386 * one of:
387 * <ul>
388 * <li>a letter
389 * <li>a digit
390 * <li>a space
391 * <li>one of the punctuation characters -+?!()[]{}/_.;:,@
392 * </ul>
393 */
394 private static final boolean isSafeHTMLMetaHeaderChar(final char c)
395 {
396 switch(c)
397 {
398 // Letters...
399 case 'a': case 'A':
400 case 'b': case 'B':
401 case 'c': case 'C':
402 case 'd': case 'D':
403 case 'e': case 'E':
404 case 'f': case 'F':
405 case 'g': case 'G':
406 case 'h': case 'H':
407 case 'i': case 'I':
408 case 'j': case 'J':
409 case 'k': case 'K':
410 case 'l': case 'L':
411 case 'm': case 'M':
412 case 'n': case 'N':
413 case 'o': case 'O':
414 case 'p': case 'P':
415 case 'q': case 'Q':
416 case 'r': case 'R':
417 case 's': case 'S':
418 case 't': case 'T':
419 case 'u': case 'U':
420 case 'v': case 'V':
421 case 'w': case 'W':
422 case 'x': case 'X':
423 case 'y': case 'Y':
424 case 'z': case 'Z':
425 // Digits.
426 case '0': case '1': case '2': case '3': case '4':
427 case '5': case '6': case '7': case '8': case '9':
428 // Safe punctuation and space.
429 case ' ':
430 case ',': case '.': case ';': case ':':
431 case '-': case '+':
432 case '!': case '?':
433 case '(': case ')':
434 case '[': case ']':
435 case '{': case '}':
436 case '/': case '_': case '@':
437 { return(true); } // OK!
438
439 default:
440 { return(false); } // Not safe.
441 }
442 }
443
/**Construct the default property set.
 * This is built from an empty set of properties with a zero timestamp,
 * and with no logger (so nothing is logged).
 */
public GenProps()
{
    this(new Properties(), 0L, null);
}
448
/**Construct a new, immutable, properties set, logging to System.err.
 * Behaves as the three-argument constructor with
 * GenUtils.systemErrLogger supplied as the logger.
 * <p>
 * Minor problems with the properties themselves will
 * be silently ignored or logged, and defaults substituted
 * for broken or missing values.
 *
 * @param props  the raw properties to parse; must be non-null
 * @param _timestamp  the timestamp for this set; must be non-negative
 */
public GenProps(final Properties props, final long _timestamp)
{
    this(props, _timestamp, GenUtils.systemErrLogger);
}
460
/**Construct a new, immutable, properties set.
 * The properties must be non-null
 * and the timestamp must be non-negative.
 * <p>
 * The raw properties are parsed here, once, into the fields of this
 * object: generic ("gen."-prefixed) key/value pairs, numeric Web server
 * tuning values, HTML meta-header text, the background image name,
 * simple and classified ads, author details, popularity weights,
 * and the hotlink/DNSBL host lists.
 * <p>
 * Minor problems with the properties themselves will
 * be silently ignored or logged, and defaults substituted
 * for broken or missing values.
 * The fully-parsed state is then checked with validateObject().
 *
 * @param props  the raw properties to parse; must be non-null
 * @param _timestamp  the timestamp for this set; must be non-negative
 * @param logger  if non-null then non-fatal problems will be logged here
 * @throws IllegalArgumentException  if props is null,
 *     if the timestamp is negative,
 *     if props has a non-String key,
 *     or if the parsed state fails validation
 */
public GenProps(final Properties props,
                final long _timestamp,
                final SimpleLoggerIF logger)
{
    if(props == null)
        { throw new IllegalArgumentException("props must not be null"); }
    if(_timestamp < 0)
        { throw new IllegalArgumentException("timestamp must not be negative"); }
    timestamp = _timestamp;

    // Extract the generic key/value properties, if any.
    // This pass also establishes that every key is a String,
    // which the later passes over the key set rely upon.
    final Map<String,String> g = new HashMap<String, String>();
    for(final Object keyO : props.keySet())
    {
        if(!(keyO instanceof String))
            { throw new IllegalArgumentException("bad properties key: not String"); }
        final String key = (String) keyO;

        if(!key.startsWith(GEN_PREFIX)) { continue; }

        // Store under the key with the prefix removed.
        // The values are validated later.
        g.put(key.substring(GEN_PREFIX.length()), props.getProperty(key));
    }
    // If no generic keys then save space with shared empty Map.
    if(g.isEmpty())
        { gen = Collections.emptyMap(); }
    else
        { gen = Collections.unmodifiableMap(g); }

    // Simple numeric settings: each is -1 if present but unparseable.
    WEBSVR_MIN_EX_IMATTR_RECHECK_MS = _parseIntOrMinusOne(props, PNAME_WEBSVR_MIN_EX_IMATTR_RECHECK_MS, "120000");
    WEBSVR_SYSPROPS_RECHECK_MS = _parseIntOrMinusOne(props, PNAME_WEBSVR_SYSPROPS_RECHECK_MS, "10013");
    WEBSVR_MAX_CACHEABLE_EX_BYTES = _parseIntOrMinusOne(props, PNAME_WEBSVR_MAX_CACHEABLE_EX_BYTES, "2123456");
    WEBSVR_BW_LIMITER = _parseIntOrMinusOne(props, PNAME_WEBSVR_BW_LIMITER, "10");
    WEBSVR_TN_CACHEPC = _parseIntOrMinusOne(props, PNAME_WEBSVR_TN_CACHEPC, "10");
    WEBSVR_MAX_EX_BYTES = _parseIntOrMinusOne(props, PNAME_WEBSVR_MAX_EX_BYTES, "1123456789");

    // We canonicalise this keyword list to reduce the
    // space it consumes (especially after compression), by:
    //   * Converting to lower-case (helps compression; helps drop dups).
    //   * Tokenize with "," and trim whitespace around each token.
    //   * We could eliminate duplicate whitespace within a single token too.
    //   * Eliminate duplicates (and empty tokens).
    //   * Sort (to help compression).
    //   * Glue back together again without whitespace.
    //   * Eliminate any quote (" or ') characters.
    // We object strongly to non-ASCII text (we remove it!).
    // The lower-casing uses a fixed locale so that the result does not
    // depend on the default locale of the JVM doing the parsing
    // (eg a Turkish default locale maps 'I' to a non-ASCII dotless i).
    final String rawKeywords =
        props.getProperty(PNAME_WEBSVR_META_KEYWORDS, "").trim().toLowerCase(Locale.ENGLISH);
    final SortedSet<String> words = new TreeSet<String>();
    final StringTokenizer st = new StringTokenizer(rawKeywords, ",");
    while(st.hasMoreTokens())
    {
        // Take each token,
        // zap any non-ASCII printable (and unsafe) chars,
        // and trim off any obvious excess whitespace.
        final String word = _sanitiseMetaHeaderText(st.nextToken()).trim();
        // Skip tokens with nothing left in them to avoid stray commas.
        if(word.length() > 0) { words.add(word); }
    }
    final StringBuilder keywords = new StringBuilder(rawKeywords.length());
    for(final Iterator<String> it = words.iterator(); it.hasNext(); )
    {
        keywords.append(it.next());
        if(it.hasNext()) { keywords.append(','); }
    }
    WEBSVR_META_KEYWORDS = keywords.toString();

    // Replace any unsafe characters for a meta header with spaces.
    WEBSVR_META_DESCRIPTION =
        _sanitiseMetaHeaderText(props.getProperty(PNAME_WEBSVR_META_DESCRIPTION, "")).trim();

    // Drop background image name if not a valid-syntax JPEG exhibit name.
    // We *do not* attempt to check that it exists here.
    String bgImageName = null;
    try
    {
        bgImageName = props.getProperty(PNAME_WEBSVR_BG_IMAGE);
        if(!ExhibitName.validNameSyntax(bgImageName))
            { bgImageName = null; }
        else
        {
            final ExhibitMIME.ExhibitTypeParameters tp =
                ExhibitMIME.getInputFileType(bgImageName);
            if((tp == null) || (tp.type != ExhibitMIME.ET_JPEG))
                { bgImageName = null; }
        }
    }
    catch(final Exception e)
    {
        // Best-effort: a name that cannot even be checked is dropped.
        bgImageName = null;
    }
    WEBSVR_BG_IMAGE = (bgImageName == null) ? null : Name.ExhibitFull.create(bgImageName);

    // Ads are allowed only if the property is present and is "true"
    // (ignoring case); an absent property thus means ads are disabled.
    // NOTE(review): the previous code had an unreachable default-to-true
    // branch here, so the intended default may have been 'true' -- confirm.
    WEBSVR_SIMPLEAD_ALLOW =
        Boolean.parseBoolean(props.getProperty(PNAME_WEBSVR_SIMPLEAD_ALLOW));

    // Parse simple ads...
    SimpleAd[] simpleAdsTmp = null;
    // Only even look for simple ads if they are allowed.
    if(WEBSVR_SIMPLEAD_ALLOW)
    {
        final List<SimpleAd> found = new ArrayList<SimpleAd>(); // Collect ads here...
        // Ads are numbered consecutively from 1;
        // the first missing "HTML" entry ends the sequence.
        for(int N = 1; N < Integer.MAX_VALUE; ++N)
        {
            final String prefix = PNAME_WEBSVR_SIMPLEAD_PREFIX + N + ".";
            final String rawCode = props.getProperty(prefix + "HTML");
            if(rawCode == null) { break; } // End of ads.

            final String code = rawCode.trim();
            int weight = 100; // Default, used if weight is missing or unparseable.
            final String rawWeight = props.getProperty(prefix + "wt");
            if(rawWeight != null)
            {
                try { weight = Integer.parseInt(rawWeight, 10); }
                catch(final NumberFormatException e) { /* Keep the default weight. */ }
            }
            if(weight <= 0) { continue; } // Temporarily disabled; skip.

            // Attempt to construct new ad item and save it,
            // but quietly skip any difficulties.
            try { found.add(MemoryTools.intern(new SimpleAd(weight, code))); }
            catch(final Exception e) { /* Deliberately skip an ad that cannot be built. */ }
        }
        // If we found some ads, extract and sort them, ready to store.
        if(!found.isEmpty())
        {
            simpleAdsTmp = found.toArray(new SimpleAd[found.size()]);
            Arrays.sort(simpleAdsTmp);
        }
    }
    simpleAds = simpleAdsTmp; // Store simple ads, if any.
    totalSimpleAdWeight = _compute_totalSimpleAdWeight(simpleAds);

    // Parse classified ads...
    ClassifiedAd[] classifiedAdsTmp = null;
    // Disable classified ads when we disable simple ads.
    if(WEBSVR_SIMPLEAD_ALLOW)
    {
        final Set<ClassifiedAd> found = new HashSet<ClassifiedAd>(); // Collect ads here...
        // Trawl through all properties looking for the mandatory regex value(s).
        for(final Object keyO : props.keySet())
        {
            // Safe cast: all keys were verified to be Strings above.
            final String regexKey = (String) keyO;
            // Skip keys nothing to do with classified ads.
            if(!regexKey.startsWith(PNAME_WEBSVR_CLASSIFIEDAD_PREFIX)) { continue; }
            // Skip everything but the URI regex key initially.
            if(!regexKey.endsWith(".URIregex")) { continue; }
            // Compute the stub up to and including the final '.'
            // to allow us to find the ad's other keys.
            final String stub = regexKey.substring(0, regexKey.lastIndexOf('.') + 1);
            // Try to create an instance for the classified ad found,
            // logging any problems from invalid data
            // (and dropping/ignoring any such 'bad' ad).
            try
            {
                // We interpret/parse start/end UTC dates (if any) here.
                final long start = parseUTCyyyymmdd(props.getProperty(stub+"start"));
                final long end = parseUTCyyyymmdd(props.getProperty(stub+"end"));
                // We omit ads whose end date has already expired by the timestamp
                // as a minor system-wide optimisation.
                if((end != 0) && (end < _timestamp)) { continue; }
                found.add(new ClassifiedAd(props.getProperty(stub+"HTML"),
                                           props.getProperty(regexKey),
                                           start,
                                           end,
                                           props.getProperty(stub+"lang"),
                                           props.getProperty(stub+"countries")));
                if(IsDebug.isDebug) { System.out.println("INFO: GenProps: classified ad loaded: "+stub); }
            }
            catch(final Exception e)
            {
                // The logger is optional: a bad ad must not turn into
                // a NullPointerException when no logger was supplied.
                if(logger != null)
                    { logger.log("ERROR: GenProps: unable to load classified ad "+stub+"... "+e.getMessage()); }
                if(IsDebug.isDebug) { System.err.println("ERROR: GenProps: classified ad BROKEN: "+stub); }
            }
        }

        // If we found some ads, extract and store.
        if(!found.isEmpty())
        {
            classifiedAdsTmp = found.toArray(new ClassifiedAd[found.size()]);
            if(IsDebug.isDebug) { System.out.println("INFO: GenProps: classified ads loaded: "+found.size()); }
        }
    }
    classifiedAds = classifiedAdsTmp; // Store classified ads, if any.


    // Extract author details.
    // We iterate over all the properties, looking for suitable candidates.
    final TreeMap<String,AuthData> putativeAuthDB = new TreeMap<String, AuthData>();
    for(final Object keyO : props.keySet())
    {
        try
        {
            final String key = (String) keyO;
            if(!key.startsWith(PPREFIX_AUTH_DETAILS))
                { continue; /* Not the right prefix for authDB. */ }

            // We have a key that could be a valid author entry...
            final String auth = key.substring(PPREFIX_AUTH_DETAILS.length());
            // TODO: add warning for invalid entries?
            if(!ExhibitName.validAuthorSyntax(auth))
                { continue; /* Not a valid author initials string. */ }

            // TODO: add warning for duplicate entries?
            putativeAuthDB.put(MemoryTools.intern(auth),
                               MemoryTools.intern(new AuthData(auth, props.getProperty(key))));
        }
        catch(final Exception e)
        {
            // Absorb errors (the entry is dropped)...
            e.printStackTrace();
        }
    }
    // Save immutable version of our authDB.
    // If empty, save space by storing a null instead.
    authDB = putativeAuthDB.isEmpty() ? null :
        Collections.unmodifiableSortedMap(putativeAuthDB);


    // Extract supplied static "goodness"/popularity weightings, if any.
    // We iterate over all the properties, looking for suitable candidates.
    // A weighting can be by author, by exhibit type/extension,
    // or by attribute word, distinguished by the key prefix.
    final String authWtPrefix = PPREFIX_POPWT_DETAILS + PCOMP_POPWR_BYAUTH;
    final String typeWtPrefix = PPREFIX_POPWT_DETAILS + PCOMP_POPWR_BYTYPE;
    final String attrWtPrefix = PPREFIX_POPWT_DETAILS + PCOMP_POPWR_BYATTR;
    final HashMap<String,Byte> putativePopWeights = new HashMap<String, Byte>();
    for(final Object keyO : props.keySet())
    {
        try
        {
            final String key = (String) keyO;
            if(!key.startsWith(PPREFIX_POPWT_DETAILS))
                { continue; /* Not the right prefix for popWeight. */ }

            // Work out what is being weighted (first matching prefix wins)
            // and drop the entry if its name is not valid for that kind.
            // TODO: add warning for invalid entries?
            final String name;
            if(key.startsWith(authWtPrefix))
            {
                name = key.substring(authWtPrefix.length());
                if(!ExhibitName.validAuthorSyntax(name))
                    { continue; /* Not a valid author initials string. */ }
            }
            else if(key.startsWith(typeWtPrefix))
            {
                name = key.substring(typeWtPrefix.length());
                if(ExhibitMIME.isValidInputExhibitNameExtension(name) == null)
                    { continue; /* Not a valid type string. */ }
            }
            else if(key.startsWith(attrWtPrefix))
            {
                name = key.substring(attrWtPrefix.length());
                if(!ExhibitName.validAttributeWord(name))
                    { continue; /* Not a valid attribute word string. */ }
            }
            else
                { continue; /* Not a weighting kind that we know about. */ }

            // A missing or unparseable weight throws, and is absorbed below.
            final Byte wt = Byte.decode(props.getProperty(key));
            if((wt < MIN_POPWT_VAL) || (wt > MAX_POPWT_VAL))
                { continue; /* Not a valid weight. */ }
            putativePopWeights.put(MemoryTools.intern(name),
                                   MemoryTools.intern(wt));
        }
        catch(final Exception e)
        {
            // Absorb errors (the entry is dropped)...
            e.printStackTrace();
        }
    }
    // Save immutable version of our popularity weights.
    // If empty, save space by storing a null instead.
    popWeights = putativePopWeights.isEmpty() ? null :
        Collections.unmodifiableMap(putativePopWeights);


    // Hotlink limiter value is clamped to the range [0,255]
    // before being narrowed to a byte.
    final int rawHotlinkLimiter = _parseIntOrMinusOne(props, PNAME_WEBSVR_EX_HOTLINK_LIMITER, "0");
    WEBSVR_EX_HOTLINK_LIMITER = (byte) Math.max(0, Math.min(255, rawHotlinkLimiter));

    // Get alternative URL to divert unwanted exhibit hotlinks to, if any.
    // Invalid URLs are ignored and valid ones may be canonicalised.
    String divertURL = null;
    try
    {
        final String p = props.getProperty(PNAME_WEBSVR_EX_HOTLINK_DIVERT_URL);
        if((p != null) && (p.startsWith("http:")))
            { divertURL = (new URL(p)).toExternalForm(); } // Parse and canonicalise.
    }
    catch(final MalformedURLException e) { /* Ignore malformed URLs. */ }
    WEBSVR_EX_HOTLINK_DIVERT_URL = divertURL;

    // Parse lists of hotlinker allow/deny hosts, DNSBLs...
    final Set<String> rawAllowNames = _parseHostList(
        props.getProperty(PNAME_WEBSVR_EX_HOTLINK_LIMITER_ALLOW));
    hotLinkAllowHosts = _normaliseHostList(rawAllowNames, MAX_ALLOW_DISALLOW_HOTLINK_HOSTS);
    hotLinkAllowHostsRegex = _makeResidualRegexHostMatch(rawAllowNames, logger);
    final Set<String> rawDisallowNames = _parseHostList(
        props.getProperty(PNAME_WEBSVR_EX_HOTLINK_LIMITER_DISALLOW));
    hotLinkDisallowHosts = _normaliseHostList(rawDisallowNames, MAX_ALLOW_DISALLOW_HOTLINK_HOSTS);
    hotLinkDisallowHostsRegex = _makeResidualRegexHostMatch(rawDisallowNames, logger);
    DNSBLs = _normaliseHostList(_parseHostList(
        props.getProperty(PNAME_WEBSVR_DNSBLs)), MAX_DNSBLS);


    // Verify complete object state,
    // retaining the underlying validation failure as the cause.
    try { validateObject(); }
    catch(final InvalidObjectException e)
        { throw new IllegalArgumentException(e.getMessage(), e); }
}

/**Parse a decimal int property, using the given default text if it is absent.
 *
 * @param props  the properties to read from; never null
 * @param name  the name of the property to read
 * @param defaultText  the text to parse if the property is not set
 * @return the parsed value, or -1 if the text is not a valid decimal int
 */
private static int _parseIntOrMinusOne(final Properties props,
                                       final String name,
                                       final String defaultText)
{
    try { return(Integer.parseInt(props.getProperty(name, defaultText), 10)); }
    catch(final NumberFormatException e) { return(-1); } // Unparseable.
}

/**Replace each character that is unsafe in an HTML meta-header with a space.
 *
 * @param s  the text to clean up; never null
 * @return text of the same length with every character that fails
 *     isSafeHTMLMetaHeaderChar() replaced by a space; never null
 */
private static String _sanitiseMetaHeaderText(final String s)
{
    final int n = s.length();
    final StringBuilder sb = new StringBuilder(n);
    for(int i = 0; i < n; ++i)
    {
        final char c = s.charAt(i);
        sb.append(isSafeHTMLMetaHeaderChar(c) ? c : ' ');
    }
    return(sb.toString());
}
844
845
846 /**Parse UTC YYYYMMDD date, or zero if absent (null). */
847 private static long parseUTCyyyymmdd(final String property)
848 {
849 if(property == null) { return(0); }
850 // property = property.trim();
851 if(property.length() != 8) { throw new IllegalArgumentException("format required: YYYYMMDD"); }
852
853 // Get GMT/UTC timezone.
854 final TimeZone tz = TimeZone.getTimeZone("GMT");
855 final GregorianCalendar gc = new GregorianCalendar(tz);
856 // Set YMD from property value.
857 gc.set(Calendar.YEAR, Integer.parseInt(property.substring(0, 4), 10));
858 // Not that month is zero-based (January is zero).
859 gc.set(Calendar.MONTH, Integer.parseInt(property.substring(4, 6), 10) - 1);
860 gc.set(Calendar.DAY_OF_MONTH, Integer.parseInt(property.substring(6, 8), 10));
861
862 // Convert to timestamp and return if sensible...
863 final long t = gc.getTimeInMillis();
864 if(t < CoreConsts.GALLERY_EPOC_START)
865 { throw new IllegalArgumentException("classified ad date too early to be sensible"); }
866 return(t);
867 }
868
/**Timestamp of this properties set; zero for default/empty set.
 * NOTE(review): presumably UTC milliseconds like the other times in this class;
 * confirm where this field is assigned.
 */
public final long timestamp;
871
872
873 /**Maximum number of general properties. */
874 public static final int MAX_GEN_PROPS = 1024;
875
876 /**Maximum length of general property key or value (chars). */
877 public static final int MAX_GEN_LEN = 1024;
878
879 /**Prefix for general properties in file (not needed on lookup). */
880 public static final String GEN_PREFIX = "gen.";
881
882 /**The (immutable) generic properties map; never null. */
883 private Map<String,String> gen;
884
885 /**Get the immutable generic properties; never null.
886 * The keys in this Map here are stripped of the initial GEN_PREFIX
887 * that appears in the properties file.
888 */
889 public Map<String,String> getGen()
890 { return(gen); }
891
892
893 /**Name of Web server minimum exhibit-immutable-attributes check interval (ms) property. */
894 public static final String PNAME_WEBSVR_MIN_EX_IMATTR_RECHECK_MS = "pg2k.websvr.ex.imattr.minrecheckms";
895 /**Web server minimum exhibit-immutable-attributes check interval (ms). */
896 private final int WEBSVR_MIN_EX_IMATTR_RECHECK_MS;
897 /**Get the Web server minimum exhibit-immutable-attributes check interval (ms).
898 * Constrained to the range approx 1s to 6h, default approx 2m.
899 */
900 public final int getWEBSVR_MIN_EX_IMATTR_RECHECK_MS()
901 { return(Math.max(1001, Math.min(6 * 3600 * 1000, WEBSVR_MIN_EX_IMATTR_RECHECK_MS))); }
902
903 /**Name of Web server sysprops check interval (ms) property. */
904 public static final String PNAME_WEBSVR_SYSPROPS_RECHECK_MS = "pg2k.websvr.props.sys.recheckms";
905 /**Web server sysprops (ie most property files including GenProps) (re)check interval (ms). */
906 private final int WEBSVR_SYSPROPS_RECHECK_MS;
907 /**Get the Web server sysprops check interval (ms).
908 * Constrained to the range approx 1s to 1h, default approx 10s (which applies until first props set is fetched).
909 */
910 public final int getWEBSVR_SYSPROPS_RECHECK_MS()
911 { return(Math.max(1003, Math.min(1 * 3600 * 1000, WEBSVR_SYSPROPS_RECHECK_MS))); }
912
913 /**Name of Web server maximum-cached-exhibit-prefix-size (bytes) property. */
914 public static final String PNAME_WEBSVR_MAX_CACHEABLE_EX_BYTES = "pg2k.websvr.ex.maxcacheablesize";
915 /**Web server maximum-cached-exhibit-prefix-size (bytes). */
916 private final int WEBSVR_MAX_CACHEABLE_EX_BYTES;
917 /**Get the Web server maximum-cached-exhibit-prefix-size (bytes).
918 * Constrained to the range approx 64kB to 1GB, default approx 2MB.
919 */
920 public final int getWEBSVR_MAX_CACHEABLE_EX_BYTES()
921 { return(Math.max(1<<16, Math.min(1123456789, WEBSVR_MAX_CACHEABLE_EX_BYTES))); }
922
923 /**Name of Web server persistent thumbnail cache size limit as percentage of exhibit cache limit value. */
924 public static final String PNAME_WEBSVR_TN_CACHEPC = "pg2k.websvr.ex.tncachepercent";
925 /**Web server persistent thumbnail cache size limit as percentage of exhibit cache limit value. */
926 private final int WEBSVR_TN_CACHEPC;
927 /**Web server persistent thumbnail cache size limit as percentage of exhibit cache limit value.
928 * Constrained to the range 0 to 100, default approx 10.
929 * <p>
930 * A value of zero disables the persistent cache
931 * (though in-memory cacheing may continue).
932 * <p>
933 * Any additional cache of generated thumbnails (exhibit samples)
934 * is limited to a size at most this many percent of the exhibit cache size.
935 * This cache is managed separately from this exhibit cache, but yoking the
936 * values together makes for simpler system management.
937 */
938 public final int getWEBSVR_TN_CACHEPC()
939 { return(Math.max(0, Math.min(100, WEBSVR_TN_CACHEPC))); }
940
941 /**Name of Web server precacheing bandwidth-limiter value (max fraction of available bandwidth used on precacheing). */
942 public static final String PNAME_WEBSVR_BW_LIMITER = "pg2k.websvr.ex.bwlimiter";
943 /**Web server precacheing bandwidth-limiter value (max fraction of available bandwidth used on precacheing). */
944 private final int WEBSVR_BW_LIMITER;
945 /**Web server precacheing bandwidth-limiter value (max fraction of available bandwidth used on precacheing).
946 * Constrained to the range approx 1 to 1000, default approx 10.
947 * <p>
948 * If any precacheing operation takes N milliseconds the next one
949 * will not be started until at least N times this value later.
950 * <p>
951 * The effect is to limit resource consumption to at most about
952 * 1/N of the most constrained of slave and master resources
953 * (typically CPU and/or bandwidth).
954 */
955 public final int getWEBSVR_BW_LIMITER()
956 { return(Math.max(1, Math.min(1000, WEBSVR_BW_LIMITER))); }
957
958 /**Name of Web server maximum exhibit size (32-bit value, bytes). */
959 public static final String PNAME_WEBSVR_MAX_EX_BYTES = "pg2k.websvr.ex.maxexhibitsize";
960 /**Web server maximum exhibit size (32-bit value, bytes). */
961 private final int WEBSVR_MAX_EX_BYTES;
962 /**Get the Web server maximum exhibit size (32-bit value, bytes).
963 * Constrained to the range approx 1B to 1GB, default approx 1GB.
964 */
965 public final int getWEBSVR_MAX_EX_BYTES()
966 { return(Math.max(1, Math.min(Integer.MAX_VALUE/2, WEBSVR_MAX_EX_BYTES))); }
967
968 /**Name of keywords list for HTML meta header ("" if none; not null). */
969 public static final String PNAME_WEBSVR_META_KEYWORDS = "pg2k.websvr.meta.keywords";
970 /**Keywords list for HTML meta header ("" if none; not null). */
971 private final String WEBSVR_META_KEYWORDS;
972 /**Get the keywords list for HTML meta header ("" if none; never null).
973 * May be trimmed of excess whitespace and canonicalised; "" if none.
974 * <p>
975 * Guaranteed only to contain characters safe to put directly into
976 * an HTML description meta-header attribute value.
977 * <p>
978 * In particular this result contains no quote marks (" or ')
979 * nor ampersand nor angle-brackets, though may contain whitespace,
980 * so should be quoted.
981 */
982 public final String getWEBSVR_META_KEYWORDS()
983 {
984 final String result = WEBSVR_META_KEYWORDS;
985 if(result == null) { return(""); }
986 assert(result.indexOf('\'') == -1);
987 assert(result.indexOf('"') == -1);
988 assert(result.indexOf('&') == -1);
989 assert(result.indexOf('<') == -1);
990 assert(result.indexOf('>') == -1);
991 return(result);
992 }
993
994 /**Name of description for HTML meta header ("" if none; not null). */
995 public static final String PNAME_WEBSVR_META_DESCRIPTION = "pg2k.websvr.meta.description";
996 /**Description for HTML meta header ("" if none; not null). */
997 private final String WEBSVR_META_DESCRIPTION;
998 /**Get the description list for HTML meta header ("" if none; not null).
999 * May be trimmed of excess whitespace; "" if none.
1000 */
1001 public final String getWEBSVR_META_DESCRIPTION()
1002 {
1003 final String result = WEBSVR_META_DESCRIPTION;
1004 if(result == null) { return(""); }
1005 return(result);
1006 }
1007
1008 /**Name of JPEG exhibit to use as a background image (null if none). */
1009 public static final String PNAME_WEBSVR_BG_IMAGE = "pg2k.websvr.livery.default.bg";
1010 /**JPEG exhibit to use as a background image (null if none).
1011 * Should be Name.ExhibitFull but could be String from deserialising old format.
1012 */
1013 private final CharSequence WEBSVR_BG_IMAGE;
1014 /**Get the JPEG exhibit to use as a background image (null if none).
1015 * Is a syntactically-valid JPEG exhibit name (or null).
1016 */
1017 public final Name.ExhibitFull getWEBSVR_BG_IMAGE()
1018 {
1019 final CharSequence result = WEBSVR_BG_IMAGE;
1020 if(null == result) { return(null); }
1021 if(result.getClass() == Name.ExhibitFull.class) { return((Name.ExhibitFull) result); }
1022 return(Name.ExhibitFull.create(result));
1023 }
1024
1025 /**Class holding details of one simple banner advertisement; immutable, serialisable.
1026 * Weighting must be strictly positive integer; text must be
1027 * non-zero-length, 7-bit printable (32--126) ASCII/HTML data,
1028 * though we also allow CRLF (\r\n) for tricky JavaScript/HTML issues...
1029 * <p>
1030 * We assume that the on-the-wire format for members is reasonably
1031 * efficient, especially if the transport layer includes compression.
1032 * <p>
1033 * This supports equals(), hashCode() and compareTo(); two objects
1034 * are equal if all their fields are, and the sort order is to
1035 * minimise ad-lookup and HTML-bandwidth (ie by highest weight
1036 * then smallest code, with ties being broken by code lexical ordering).
1037 */
1038 private static final class SimpleAd implements Serializable,
1039 ObjectInputValidation,
1040 MemoryTools.Internable,
1041 Comparable<SimpleAd>
1042 {
1043 /**Strictly positive weighting. */
1044 final int weight;
1045 /**HTML code as 7-bit ASCII; non-null, non-zero length. */
1046 final String code;
1047
1048 /**Maximum HTML code length in bytes/characters. */
1049 public static final int MAX_AD_CHARACTERS = 2048;
1050
1051 /**Equality depends on all fields. */
1052 @Override
1053 public boolean equals(final Object o)
1054 {
1055 if(!(o instanceof SimpleAd)) { return(false); }
1056 final SimpleAd other = (SimpleAd) o;
1057 return((weight == other.weight) && (code.equals(other.code)));
1058 }
1059
1060 /**Hash depends on text as good source of variability. */
1061 @Override
1062 public int hashCode() { return(code.hashCode()); }
1063
1064 /**Provides total ordering, with primary sort by decreasing weight.
1065 * Full sort order is:
1066 * <ol>
1067 * <li>decreasing weight, then
1068 * <li>increasing code length, then
1069 * <li>code lexical order.
1070 * </ol>
1071 */
1072 public int compareTo(final SimpleAd o)
1073 {
1074 final SimpleAd other = (SimpleAd) o;
1075
1076 if(weight > other.weight) { return(-1); } // Right order.
1077 if(weight < other.weight) { return( 1); } // Wrong order.
1078
1079 if(code.length() < other.code.length()) { return(-1); } // Right order.
1080 if(code.length() > other.code.length()) { return( 1); } // Wrong order.
1081
1082 // Break ties to make total ordering.
1083 return(code.compareTo(other.code));
1084 }
1085
1086 /**Construct new simple ad details. */
1087 SimpleAd(final int adRelativeWeight, final String HTMLCode)
1088 {
1089 weight = adRelativeWeight;
1090 code = MemoryTools.intern(HTMLCode);
1091 // Verify object state and thus validate parameters...
1092 try { validateObject(); }
1093 catch(final InvalidObjectException e)
1094 { throw new IllegalArgumentException(e.getMessage()); }
1095 }
1096
1097 /**Our serialisation version. */
1098 private static final long serialVersionUID = 6649356978055850149L;
1099
1100 // /**Deserialise. */
1101 // private void readObject(final ObjectInputStream in)
1102 // throws IOException, ClassNotFoundException
1103 // {
1104 // in.defaultReadObject();
1105 // validateObject(); // Validate state immediately.
1106 // }
1107
1108 /**Deserialise: validate and eliminate duplicates coming off the wire.
1109 * @return identical, de-duped, non-null instance
1110 */
1111 protected Object readResolve()
1112 // throws ObjectStreamException
1113 { return(MemoryTools.intern(new SimpleAd(weight, code))); }
1114
1115 /**Validate fields/state.
1116 * Called in the constructor and possibly after de-serialising.
1117 */
1118 public void validateObject()
1119 throws InvalidObjectException
1120 {
1121 // Check that all components are sane and safe.
1122 // Weight must be strictly positive.
1123 if(weight <= 0)
1124 { throw new InvalidObjectException("bad object: weight <= 0"); }
1125 // HTML code must be non-null, non-zero-length,
1126 // 7-bit printable ASCII (and CRLF).
1127 if((code == null) ||
1128 (code.length() == 0) || (code.length() > MAX_AD_CHARACTERS))
1129 { throw new InvalidObjectException("bad object: HTML code length silly"); }
1130 for(int i = code.length(); --i >= 0; )
1131 {
1132 final char c = code.charAt(i);
1133 if(((c < 32) || (c > 126)) &&
1134 ((c != '\r') && (c != '\n')))
1135 { throw new InvalidObjectException("bad object: HTML code contains bad char " + ((int) c)); }
1136 }
1137 }
1138 }
1139
1140 /**Class holding details of one classified advertisement; immutable, serialisable.
1141 * We assume that the on-the-wire format for members is reasonably efficient,
1142 * especially if the transport layer includes compression such as ZIP.
1143 */
1144 public static final class ClassifiedAd implements Serializable,
1145 ObjectInputValidation,
1146 MemoryTools.Internable
1147 {
1148 /**Construct new classified ad details.
1149 *
1150 * @param HTMLCode HTML code as 7-bit ASCII; non-null, non-zero length
1151 * @param URIregex valid regular expression to match a substring of a URI path;
1152 * never null nor empty
1153 */
1154 public ClassifiedAd(final String HTMLCode,
1155 final String URIregex,
1156 final long start, final long end,
1157 final String language,
1158 final String ccTLDs)
1159 {
1160 regex = MemoryTools.intern(URIregex);
1161 code = MemoryTools.intern(HTMLCode);
1162 lang = MemoryTools.intern(language);
1163 countries = MemoryTools.intern(ccTLDs);
1164 this.start = start;
1165 this.end = end;
1166 // Verify object state and thus validate parameters...
1167 try { validateObject(); }
1168 catch(final InvalidObjectException e)
1169 { throw new IllegalArgumentException(e.getMessage()); }
1170 }
1171
1172 /**Valid case-sensitive regular expression to match a substring of a URI path, never null nor empty. */
1173 public final String regex;
1174
1175 /**Private cache of compiled regex; null until first use.
1176 * Created on first use; not serialised.
1177 * <p>
1178 * Never set null once non-null.
1179 * <p>
1180 * Marked volatile for thread-safe lock-free access,
1181 */
1182 private transient volatile Pattern pattern;
1183 /**Get compiled regex; never null. */
1184 public Pattern getPattern()
1185 {
1186 Pattern p = pattern;
1187 if(p == null) { pattern = p = Pattern.compile(regex); }
1188 return(p);
1189 }
1190
1191 /**HTML code as 7-bit ASCII; non-null, non-zero length. */
1192 public final String code;
1193
1194 /**Maximum HTML code length in bytes/characters, as per SimpleAs; strictly positive. */
1195 public static final int MAX_AD_CHARACTERS = SimpleAd.MAX_AD_CHARACTERS;
1196
1197 /**Start time (+ve UTC ms) or zero if no start time. */
1198 public final long start;
1199 /**Start time (+ve UTC ms greater than 'start') or zero if no end time. */
1200 public final long end;
1201
1202 /**Language/locale filter; two-letter lower-case language code or null if none. */
1203 public final String lang;
1204
1205 /**Returns true if the supplied (visitor's) locale is inappropriate for this ad.
1206 * If the lang value is null then this returns false.
1207 * Else if the visitor's locale/language is null (unknown) then this returns true.
1208 * Else this returns true iff the visitor's locale language matches lang.
1209 */
1210 public boolean wrongLanguage(final Locale visitorLocale)
1211 {
1212 if(lang == null) { return(false); }
1213 if(visitorLocale == null) { return(true); }
1214 return(!lang.equals(visitorLocale.getLanguage()));
1215 }
1216
1217 /**Country code; comma-separated list of lower-case ccTLD codes or null if none. */
1218 public final String countries;
1219
1220 /**Returns true if the supplied (visitor's) country is inappropriate for this ad.
1221 * If the countries list is null then this returns false.
1222 * Else if the visitor's ccTLD (country code) is null (unknown) then this returns true.
1223 * Else this returns true iff the visitor's ccTLD is found on our countries list.
1224 */
1225 public boolean wrongCountry(final CCTLD visitorCcTLD)
1226 {
1227 if(countries == null) { return(false); }
1228 if(visitorCcTLD == null) { return(true); }
1229 // Check for presense in the 'allowed' list.
1230 // If the list is correctly normalised then a simple substring search suffices
1231 // (though might be inefficient for VERY long lists).
1232 // We search backwards since common values may be late (us, uk)
1233 // and we may have normalised to natural sorted (lexical/ASCII) order,
1234 // but the search direction is not necessary for correctness.
1235 return(countries.lastIndexOf(visitorCcTLD.code) == -1);
1236 }
1237
1238 /**Equality depends on all fields. */
1239 @Override
1240 public boolean equals(final Object o)
1241 {
1242 if(!(o instanceof ClassifiedAd)) { return(false); }
1243 final ClassifiedAd other = (ClassifiedAd) o;
1244 if(start != other.start) { return(false); }
1245 if(end != other.end) { return(false); }
1246 if(!code.equals(other.code)) { return(false); }
1247 if(lang == null) { if(other.lang != null) { return(false); } }
1248 else if(!lang.equals(other.lang)) { return(false); }
1249 if(countries == null) { if(other.countries != null) { return(false); } }
1250 else if(!countries.equals(other.countries)) { return(false); }
1251 return(true); // Identical.
1252 }
1253
1254 /**Hash depends on the regex and the end date as good/quick sources of variability. */
1255 @Override
1256 public int hashCode() { return(regex.hashCode() ^ (int)(end >> 26)); }
1257
1258 /**Validate fields/state.
1259 * Called in the constructor and possibly after de-serialising.
1260 */
1261 public void validateObject()
1262 throws InvalidObjectException
1263 {
1264 // HTML code must be non-null, non-zero-length,
1265 // 7-bit printable ASCII (and CRLF).
1266 if((code == null) ||
1267 (code.length() == 0) || (code.length() > MAX_AD_CHARACTERS))
1268 { throw new InvalidObjectException("bad object: HTML code length silly"); }
1269 for(int i = code.length(); --i >= 0; )
1270 {
1271 final char c = code.charAt(i);
1272 if(((c < 32) || (c > 126)) &&
1273 ((c != '\r') && (c != '\n')))
1274 { throw new InvalidObjectException("bad object: HTML code contains bad char " + ((int) c)); }
1275 }
1276 if((regex == null) || (regex.length() == 0) || (regex.length() > 2*ExhibitName.MAX_NAME_LENGTH))
1277 { throw new InvalidObjectException("bad object: regex null or length invalid"); }
1278 // Validate the regex pattern (and cache it too; naughty but efficient).
1279 try { pattern = Pattern.compile(regex); }
1280 catch(final PatternSyntaxException e)
1281 { throw new InvalidObjectException("bad object: regex invalid"); }
1282 if((start < 0) || (end < 0))
1283 { throw new InvalidObjectException("bad object: negative start/end"); }
1284 if((start != 0) && (start < CoreConsts.GALLERY_EPOC_START))
1285 { throw new InvalidObjectException("bad object: start date invalid: " + new Date(start)); }
1286 if((end != 0) && (end <= start))
1287 { throw new InvalidObjectException("bad object: end < start"); }
1288 if((lang != null) && !lang.matches("^[a-z][a-z]$"))
1289 { throw new InvalidObjectException("bad object: bad language not [a-z][a-z]"); }
1290 if((countries != null) && !countries.matches("^[a-z][a-z](,[a-z][a-z])*$"))
1291 { throw new InvalidObjectException("bad object: bad countries list not [a-z][a-z](,[a-z][a-z])*"); }
1292 // Avoid huge countries list with arbitrary limit...
1293 if((countries != null) && (countries.length() > 128))
1294 { throw new InvalidObjectException("bad object: bad countries list too long"); }
1295 }
1296
1297 /**Deserialise: validate and eliminate duplicates coming off the wire.
1298 * @return identical, de-duped, non-null instance
1299 */
1300 protected Object readResolve()
1301 // throws ObjectStreamException
1302 { return(MemoryTools.intern(new ClassifiedAd(code, regex, start, end, lang, countries))); }
1303
1304 /**Unique serialisation UID. */
1305 private static final long serialVersionUID = 8848447664623542985L;
1306 }
1307
1308
1309 /**Immutable Set of String tokens to replace with random values in banner HTML.
1310 * These tokens, when encountered in HTML banner ad text,
1311 * are replaced with random (positive, decimal) integers.
1312 * These are to help with "cache-busting".
1313 * <p>
1314 * These tokens must be chosen so as to be very unlikely to appear
1315 * where they are <em>not</em> intended for replacement.
1316 * <p>
1317 * This set may become replaceable at run-time in future.
1318 */
1319 public static final Set<String> AD_BANNER_RND_TOKENS = Collections.unmodifiableSet(
1320 new HashSet<String>(Arrays.asList(new String[]{
1321 "$RND$", // TribalFusion cache-buster token.
1322 })));
1323
1324 /**Name of prefix for simple banner ad code.
1325 * There is one sub-property ({prefix}.allow, either "true" or "false")
1326 * that allows or disallows all simple ads at once.
1327 * <p>
1328 * Then for N starting at 1, and stopping at the first gap,
1329 * there must be a non-empty {prefix}.{N}.HTML containing
1330 * non-empty, pure-printable 7-bit ASCII (32--126, +CRLF) HTML ad code.
1331 * There may be an optional {prefix}.{N}.wt strictly-positive
1332 * integer relative weighting (default 100) of this ad to others,
1333 * the higher the weighting the more of the available space taken.
1334 */
1335 public static final String PNAME_WEBSVR_SIMPLEAD_PREFIX = "pg2k.websvr.ad.";
1336 /**Name of on/off switch for simple and classified ad code, enabled by default. */
1337 public static final String PNAME_WEBSVR_SIMPLEAD_ALLOW = PNAME_WEBSVR_SIMPLEAD_PREFIX + "allow";
1338 /**On/off switch for simple banner ad code, enabled by default. */
1339 private final boolean WEBSVR_SIMPLEAD_ALLOW;
1340 /**Get on/off switch for simple banner ad code, enabled by default.
1341 */
1342 public final boolean getWEBSVR_SIMPLEAD_ALLOW() { return(WEBSVR_SIMPLEAD_ALLOW); }
1343 /**Get HTML code for a simple ad at random; null if none available.
1344 * This returns ads with frequencies in proportion to their weightings.
1345 * <p>
1346 * This will always return null if getWEBSVR_SIMPLEAD_ALLOW()
1347 * returns false.
1348 * <p>
1349 * This algorithm will take O(n) time to return one of n available ads.
1350 * <p>
1351 * Will be faster when ads are unevenly weigted.
1352 * <p>
1353 * This will automatically do substitution of random "cache-buster" tokens
1354 * if required.
1355 * (Any of the recognised tokens will be substituted with a
1356 * random (positive decimal) integer value, different each time,
1357 * with all occurrences of any one token the same in any one call.)
1358 */
1359 public final String getSimpleAd()
1360 {
1361 if(/* !WEBSVR_SIMPLEAD_ALLOW || */ /* Test is redundant. */
1362 /* (simpleAds == null) || */ /* Test is redundant. */
1363 (totalSimpleAdWeight == 0)) { return(null); }
1364
1365 // Pick uniformly-distributed selector value.
1366 // We primarily require speed from this generator,
1367 // not perfection nor security.
1368 final int r = Rnd.fastRnd.nextInt(totalSimpleAdWeight);
1369
1370 // Total weight so far.
1371 int tw = 0;
1372
1373 // Look until cumulative sum of weights passes selector,
1374 // then take the ad that passes that threshold.
1375 // If performance proves to be a problem we could
1376 // replace this with a binary chop by storing an auxillary
1377 // array containing the sum of the weights.
1378 // In the interim, if we discover that this array is not
1379 // sorted biggest weight first (which makes for fastest
1380 // linear search on average) then we can sort it when
1381 // we have finished selecting our ad this time.
1382 for(int i = 0; i < simpleAds.length; ++i)
1383 {
1384 tw += simpleAds[i].weight;
1385 if(r < tw)
1386 {
1387 String code = simpleAds[i].code;
1388 // Substitute cache-buster tokens as needed.
1389 if((code != null) && (AD_BANNER_RND_TOKENS.size() > 0))
1390 {
1391 for(final String token : AD_BANNER_RND_TOKENS)
1392 {
1393 // If the cache-buster token exists then replace all occurrences.
1394 if(code.indexOf(token) != -1)
1395 {
1396 final int rnd = (Rnd.fastRnd.nextInt() >>> 1);
1397 code = code.replace(token, String.valueOf(rnd));
1398 }
1399 }
1400 }
1401 return(code);
1402 }
1403 }
1404
1405 return(null); // No ad available. (Probably should not be able to happen.)
1406 }
1407
1408 /**Total weight of all simple ads; non-negative.
1409 * Strictly positive if there are some simple ads.
1410 */
1411 private final int totalSimpleAdWeight;
1412
1413 /**Recompute totalSimpleAdWeight; zero if arg is null.
1414 * @throws IllegalArgumentException if sum of weights would
1415 * exceed Integer.MAX_VALUE.
1416 */
1417 private static int _compute_totalSimpleAdWeight(final SimpleAd _simpleAds[])
1418 {
1419 if(_simpleAds == null) { return(0); }
1420 int result = 0;
1421 for(int i = _simpleAds.length; --i >= 0; )
1422 {
1423 final long r = result + (long) (_simpleAds[i].weight);
1424 if(r > Integer.MAX_VALUE)
1425 { throw new IllegalArgumentException("weights too large"); }
1426 result = (int) r;
1427 }
1428 return(result);
1429 }
1430
/**Sorted array of simple ads, no empty slots, non-zero length; or null if no simple ads. */
private /* final */ SimpleAd simpleAds[];

/**The maximum number of simple ads that we will entertain at once. */
public static final int MAX_SIMPLE_ADS = 64;


/**Name of prefix for classified ad code. */
public static final String PNAME_WEBSVR_CLASSIFIEDAD_PREFIX = "pg2k.websvr.clad.";

/**Array of classified ads, no empty slots or duplicates, non-zero length; or null if no classified ads. */
private /* final */ ClassifiedAd classifiedAds[];

/**The maximum number of classified ads that we will entertain at once. */
public static final int MAX_CLASSIFIED_ADS = 64;

/**Overall URI regex (union of all classified ad regexes) for quick rejection; may be null.
 * Not serialised; null until built lazily by getClassifieds(),
 * which only builds it when a lookup matches no ad
 * and there are more than two classified ads.
 * Never set null once non-null.
 * <p>
 * Marked volatile for thread-safe lock-free access.
 */
private transient volatile Pattern _masterClassifiedMatchPattern;
1454 /**Returns List of possible classifieds that match the filters; null if no such ads.
1455 * The URI passed should usually be absolute, ie starting with '/'.
1456 * <p>
1457 * This inspects and filters by:
1458 * <ul>
1459 * <li>the URI regex,
1460 * <li>the current date vs any start/end dates,
1461 * <li>the user's ccTLD if supplied vs the countries list if supplied,
1462 * <li>the user's locale language if supplied vs the ad language if supplied.
1463 * </ul>
1464 * <p>
1465 * It may be desirable to shuffle the result
1466 * to avoid any unwanted bias/ordering
1467 * and to minimise ad-blindness.
1468 *
1469 * @param uri the display URI; must be non-null
1470 * @param locale if non-null then ads are filtered by the ad language,
1471 * if null then we do not filter by locale/language
1472 * @param addr if non-null then ads are filtered by the country indicated
1473 * by this client IP address,
1474 * if null then we do not filter by country
1475 *
1476 * @return undefined-order non-empty List of possible matching classifieds, else null.
1477 */
1478 public List<ClassifiedAd> getClassifieds(final URI uri,
1479 final Locale locale,
1480 final InetAddress addr)
1481 {
1482 // If no classifieds then always return null immediately.
1483 if(classifiedAds == null) { return(null); }
1484
1485 // Match against URI path component only.
1486 final String path = uri.getPath();
1487 // No match is possible if there is no path component.
1488 if(path == null) { return(null); }
1489
1490 // If there is a matcher filter/matcher
1491 // to check against all the URI patterns in one go
1492 // then test against it now.
1493 final Pattern master = _masterClassifiedMatchPattern;
1494 if((master != null) && !master.matcher(path).find()) { return(null); }
1495
1496 // Prepare to filter by current time/date.
1497 final long now = System.currentTimeMillis();
1498
1499 final List<ClassifiedAd> result = new ArrayList<ClassifiedAd>(classifiedAds.length);
1500 for(final ClassifiedAd clad : classifiedAds)
1501 {
1502 // Filter by (optional) start/end dates. Should be very fast.
1503 if((clad.start != 0) && (now < clad.start)) { continue; }
1504 if((clad.end != 0) && (now >= clad.end)) { continue; }
1505 // Filter by (optional) language, if provided. Should be quick.
1506 if((locale != null) && clad.wrongLanguage(locale)) { continue; }
1507 // Filter by URI regex. May be moderately slow.
1508 if(!clad.getPattern().matcher(path).find()) { continue; }
1509 // Filter by (optional) allowed countries. May be (very) slow, so do last.
1510 if((addr != null) && clad.wrongCountry(GeoUtils.getCCTLDByAddress(addr, false))) { continue; }
1511 // Add the successfully-matched ad to the result.
1512 result.add(clad);
1513 }
1514 if(result.isEmpty())
1515 {
1516 // If there are many classifieds but none matched this time
1517 // (and we don't already have a master matcher)
1518 // then it may be worthwhile for subsequent calls
1519 // to have available an overall master matcher
1520 // to quickly reject any URI that will never match any current ad.
1521 if((master == null) && (classifiedAds.length > 2))
1522 {
1523 // Build a compound expression accepting the union of the matches.
1524 final StringBuilder sb = new StringBuilder();
1525 for(final ClassifiedAd clad : classifiedAds)
1526 {
1527 if(sb.length() == 0) { sb.append('('); }
1528 else { sb.append("|("); }
1529 sb.append(clad.regex);
1530 sb.append(')');
1531 }
1532 _masterClassifiedMatchPattern = Pattern.compile(sb.toString());
1533 }
1534
1535 return(null);
1536 }
1537
1538 return(result);
1539 }
1540
1541
1542 /**Get AuthData for specified author, or null if none.
1543 * @param auth author initials of desired data.
1544 */
1545 public synchronized AuthData getAuthData(final CharSequence auth)
1546 {
1547 if(authDB == null) { return(null); }
1548 return(authDB.get(auth.toString()));
1549 }
1550
1551 /**Property name prefix for author details. */
1552 public static final String PPREFIX_AUTH_DETAILS = "pg2k.authdb.";
1553
1554 /**Immutable SortedMap of author details, no empty or duplicate slots; non-zero length, or null if none. */
1555 private /* final */ SortedMap<String,AuthData> authDB;
1556
1557 /**The maximum number of author database entries. */
1558 public static final int MAX_AUTH_ENTRIES = 1024;
1559
1560 /**Class holding details of one author; immutable, serialisable.
1561 * Author initial must be valid syntactically;
1562 * text must be 7-bit printable (32--126) ASCII/HTML data.
1563 * <p>
1564 * We assume that the on-the-wire format for members is reasonably
1565 * efficient, especially if the transport layer includes compression.
1566 * <p>
1567 * This supports equals(), hashCode() and compareTo(); two objects
1568 * are equal iff all the fields are, and the sort order is by author.
1569 */
1570 public static final class AuthData implements Serializable,
1571 ObjectInputValidation,
1572 Comparable<AuthData>,
1573 MemoryTools.Internable
1574 {
1575 /**Author initials: must be syntactically valid; non-null. */
1576 public final String auth;
1577
1578 /**Author name: must be pure 7-bit printable ASCII HTML with possible entities; not empty, non-null. */
1579 public final String name;
1580
1581 /**Author home-page URL: should be syntactically valid URL or null. */
1582 public final String www;
1583
1584 /**Author email: should be syntactically valid email address or null. */
1585 public final String email;
1586
1587 /**Description HTML code as 7-bit ASCII; never empty but can be null. */
1588 public final String desc;
1589
1590 /**Maximum HTML description length in bytes/characters. */
1591 public static final int MAX_DESC_CHARACTERS = 2048;
1592
1593 /**Equality depends on all fields. */
1594 @Override
1595 public boolean equals(final Object o)
1596 {
1597 if(!(o instanceof AuthData)) { return(false); }
1598 final AuthData other = (AuthData) o;
1599 if(!auth.equals(other.auth)) { return(false); }
1600 if(!name.equals(other.name)) { return(false); }
1601 if((www == null) ? (other.www != null) : !www.equals(other.www)) { return(false); }
1602 if((email == null) ? (other.email != null) : !email.equals(other.email)) { return(false); }
1603 if((desc == null) ? (other.desc != null) : !desc.equals(other.desc)) { return(false); }
1604 return(true);
1605 }
1606
1607 /**Hash depends on author. */
1608 @Override
1609 public int hashCode() { return(auth.hashCode()); }
1610
1611 /**Provides total ordering, by author. */
1612 public int compareTo(final AuthData o)
1613 { return(auth.compareTo(((AuthData) o).auth)); }
1614
1615 /**Construct new author details from single string.
1616 * The String should be of the form:
1617 * <samp>Real Name|HomePageURL|email|description</samp>
1618 * where any item can be blank except the name.
1619 */
1620 AuthData(final String authInitials,
1621 final String pipeDelimitedString)
1622 {
1623 this(authInitials,
1624 _getSection(pipeDelimitedString, 0),
1625 _getSection(pipeDelimitedString, 1),
1626 _getSection(pipeDelimitedString, 2),
1627 _getSection(pipeDelimitedString, 3));
1628 }
1629
1630 /**Extract given section from pipe-delimited String.
1631 * Skip given number of pipe symbols ("|")
1632 * then return next up to following one if present.
1633 * Returns null if section not present.
1634 */
1635 private static String _getSection(final String s, final int section)
1636 {
1637 int prevPipePos = -1;
1638 for(int i = section; --i >= 0; )
1639 {
1640 final int nextPipe = s.indexOf('|', prevPipePos+1);
1641 // Return null if requested section not present.
1642 if(nextPipe == -1) { return(null); }
1643 // Else adjust start marker.
1644 prevPipePos = nextPipe;
1645 }
1646
1647 // Find the start of the following section, if any.
1648 final int nextPipePos = s.indexOf('|', prevPipePos+1);
1649
1650 // No following section; return the rest of the string.
1651 if(nextPipePos == -1)
1652 { return(s.substring(prevPipePos+1)); }
1653
1654 // Return the requested section.
1655 return(s.substring(prevPipePos+1, nextPipePos));
1656 }
1657
1658 /**Construct new author details from individial components.
1659 * @param authInitials the syntactally-valid author initials; non-null
1660 * @param authName real name of author; not empty nor null
1661 * @param homePageURL valid http URL, will be stripped,
1662 * empty text is converted to null
1663 * @param emailAddress valid email address, will be stripped,
1664 * empty text is converted to null
1665 * @param descriptionText the descriptive text, will be stripped,
1666 * empty text is converted to null
1667 *
1668 * Not given public access as only GenProps should need to construct.
1669 */
1670 AuthData(final String authInitials,
1671 final String authName,
1672 String homePageURL,
1673 String emailAddress,
1674 String descriptionText)
1675 {
1676 // Canonicalise URL.
1677 if(homePageURL != null)
1678 {
1679 homePageURL = homePageURL.trim();
1680 if(homePageURL.length() == 0)
1681 { homePageURL = null; }
1682 }
1683
1684 // Canonicalise email address.
1685 if(emailAddress != null)
1686 {
1687 emailAddress = emailAddress.trim();
1688 if(emailAddress.length() == 0)
1689 { emailAddress = null; }
1690 }
1691
1692 // Canonicalise description text.
1693 if(descriptionText != null)
1694 {
1695 descriptionText = descriptionText.trim();
1696 if(descriptionText.length() == 0)
1697 { descriptionText = null; }
1698 }
1699
1700 // Use intern() mainly to reduce old-heap memory churn.
1701 auth = MemoryTools.intern(authInitials);
1702 name = MemoryTools.intern(authName.trim());
1703 www = MemoryTools.intern(homePageURL);
1704 email = MemoryTools.intern(emailAddress);
1705 desc = MemoryTools.intern(descriptionText);
1706
1707 // Verify object state and thus validate parameters...
1708 try { validateObject(); }
1709 catch(final InvalidObjectException e)
1710 { throw new IllegalArgumentException(e.getMessage()); }
1711 }
1712
1713 /**Our serialisation version. */
1714 private static final long serialVersionUID = 2647433856891011944L;
1715
1716 /**Deserialise. */
1717 private void readObject(final ObjectInputStream in)
1718 throws IOException, ClassNotFoundException
1719 {
1720 in.defaultReadObject();
1721 validateObject(); // Validate state immediately.
1722 }
1723
1724 /**Validate fields/state.
1725 * Called in the constructor and possibly after de-serialising.
1726 * <p>
1727 * Barf if something bad is found.
1728 * (Maybe allow some extra info in debug version.)
1729 */
1730 public void validateObject()
1731 throws InvalidObjectException
1732 {
1733 // Validate author initials...
1734 if(!ExhibitName.validAuthorSyntax(auth))
1735 { throw new InvalidObjectException("bad object: author initials invalid"); }
1736
1737 if((name == null) || (name.length() == 0) ||
1738 !name.equals(name.trim()))
1739 { throw new InvalidObjectException("bad object: author name missing or too short or non-canonical"); }
1740 for(int i = name.length(); --i >= 0; )
1741 {
1742 final char c = name.charAt(i);
1743 if((c < 32) || (c > 126))
1744 { throw new InvalidObjectException("bad object: author name contains bad char " + ((int) c)); }
1745 }
1746
1747 // Validate URL if not null.
1748 // * Start with "http://".
1749 // * Parseable...
1750 if(www != null)
1751 {
1752 if(!www.equals(www.trim()))
1753 { throw new InvalidObjectException("bad object: WWW address non-canonical"); }
1754 if(!www.startsWith("http://"))
1755 { throw new InvalidObjectException("bad object: WWW address seems invalid"); }
1756 try { new URL(www); }
1757 catch(final MalformedURLException e)
1758 { throw new InvalidObjectException("bad object: WWW address unparseable"); }
1759 }
1760
1761 // Validate email address if not null.
1762 // * At least 5 characters (eg "a@b.c").
1763 // * Contains exactly one "@".
1764 if(email != null)
1765 {
1766 if(!email.equals(email.trim()))
1767 { throw new InvalidObjectException("bad object: email address non-canonical"); }
1768 if(email.length() < 5)
1769 { throw new InvalidObjectException("bad object: email address too short"); }
1770 final int firstAt = email.indexOf('@');
1771 if(firstAt < 1)
1772 { throw new InvalidObjectException("bad object: email address user portion missing"); }
1773 if(firstAt != email.lastIndexOf('@'))
1774 { throw new InvalidObjectException("bad object: email address contains more than one @"); }
1775 }
1776
1777 // HTML code must be non-empty (but can be null),
1778 // 7-bit printable ASCII (and CRLF).
1779 if(desc != null)
1780 {
1781 if(!desc.equals(desc.trim()))
1782 { throw new InvalidObjectException("bad object: HTML desc text non-canonical"); }
1783 if((desc.length() == 0) || (desc.length() > MAX_DESC_CHARACTERS))
1784 { throw new InvalidObjectException("bad object: HTML desc text length silly"); }
1785 for(int i = desc.length(); --i >= 0; )
1786 {
1787 final char c = desc.charAt(i);
1788 if(((c < 32) || (c > 126)) &&
1789 ((c != '\r') && (c != '\n')))
1790 { throw new InvalidObjectException("bad object: HTML desc text contains bad char " + ((int) c)); }
1791 }
1792 }
1793 }
1794 }
1795
1796 /**Prefix for static "goodness"/popularity weighting values; includes trailing dot. */
1797 public static final String PPREFIX_POPWT_DETAILS = "pg2k.popweight.";
1798
1799 /**Goodness subcomponent by author; includes trailing dot. */
1800 public static final String PCOMP_POPWR_BYAUTH = "byAuth.";
1801
1802 /**Goodness subcomponent by type/extension; includes trailing dot. */
1803 public static final String PCOMP_POPWR_BYTYPE = "byType.";
1804
1805 /**Goodness subcomponent by attribute; includes trailing dot. */
1806 public static final String PCOMP_POPWR_BYATTR = "byAttr.";
1807
1808 /**The maximum number of static "goodness"/popularity database entries. */
1809 public static final int MAX_POPWT_ENTRIES = 256;
1810
1811 /**The maximum allowed "goodness"/popularity weight (maximally good); strictly positive. */
1812 public static final byte MAX_POPWT_VAL = 100;
1813
1814 /**The minimum allowed "goodness"/popularity weight (maximally bad); strictly negative, the negation of MAX_POPWT_VAL. */
1815 public static final byte MIN_POPWT_VAL = -MAX_POPWT_VAL;
1816
1817 /**Map from String token to Byte ([-100,+100]) weighting; null if there are no weightings.
1818 * We cheat and assume that the sets of author, attribute words and extensions
1819 * are disjoint, and so we keep all the mappings in a single table!
1820 */
1821 private /* final */ Map<String,Byte> popWeights;
1822
1823 /**Get weighting by author in range ([-100,+100]); null if no weighting for specified author.
1824 * Author initials specified must be syntactically valid.
1825 * <p>
1826 * Note that this should be treated as if a value of +1.0
1827 * with a correlation of the returned value divided by MAX_POPWT_VAL.
1828 */
1829 public synchronized Byte getPopWeightForAuth(final CharSequence auth)
1830 {
1831 if(!ExhibitName.validAuthorSyntax(auth))
1832 { throw new IllegalArgumentException(); }
1833 final Map<String,Byte> p = popWeights;
1834 if(p == null) { return(null); }
1835 return(p.get(auth.toString()));
1836 }
1837
1838 /**Get weighting by type/extension in range ([-100,+100]); null if no weighting for specified type.
1839 * Extension/type specified must be syntactically valid and known to the system.
1840 * <p>
1841 * Note that this should be treated as if a value of +1.0
1842 * with a correlation of the returned value divided by MAX_POPWT_VAL.
1843 */
1844 public synchronized Byte getPopWeightForType(final CharSequence type)
1845 {
1846 if(ExhibitMIME.isValidInputExhibitNameExtension(type) == null)
1847 { throw new IllegalArgumentException(); }
1848 final Map<String,Byte> p = popWeights;
1849 if(p == null) { return(null); }
1850 return(p.get(type.toString()));
1851 }
1852
1853 /**Get weighting by attribute word in range ([-100,+100]); null if no weighting for specified attribute.
1854 * Attribute word specified must be syntactically valid and known to the system.
1855 * <p>
1856 * Note that this should be treated as if a value of +1.0
1857 * with a correlation of the returned value divided by MAX_POPWT_VAL.
1858 */
1859 public synchronized Byte getPopWeightForAttr(final String attrWord)
1860 {
1861 if(!ExhibitName.validAttributeWord(attrWord))
1862 { throw new IllegalArgumentException(); }
1863 final Map<String,Byte> p = popWeights;
1864 if(p == null) { return(null); }
1865 return(p.get(attrWord));
1866 }
1867
1868
1869 /**Name of Web server hotlinked-download-limiter value (as percentage of all exhibit downloads). */
1870 public static final String PNAME_WEBSVR_EX_HOTLINK_LIMITER = "pg2k.websvr.ex.hotlinklimiter";
1871 /**Web server hotlinked-download-limiter value (as percentage of all exhibit downloads); read as an unsigned byte by the accessor. */
1872 private final byte WEBSVR_EX_HOTLINK_LIMITER;
1873 /**Web server hotlinked-download-limiter value (as percentange of all exhibit downloads).
1874 * Constrained to the range approx 0 to 255, default 0.
1875 * <p>
1876 * This limit is too prevent too much (lazy or bandwidth-theft) direct hot-linking
1877 * to exhibits (and/or thumbnails) by external Web sites.
1878 * <p>
1879 * There <em>are</em> bona fide reasons for some direct hotlinks, eg:
1880 * <ul>
1881 * <li>From some image search engines.
1882 * <li>From small Web sites and/or from contributors' sites.
1883 * </ul>
1884 */
1885 public final int getWEBSVR_EX_HOTLINK_LIMITER()
1886 { return(WEBSVR_EX_HOTLINK_LIMITER & 0xff); }
1887
1888 /**Name of Web server hotlinked-download-limiter divert graphic URL value. */
1889 public static final String PNAME_WEBSVR_EX_HOTLINK_DIVERT_URL = "pg2k.websvr.ex.hotlinklimiter.altURL";
1890 /**Web server hotlinked-download-limiter divert graphic URL value; null if none (as documented on the accessor). */
1891 private final String WEBSVR_EX_HOTLINK_DIVERT_URL;
1892 /**Web server hotlinked-download-limiter value divert graphic URL value, or null if none.
1893 * Absolute URL of an alternate graphic to divert a user's brower to
1894 * when they have been hotlinked to one of our exhibits,
1895 * or null if the user is simply to get a 5XX error.
1896 * <p>
1897 * Preferably a small image with a long cache time.
1898 */
1899 public final String getWEBSVR_EX_HOTLINK_DIVERT_URL()
1900 { return(WEBSVR_EX_HOTLINK_DIVERT_URL); }
1901
1902 /**Maximum number of allow/disallow hotlink hosts that may be specified. */
1903 public static final int MAX_ALLOW_DISALLOW_HOTLINK_HOSTS = 64;
1904
1905 /**Name of Web server hotlinked-download-host-allow set. */
1906 public static final String PNAME_WEBSVR_EX_HOTLINK_LIMITER_ALLOW = "pg2k.websvr.ex.hotlinklimiter.allowhosts";
1907
1908 /**Name of Web server hotlinked-download-host-disallow set. */
1909 public static final String PNAME_WEBSVR_EX_HOTLINK_LIMITER_DISALLOW = "pg2k.websvr.ex.hotlinklimiter.disallowhosts";
1910
1911 /**Immutable Set of "allow" hotlink hosts (normalised host names); can be null for no such hosts, in which case getHotLinkAllowHosts() substitutes an empty set. */
1912 private /* final */ Set<String> hotLinkAllowHosts;
1913
1914 /**Immutable compiled case-insensitive regex pattern for hotlink allow hosts not in literal list; can be null if no such "regex-match" hosts. */
1915 private final java.util.regex.Pattern hotLinkAllowHostsRegex;
1916
1917 /**Immutable Set of "disallow" hotlink hosts (normalised/stripped/lower-cased host names); can be null for no such hosts, in which case getHotLinkDisallowHosts() substitutes an empty set. */
1918 private /* final */ Set<String> hotLinkDisallowHosts;
1919
1920 /**Immutable compiled case-insensitive regex pattern for hotlink disallow hosts not in literal list; can be null if no such "regex-match" hosts. */
1921 private final java.util.regex.Pattern hotLinkDisallowHostsRegex;
1922
1923 /**Get immutable Set of "allow" hotlink hosts (normalised host names); never null. */
1924 public final Set<String> getHotLinkAllowHosts()
1925 {
1926 final Set<String> r = hotLinkAllowHosts;
1927 if(r == null)
1928 {
1929 final Set<String> noHosts = Collections.emptySet();
1930 return(noHosts);
1931 }
1932 return(r);
1933 }
1934
1935 /**Get immutable compiled case-insensitive regex pattern for hotlink allow hosts not in literal list; can be null if no such "regex-match" hosts. */
1936 public final Pattern getHotLinkAllowHostsRegex()
1937 {
1938 return(hotLinkAllowHostsRegex);
1939 }
1940
1941 /**Get immutable Set of "disallow" hotlink hosts (normalised host names); never null. */
1942 public final Set<String> getHotLinkDisallowHosts()
1943 {
1944 final Set<String> r = hotLinkDisallowHosts;
1945 if(r == null)
1946 {
1947 final Set<String> noHosts = Collections.emptySet();
1948 return(noHosts);
1949 }
1950 return(r);
1951 }
1952
1953 /**Get immutable compiled case-insensitive regex pattern for hotlink disallow hosts not in literal list; can be null if no such "regex-match" hosts. */
1954 public final Pattern getHotLinkDisallowHostsRegex()
1955 {
1956 return(hotLinkDisallowHostsRegex);
1957 }
1958
1959 /**Returns true if this may be a regex (ie contains non-DNS-safe chars).
1960 * If a user-specified hostname contains characters
1961 * other than [-a-zA-Z0-9.]
1962 * then we assume that it may be intended as a regex.
1963 *
1964 * @param n non-null non-empty user-specified non-normalised hostname
1965 */
1966 private static boolean _mayBeRegexHostname(final String n)
1967 {
1968 assert((n != null) && (n.length() != 0));
1969 for(int i = n.length(); --i >= 0; )
1970 {
1971 final char c = n.charAt(i);
1972 if((c >= 'a') && (c <= 'z')) { continue; }
1973 if((c >= 'A') && (c <= 'Z')) { continue; }
1974 if((c >= '0') && (c <= '9')) { continue; }
1975 if((c == '.') || (c == '-')) { continue; }
1976 return(true); // Not a plain DNS-safe name.
1977 }
1978 return(false); // Seems to be a plain name.
1979 }
1980
1981 /**Make single compiled pattern from all regex-match hostname expressions; null if no such expressions.
1982 * The pattern will match if any of the sub-patterns match.
1983 * <p>
1984 * This will whinge about and drop any indiviual non-compilable patterns.
1985 * No whingeing is done, however, if the passed log is null!
1986 */
1987 private static Pattern _makeResidualRegexHostMatch(final Set<String> rawNames,
1988 final SimpleLoggerIF logger)
1989 {
1990 if(rawNames == null) { return(null); }
1991
1992 final StringBuilder compoundExpr = new StringBuilder();
1993
1994 for(final String rawName : rawNames)
1995 {
1996 try
1997 {
1998 // Ignore plain host names that are not regexes...
1999 if(!_mayBeRegexHostname(rawName)) { continue; }
2000
2001 // See if this expression can be compiled, ie is valid...
2002 try { Pattern.compile(rawName, Pattern.CASE_INSENSITIVE); }
2003 catch(final Exception e)
2004 {
2005 if(logger != null)
2006 { logger.log("WARNING: GenProps: ignoring unusable/bad hostname regex: `"+rawName+"'"); }
2007 continue; // Skip this unusable pattern.
2008 }
2009
2010 // Append this pattern to the final set...
2011 if(compoundExpr.length() != 0) { compoundExpr.append('|'); }
2012 compoundExpr.append('(').append(rawName).append(')');
2013 }
2014 catch(final IllegalArgumentException e) { } // Drop this name if it seems to be bad.
2015 }
2016
2017 // If no regexes then return null.
2018 if(compoundExpr.length() == 0) { return(null); }
2019
2020 // Return compiled expression...
2021 return(Pattern.compile(compoundExpr.toString(), Pattern.CASE_INSENSITIVE));
2022 }
2023
2024 /**Normalise Set of normalised host names; result is immutable and not empty, or is null.
2025 * Normalises the set of names provided,
2026 * discarding any that seem to be invalid,
2027 * and stopping when/if we reach the size limit specified.
2028 * <p>
2029 * This skips any that seem to be regexes.
2030 *
2031 * @param rawNames set of raw host names; if null then null is returned
2032 * @param maxSize maximum size of result set; strictly positive
2033 */
2034 private static Set<String> _normaliseHostList(final Set<String> rawNames,
2035 final int maxSize)
2036 {
2037 if((maxSize < 1))
2038 { throw new IllegalArgumentException(); }
2039
2040 if(rawNames == null) { return(null); }
2041
2042 final Set<String> rawResult = new HashSet<String>(1 + 2*Math.min(maxSize, rawNames.size()));
2043
2044 for(final String rawName : rawNames)
2045 {
2046 try
2047 {
2048 if(_mayBeRegexHostname(rawName)) { continue; }
2049 final String normName = MemoryTools.intern(HostUtils.normaliseVirtualHostName(rawName));
2050 rawResult.add(normName); // Add to the result...
2051 if(rawResult.size() >= maxSize) { break; /* Stop when we have enough. */ }
2052 }
2053 catch(final IllegalArgumentException e) { } // Drop this name if it seems to be bad.
2054 }
2055
2056 // Return null rather than an empty set.
2057 if(rawResult.size() == 0) { return(null); }
2058
2059 // Make sure that the result is immutable.
2060 return(Collections.unmodifiableSet(rawResult));
2061 }
2062
2063 /**Parse set of (space-separated) hostnames; returns null for an empty list.
2064 * This does not validate or normalise the names supplied.
2065 *
2066 * @param hostList space-separate list of hostnames (or null for none)
2067 */
2068 private Set<String> _parseHostList(final String hostList)
2069 {
2070 if((hostList == null) || (hostList.length() == 0)) { return(null); }
2071
2072 final StringTokenizer st = new StringTokenizer(hostList);
2073 if(st.countTokens() == 0) { return(null); }
2074
2075 final Set<String> result = new HashSet<String>(1 + 2*st.countTokens());
2076 while(st.hasMoreTokens())
2077 { result.add(st.nextToken()); }
2078
2079 return(result);
2080 }
2081
2082
2083 /**Maximum number of DNSBLs that may be specified. */
2084 public static final int MAX_DNSBLS = 8;
2085
2086 /**Name of Web server DNSBLs set. */
2087 public static final String PNAME_WEBSVR_DNSBLs = "org.hd.d.pg2k.DNSBLs";
2088
2089 /**Immutable Set of DNSBLs (normalised DNS zone names); can be null if there are none, in which case getDNSBLs() substitutes an empty set. */
2090 private /* final */ Set<String> DNSBLs;
2091
2092 /**Get immutable Set of DNSBLs (normalised DNS zone names); never null. */
2093 public final Set<String> getDNSBLs()
2094 {
2095 final Set<String> r = DNSBLs;
2096 if(r == null)
2097 {
2098 // Return empty set if none.
2099 final Set<String> noHosts = Collections.emptySet();
2100 return(noHosts);
2101 }
2102 return(r);
2103 }
2104 }