001 /*
002 Copyright (c) 1996-2012, Damon Hart-Davis
003 All rights reserved.
004
005 Redistribution and use in source and binary forms, with or without
006 modification, are permitted provided that the following conditions are
007 met:
008
009 * Redistributions of source code must retain the above copyright
010 notice, this list of conditions and the following disclaimer.
011
012 * Redistributions in binary form must reproduce the above copyright
013 notice, this list of conditions and the following disclaimer in the
014 documentation and/or other materials provided with the
015 distribution.
016
017 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
018 IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
019 TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
020 PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
021 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
022 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
023 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
024 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
025 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
026 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
027 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
028 */
029 package org.hd.d.pg2k.svrCore.props;
030
031 import java.io.IOException;
032 import java.io.InvalidObjectException;
033 import java.io.ObjectInputStream;
034 import java.io.ObjectInputValidation;
035 import java.io.Serializable;
036 import java.net.InetAddress;
037 import java.net.MalformedURLException;
038 import java.net.URI;
039 import java.net.URL;
040 import java.util.ArrayList;
041 import java.util.Arrays;
042 import java.util.Calendar;
043 import java.util.Collections;
044 import java.util.Date;
045 import java.util.GregorianCalendar;
046 import java.util.HashMap;
047 import java.util.HashSet;
048 import java.util.Iterator;
049 import java.util.List;
050 import java.util.Locale;
051 import java.util.Map;
052 import java.util.Properties;
053 import java.util.Set;
054 import java.util.SortedMap;
055 import java.util.SortedSet;
056 import java.util.StringTokenizer;
057 import java.util.TimeZone;
058 import java.util.TreeMap;
059 import java.util.TreeSet;
060 import java.util.regex.Pattern;
061 import java.util.regex.PatternSyntaxException;
062
063 import org.hd.d.pg2k.svrCore.CoreConsts;
064 import org.hd.d.pg2k.svrCore.ExhibitName;
065 import org.hd.d.pg2k.svrCore.GenUtils;
066 import org.hd.d.pg2k.svrCore.HostUtils;
067 import org.hd.d.pg2k.svrCore.MemoryTools;
068 import org.hd.d.pg2k.svrCore.Name;
069 import org.hd.d.pg2k.svrCore.Rnd;
070 import org.hd.d.pg2k.svrCore.SimpleLoggerIF;
071 import org.hd.d.pg2k.svrCore.MIME.ExhibitMIME;
072 import org.hd.d.pg2k.svrCore.location.GeoUtils;
073 import org.hd.d.pg2k.svrCore.location.GeoUtils.CCTLD;
074
075 import ORG.hd.d.IsDebug;
076
077 /**This holds run-time settable properties for the master and mirrors.
078 * It is immutable, and can be persisted or sent over RMI-IIOP.
079 * <p>
080 * All values are accessed through getXXX() methods
081 * (this object can be used as a bean)
082 * to allow us to always constrain values to sensible limits
083 * (and, for example, to patch up any values that have `gone funny'
084 * in serialisation because (say) the class version changed
085 * and fields were added)
086 * and to allow for local overrides from properties.
087 * This means that some of the getXXXX() methods have to be synchronized.
088 * <p>
089 * We also extensively check object state at construction and
090 * deserialisation; and we try to impose explicit or implicit limits
091 * on the amount of space that an instance of this object can consume,
092 * especially because at a transition between an old version and a new one
093 * there may be multiple instances floating about in memory.
094 * <p>
095 * We don't retain the original raw properties internally, but parse them at
096 * construction time, to do most of the expensive work once if possible,
097 * and to keep the serialised form of the object small.
098 * Defaults are set at parse/construction time,
099 * whereas limits are imposed at getXXX() time which makes for
100 * a lot of robustness if the object is damaged in transit or if
101 * the class details change slightly.
102 * <p>
103 * The version manufactured with a default constructor has a zero timestamp.
104 */
105 public final class GenProps implements Serializable, ObjectInputValidation
106 {
107 /**Our serialisation version. */
108 private static final long serialVersionUID = 8715644348905699663L;
109
110 /**Deserialise. */
111 private void readObject(final ObjectInputStream in)
112 throws IOException, ClassNotFoundException
113 {
114 in.defaultReadObject();
115
116 // Take defensive immutable copy of gen, if present, else make it empty.
117 if(gen == null)
118 { gen = Collections.emptyMap(); }
119 else
120 { gen = Collections.unmodifiableMap(new HashMap<String,String>(gen)); }
121
122 // Take defensive copy of simpleAds[] if present, and re-sort.
123 if(simpleAds != null)
124 {
125 simpleAds = simpleAds.clone();
126 Arrays.sort(simpleAds);
127 }
128 // Take defensive copy of classifiedAds[] if present.
129 if(classifiedAds != null)
130 { classifiedAds = classifiedAds.clone(); }
131
132 // Take defensive copy of authDB if present.
133 if(authDB != null)
134 { authDB = Collections.unmodifiableSortedMap(new TreeMap<String, AuthData>(authDB)); }
135
136 // Take defensive copy of popWeights if present.
137 if(popWeights != null)
138 { popWeights = Collections.unmodifiableSortedMap(new TreeMap<String, Byte>(popWeights)); }
139
140 // Take defensive copies of hotlink hot/cold sets and DNSBLs,
141 // but renormalise hostnames and truncate to size if need be.
142 if(hotLinkAllowHosts != null)
143 { hotLinkAllowHosts = _normaliseHostList(hotLinkAllowHosts, MAX_ALLOW_DISALLOW_HOTLINK_HOSTS); }
144 if(hotLinkDisallowHosts != null)
145 { hotLinkDisallowHosts = _normaliseHostList(hotLinkDisallowHosts, MAX_ALLOW_DISALLOW_HOTLINK_HOSTS); }
146 if(DNSBLs != null)
147 { DNSBLs = _normaliseHostList(DNSBLs, MAX_DNSBLS); }
148
149
150 validateObject(); // Validate state immediately.
151 }
152
153 /**Validate fields/state.
154 * Called in the constructor and possibly after de-serialising.
155 * <p>
156 * Barf if something bad is found.
157 * (Maybe allow some extra info in debug version.)
158 */
159 public void validateObject()
160 throws InvalidObjectException
161 {
162 // Check that all components are sane and safe.
163 if(timestamp < 0)
164 { throw new InvalidObjectException("bad object: timestamp < 0"); }
165
166 // Verify generic properties.
167 if(gen == null)
168 { throw new InvalidObjectException("bad object: gen == null"); }
169 if(gen.size() > MAX_GEN_PROPS)
170 { throw new InvalidObjectException("bad object: gen too large"); }
171 for(final Object kO : gen.keySet())
172 {
173 if(!(kO instanceof String))
174 { throw new InvalidObjectException("bad object: gen key not a String"); }
175 final String k = (String) kO;
176 if(!isSafeGenPropValue(k))
177 { throw new InvalidObjectException("bad object: gen key not valid"); }
178
179 final Object vO = gen.get(k);
180 if(!(vO instanceof String))
181 { throw new InvalidObjectException("bad object: gen value not a String"); }
182 final String v = (String) vO;
183 if(!isSafeGenPropValue(v))
184 { throw new InvalidObjectException("bad object: gen value not valid"); }
185 }
186
187 // Check that HTML meta-header text is still printable ASCII excluding ".
188 if(!isSafeHTMLMetaHeaderString(WEBSVR_META_KEYWORDS))
189 { throw new InvalidObjectException("bad object: unsafe " + PNAME_WEBSVR_META_KEYWORDS); }
190 if(!isSafeHTMLMetaHeaderString(WEBSVR_META_DESCRIPTION))
191 { throw new InvalidObjectException("bad object: unsafe " + PNAME_WEBSVR_META_DESCRIPTION); }
192 // Check that background image name, if set, is syntactically valid.
193 // Check that it is a CharSequence or String.
194 // We also check that it is a JPEG image.
195 if(WEBSVR_BG_IMAGE != null)
196 {
197 final boolean isString = (WEBSVR_BG_IMAGE instanceof String);
198 final boolean isExhibitFull = (!isString) && (WEBSVR_BG_IMAGE instanceof Name.ExhibitFull);
199 if((!isString) && (!isExhibitFull))
200 { throw new InvalidObjectException("bad object: unsafe type " + WEBSVR_BG_IMAGE.getClass().getName()); }
201 if((!isExhibitFull) && (!ExhibitName.validNameSyntax(WEBSVR_BG_IMAGE)))
202 { throw new InvalidObjectException("bad object: unsafe name " + PNAME_WEBSVR_BG_IMAGE); }
203 final ExhibitMIME.ExhibitTypeParameters et =
204 ExhibitMIME.getInputFileType(WEBSVR_BG_IMAGE);
205 if((et == null) || (et.type != ExhibitMIME.ET_JPEG))
206 { throw new InvalidObjectException("bad object: unsafe exhibit type " + PNAME_WEBSVR_BG_IMAGE); }
207 }
208
209 // Check authDB for consistency.
210 if(authDB != null)
211 {
212 if((authDB.size() == 0) || (authDB.size() > MAX_AUTH_ENTRIES))
213 { throw new InvalidObjectException("bad object: bad-length authDB"); }
214
215 // Check all entries are of the correct type (AuthData).
216 for(final Iterator it = authDB.keySet().iterator(); it.hasNext(); )
217 {
218 final Object key = it.next();
219 if(!(key instanceof String))
220 { throw new InvalidObjectException("bad object: bad key in authDB"); }
221 if(!(authDB.get(key) instanceof AuthData))
222 { throw new InvalidObjectException("bad object: bad value in authDB"); }
223 }
224 }
225
226 // Check "goodness"/popularity weights for consistency.
227 if(popWeights != null)
228 {
229 if((popWeights.size() == 0) || (popWeights.size() > MAX_POPWT_ENTRIES))
230 { throw new InvalidObjectException("bad object: bad-length popWeights"); }
231
232 // Check all entries are of the correct type (AuthData).
233 for(final Iterator<String> it = popWeights.keySet().iterator(); it.hasNext(); )
234 {
235 final String key = it.next();
236 if(!ExhibitName.validAuthorSyntax(key) &&
237 !ExhibitName.validAttributeWord(key) &&
238 (ExhibitMIME.isValidInputExhibitNameExtension(key) == null))
239 { throw new InvalidObjectException("bad object: bad key in popWeights"); }
240 final Byte val = popWeights.get(key);
241 if((val == null) ||
242 (val.byteValue() < MIN_POPWT_VAL) ||
243 (val.byteValue() > MAX_POPWT_VAL))
244 { throw new InvalidObjectException("bad object: bad value in popWeights"); }
245 }
246 }
247
248 // Check ads for consistency.
249 // Don't waste space with zero-length simpleAds.
250 if((simpleAds != null) &&
251 ((simpleAds.length == 0) || (simpleAds.length > MAX_SIMPLE_ADS)))
252 { throw new InvalidObjectException("bad object: bad-length simpleAds[]"); }
253 // Ensure no nulls in simpleAds,
254 // and that ads are sorted.
255 if(simpleAds != null)
256 {
257 for(int i = simpleAds.length; --i >= 0; )
258 {
259 if(simpleAds[i] == null)
260 { throw new InvalidObjectException("bad object: simpleAds[] contains null"); }
261 // Check pair-wise sort order...
262 // The ordering should be total,
263 // but we'll live with monotonic.
264 if(i > 0)
265 {
266 if(simpleAds[i-1].compareTo(simpleAds[i]) > 0)
267 { throw new InvalidObjectException("bad object: simpleAds[] not ordered"); }
268 }
269 }
270 }
271 // Check correct total for simpleAds.
272 if(totalSimpleAdWeight != _compute_totalSimpleAdWeight(simpleAds))
273 { throw new InvalidObjectException("bad object: totalSimpleAdWeight inconsistent"); }
274 if(totalSimpleAdWeight < 0)
275 { throw new InvalidObjectException("bad object: totalSimpleAdWeight < 0"); }
276 // Don't waste space with zero-length classifiedAds.
277 if((classifiedAds != null) &&
278 ((classifiedAds.length == 0) || (classifiedAds.length > MAX_CLASSIFIED_ADS)))
279 { throw new InvalidObjectException("bad object: bad-length classifiedAds[]"); }
280 // Ensure no nulls in classifiedAds,
281 // TODO: and that ads are unique.
282 if(classifiedAds != null)
283 {
284 for(int i = classifiedAds.length; --i >= 0; )
285 {
286 if(classifiedAds[i] == null)
287 { throw new InvalidObjectException("bad object: classifiedAds[] contains null"); }
288 }
289 }
290 // There must be no ads held internally if ads are switched off.
291 if(!WEBSVR_SIMPLEAD_ALLOW &&
292 ((totalSimpleAdWeight != 0) || (simpleAds != null) || (classifiedAds != null)))
293 { throw new InvalidObjectException("bad object: simple ads loaded though disabled"); }
294
295 // Verify hotlinker diversion URL is valid (http) URL.
296 if(WEBSVR_EX_HOTLINK_DIVERT_URL != null)
297 {
298 try {
299 if(!"http".equals((new URL(WEBSVR_EX_HOTLINK_DIVERT_URL)).getProtocol()))
300 { throw new InvalidObjectException("bad object: invalid non-HTTP hotlink divert URL"); }
301 }
302 catch(final MalformedURLException e)
303 {
304 throw new InvalidObjectException("bad object: invalid unparseable hotlink divert URL");
305 }
306 }
307
308 // FIXME: Should check content to be normalised host names too...
309 if(hotLinkAllowHosts != null)
310 {
311 if(hotLinkAllowHosts.size() > MAX_ALLOW_DISALLOW_HOTLINK_HOSTS)
312 { throw new InvalidObjectException("bad object: too many allow hosts"); }
313 }
314 if(hotLinkDisallowHosts != null)
315 {
316 if(hotLinkDisallowHosts.size() > MAX_ALLOW_DISALLOW_HOTLINK_HOSTS)
317 { throw new InvalidObjectException("bad object: too many disallow hosts"); }
318 }
319 if(DNSBLs != null)
320 {
321 if(DNSBLs.size() > MAX_DNSBLS)
322 { throw new InvalidObjectException("bad object: too many DNSBLs"); }
323 }
324 }
325
326 /**Check if String is save as generic property key or value.
327 * Null and zero-length values are not permitted.
328 * <p>
329 * Only ASCII values in the range 32 to 126 (plus \r\n\t) are allowed.
330 *
331 * @param s the putative generic property key or value
332 * @return true iff the argument is a non-null, non-empty, not-too-long
333 * pure-ASCII value
334 */
335 private static final boolean isSafeGenPropValue(final String s)
336 {
337 // Check basic constraints.
338 if(s == null) { return(false); }
339 final int length = s.length();
340 if(length == 0) { return(false); }
341 if(length > MAX_GEN_LEN) { return(false); }
342
343 for(int i = length; --i >= 0; )
344 {
345 final char c = s.charAt(i);
346 if((c >= 32) || (c <= 126)) { continue; } // Usual case.
347 if((c == '\n') || (c == '\r') || (c == '\t')) { continue; } // Allowed whitespace.
348 return(false); // Bad character.
349 }
350
351 return(true); // Benign string.
352 }
353
354 /**Check if String is safe to use as HTML meta-header; return false if not.
355 * Null and empty string are regarded as OK.
356 */
357 private static final boolean isSafeHTMLMetaHeaderString(final String s)
358 {
359 if(s == null) { return(true); }
360 for(int i = s.length(); --i >= 0; )
361 {
362 if(!isSafeHTMLMetaHeaderChar(s.charAt(i)))
363 { return(false); }
364 }
365 return(true); // All seems OK!
366 }
367
368 // /**Flags for User-Agent pattern matching checking for mobile phones. */
369 // private static final int REGEX_FLAGS = Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE | Pattern.CANON_EQ;
370 //
371 // /**Regex expression for one character safe for an HTML meta-header.
372 // * Basically such a character must be printable ASCII, and
373 // * one of:
374 // * <ul>
375 // * <li>a letter
376 // * <li>a digit
377 // * <li>a space
378 // * <li>one of the punctuation characters -+?!()[]{}/_.;:,@
379 // * </ul>
380 // */
381 // private static final String SAFE_HTML_META_HEADER_CHAR = "[a-zA-Z0-9+?!()\\[\\]{}/_.;:,@-]";
382 //
383 // /**Single 'safe meta header character' match. */
384 // private static final Pattern SAFE_HTML_META_HEADER_CHAR_PATTERN = Pattern.compile(SAFE_HTML_META_HEADER_CHAR, REGEX_FLAGS);
385
386 /**Check that character is safe for an HTML meta-header; return false if not.
387 * Basically such a character must be printable ASCII, and
388 * one of:
389 * <ul>
390 * <li>a letter
391 * <li>a digit
392 * <li>a space
393 * <li>one of the punctuation characters -+?!()[]{}/_.;:,@
394 * </ul>
395 */
396 private static final boolean isSafeHTMLMetaHeaderChar(final char c)
397 {
398 switch(c)
399 {
400 // Letters...
401 case 'a': case 'A':
402 case 'b': case 'B':
403 case 'c': case 'C':
404 case 'd': case 'D':
405 case 'e': case 'E':
406 case 'f': case 'F':
407 case 'g': case 'G':
408 case 'h': case 'H':
409 case 'i': case 'I':
410 case 'j': case 'J':
411 case 'k': case 'K':
412 case 'l': case 'L':
413 case 'm': case 'M':
414 case 'n': case 'N':
415 case 'o': case 'O':
416 case 'p': case 'P':
417 case 'q': case 'Q':
418 case 'r': case 'R':
419 case 's': case 'S':
420 case 't': case 'T':
421 case 'u': case 'U':
422 case 'v': case 'V':
423 case 'w': case 'W':
424 case 'x': case 'X':
425 case 'y': case 'Y':
426 case 'z': case 'Z':
427 // Digits.
428 case '0': case '1': case '2': case '3': case '4':
429 case '5': case '6': case '7': case '8': case '9':
430 // Safe punctuation and space.
431 case ' ':
432 case ',': case '.': case ';': case ':':
433 case '-': case '+':
434 case '!': case '?':
435 case '(': case ')':
436 case '[': case ']':
437 case '{': case '}':
438 case '/': case '_': case '@':
439 { return(true); } // OK!
440
441 default:
442 { return(false); } // Not safe.
443 }
444 }
445
/**Create the default instance: no properties supplied, timestamp of zero, no logger.
 * Equivalent to parsing an empty Properties set.
 */
public GenProps()
{
    this(new Properties(), 0L, null);
}
450
/**Create a new, immutable, properties set, reporting problems via GenUtils.systemErrLogger.
 * <p>
 * Small defects in the supplied properties are ignored or logged,
 * with defaults used in place of broken or absent values.
 *
 * @param props  the raw properties to parse; must be non-null
 * @param _timestamp  the timestamp for this set; must be non-negative
 */
public GenProps(final Properties props,
                final long _timestamp)
{
    this(props, _timestamp, GenUtils.systemErrLogger);
}
462
/**Construct a new, immutable, properties set.
 * The properties must be non-null
 * and the timestamp must be non-negative.
 * <p>
 * Minor problems with the properties themselves will
 * be silently ignored or logged, and defaults substituted
 * for broken or missing values.
 * <p>
 * All parsing is done here, once; the raw properties are not retained.
 * The complete parsed state is checked with validateObject() before returning.
 *
 * @param props  the raw properties to parse; must be non-null
 *     and all of its keys must be Strings
 * @param _timestamp  the timestamp for this set; must be non-negative
 * @param logger  if non-null then non-fatal problems will be logged here
 * @throws IllegalArgumentException  if props is null, the timestamp is negative,
 *     a key is not a String, or the parsed state fails validation
 */
public GenProps(final Properties props,
                final long _timestamp,
                final SimpleLoggerIF logger)
{
    if((props == null) || (_timestamp < 0))
        { throw new IllegalArgumentException(); }
    timestamp = _timestamp;

    // Parse properties.
    // Set up some working variables to help with parsing.
    int iTmp;
    String sTmp;
    Boolean bTmp;

    // Extract the generic key/value properties, if any,
    // stripping the generic prefix from each key.
    final Map<String,String> g = new HashMap<String, String>();
    for(final Object keyO : props.keySet())
    {
        if(!(keyO instanceof String))
            { throw new IllegalArgumentException("bad properties key: not String"); }
        final String key = (String) keyO;

        if(!key.startsWith(GenPropsGenNames.GEN_PREFIX)) { continue; }

        final String gKey = key.substring(GenPropsGenNames.GEN_PREFIX.length());
        final String gVal = props.getProperty(key);

        // Validate the values later.
        g.put(gKey, gVal);
    }
    // If no generic keys then save space with shared empty Map.
    if(g.size() == 0)
        { gen = Collections.emptyMap(); }
    else
        { gen = Collections.unmodifiableMap(g); }

    // Simple integer values: an absent property takes the default shown,
    // an unparseable one leaves -1.
    iTmp = -1;
    try { iTmp = Integer.parseInt(props.getProperty(PNAME_WEBSVR_MIN_EX_IMATTR_RECHECK_MS, "120000"), 10); }
    catch(final Exception e) { } // Ignore errors.
    WEBSVR_MIN_EX_IMATTR_RECHECK_MS = iTmp;

    iTmp = -1;
    try { iTmp = Integer.parseInt(props.getProperty(PNAME_WEBSVR_SYSPROPS_RECHECK_MS, "10013"), 10); }
    catch(final Exception e) { } // Ignore errors.
    WEBSVR_SYSPROPS_RECHECK_MS = iTmp;

    iTmp = -1;
    try { iTmp = Integer.parseInt(props.getProperty(PNAME_WEBSVR_MAX_CACHEABLE_EX_BYTES, "2123456"), 10); }
    catch(final Exception e) { } // Ignore errors.
    WEBSVR_MAX_CACHEABLE_EX_BYTES = iTmp;

    iTmp = -1;
    try { iTmp = Integer.parseInt(props.getProperty(PNAME_WEBSVR_BW_LIMITER, "10"), 10); }
    catch(final Exception e) { } // Ignore errors.
    WEBSVR_BW_LIMITER = iTmp;

    iTmp = -1;
    try { iTmp = Integer.parseInt(props.getProperty(PNAME_WEBSVR_TN_CACHEPC, "10"), 10); }
    catch(final Exception e) { } // Ignore errors.
    WEBSVR_TN_CACHEPC = iTmp;

    iTmp = -1;
    try { iTmp = Integer.parseInt(props.getProperty(PNAME_WEBSVR_MAX_EX_BYTES, "1123456789"), 10); }
    catch(final Exception e) { } // Ignore errors.
    WEBSVR_MAX_EX_BYTES = iTmp;

    // We canonicalise this keyword list to reduce the
    // space it consumes (especially after compression), by:
    //   * Converting to lower-case (helps compression; helps drop dups).
    //   * Tokenize with "," and trim whitespace around each token.
    //   * We could eliminate duplicate whitespace within a single token too.
    //   * Eliminate duplicates.
    //   * Sort (to help compression).
    //   * Glue back together again without whitespace.
    //   * Eliminate any quote (" or ') characters.
    // We object strongly to non-ASCII text (we remove it!).
    sTmp = null;
    try {
        // Lower-case with a fixed locale so that the result
        // does not depend on the default locale of the host JVM.
        sTmp = props.getProperty(PNAME_WEBSVR_META_KEYWORDS, "").trim().toLowerCase(Locale.ENGLISH);
        final SortedSet<String> words = new TreeSet<String>();
        final StringTokenizer st = new StringTokenizer(sTmp, ",");
        while(st.hasMoreTokens())
        {
            // Take each token,
            // zap any non-ASCII printable (and unsafe) chars,
            // and trim off any obvious excess whitespace.
            String s = st.nextToken();
            for(int i = s.length(); --i >= 0; )
            {
                final char c = s.charAt(i);
                // replace() preserves length, so the index remains valid.
                if(!isSafeHTMLMetaHeaderChar(c)) { s = s.replace(c, ' '); }
            }
            s = s.trim();
            words.add(s);
        }
        final StringBuilder sb = new StringBuilder(sTmp.length());
        for(final Iterator<String> it = words.iterator(); it.hasNext(); )
        {
            final String s = it.next();
            sb.append(s);
            if(it.hasNext()) { sb.append(','); }
        }
        sTmp = sb.toString();
    }
    catch(final Exception e) { } // Ignore errors.
    WEBSVR_META_KEYWORDS = sTmp;

    sTmp = null;
    try {
        sTmp = props.getProperty(PNAME_WEBSVR_META_DESCRIPTION, "");
        // Discard any unsafe characters for a meta header.
        for(int i = sTmp.length(); --i >= 0; )
        {
            final char c = sTmp.charAt(i);
            if(!isSafeHTMLMetaHeaderChar(c)) { sTmp = sTmp.replace(c, ' '); }
        }
        sTmp = sTmp.trim();
    }
    catch(final Exception e) { } // Ignore errors.
    WEBSVR_META_DESCRIPTION = sTmp;

    // Background image:
    // drop if not a valid-syntax JPEG exhibit name.
    // We *do not* attempt to check that it exists here.
    sTmp = null;
    try {
        // Only accept the candidate once every check has passed,
        // so that an exception part-way through cannot leave
        // an unvalidated raw value behind in sTmp.
        final String candidate = props.getProperty(PNAME_WEBSVR_BG_IMAGE);
        if(ExhibitName.validNameSyntax(candidate))
        {
            final ExhibitMIME.ExhibitTypeParameters tp =
                ExhibitMIME.getInputFileType(candidate);
            if((tp != null) && (tp.type == ExhibitMIME.ET_JPEG))
                { sTmp = candidate; }
        }
    }
    catch(final Exception e) { } // Ignore errors.
    WEBSVR_BG_IMAGE = (sTmp == null) ? null : Name.ExhibitFull.create(sTmp);

    // NOTE(review): Boolean.valueOf(String) returns FALSE for a null argument
    // and never returns null, so an absent property means ads are disallowed
    // and the "true" alternative below is never taken;
    // confirm which default is actually intended.
    bTmp = null;
    try { bTmp = Boolean.valueOf(props.getProperty(PNAME_WEBSVR_SIMPLEAD_ALLOW)); }
    catch(final Exception e) { } // Ignore errors.
    WEBSVR_SIMPLEAD_ALLOW = (bTmp == null) ? true : bTmp.booleanValue();
    SimpleAd[] simpleAdsTmp = null;
    if(WEBSVR_SIMPLEAD_ALLOW)
    {
        final List<SimpleAd> v = new ArrayList<SimpleAd>(); // Collect ads here...
        // Only even look for simple ads if they are allowed.
        // Ads are numbered consecutively from 1;
        // the first missing "HTML" entry ends the list.
        for(int N = 1; N < Integer.MAX_VALUE; ++N)
        {
            final String prefix = PNAME_WEBSVR_SIMPLEAD_PREFIX + N + ".";
            final String rawCode = props.getProperty(prefix + "HTML");
            if(rawCode == null) { break; } // End of ads.

            final String code = rawCode.trim();
            int weight = 100; // Default, also used if the weight is absent or unparseable.
            try { weight = Integer.parseInt(props.getProperty(prefix + "wt"), 10); }
            catch(final Exception e) { } // Ignore errors.
            if(weight <= 0) { continue; } // Temporarily disabled; skip.

            // Attempt to construct new ad item and save it,
            // but quietly skip any difficulties.
            try { v.add(MemoryTools.intern(new SimpleAd(weight, code))); }
            catch(final Exception e) { } // Ignore errors.
        }
        // If we found some ads, extract and sort them, ready to store.
        if(v.size() > 0)
        {
            simpleAdsTmp = new SimpleAd[v.size()];
            v.toArray(simpleAdsTmp);
            Arrays.sort(simpleAdsTmp);
        }
    }
    simpleAds = simpleAdsTmp; // Store simple ads, if any.
    totalSimpleAdWeight = _compute_totalSimpleAdWeight(simpleAds);

    // Parse classified ads...
    ClassifiedAd[] classifiedAdsTmp = null;
    // Disable classified ads when we disable simple ads.
    if(WEBSVR_SIMPLEAD_ALLOW)
    {
        final Set<ClassifiedAd> v = new HashSet<ClassifiedAd>(); // Collect ads here...
        // Trawl through all properties looking for the mandatory regex value(s).
        for(final Object keyO: props.keySet())
        {
            // Skip keys nothing to do with classified ads.
            assert(keyO instanceof String);
            final String regexKey = (String) keyO;
            if(!regexKey.startsWith(PNAME_WEBSVR_CLASSIFIEDAD_PREFIX)) { continue; }
            // Skip everything but the URI regex key initially.
            if(!regexKey.endsWith(".URIregex")) { continue; }
            // Compute the stub up to and including the final '.'
            // to allow us to verify other keys.
            final int lastDot = regexKey.lastIndexOf('.');
            final String stub = regexKey.substring(0, lastDot+1);
            // Try to create an instance for the classified ad found,
            // logging any problems from invalid data
            // (and dropping/ignoring any such 'bad' ad).
            try {
                // We interpret/parse start/end UTC dates (if any) here.
                final long start = parseUTCyyyymmdd(props.getProperty(stub+"start"));
                final long end = parseUTCyyyymmdd(props.getProperty(stub+"end"));
                // We omit ads whose end date has already expired by the timestamp
                // as a minor system-wide optimisation.
                if((end != 0) && (end < _timestamp)) { continue; }
                v.add(new ClassifiedAd(props.getProperty(stub+"HTML"),
                                       props.getProperty(regexKey),
                                       start,
                                       end,
                                       props.getProperty(stub+"lang"),
                                       props.getProperty(stub+"countries")));
                if(IsDebug.isDebug) { System.out.println("INFO: GenProps: classified ad loaded: "+stub); }
            }
            catch(final Exception e)
            {
                // The logger is optional (the default constructor passes null),
                // so a bad ad must not turn into a NullPointerException here.
                if(logger != null)
                    { logger.log("ERROR: GenProps: unable to load classified ad "+stub+"... "+e.getMessage()); }
                if(IsDebug.isDebug) { System.err.println("ERROR: GenProps: classified ad BROKEN: "+stub); }
            }
        }

        // If we found some ads, extract and store.
        if(v.size() > 0)
        {
            classifiedAdsTmp = new ClassifiedAd[v.size()];
            v.toArray(classifiedAdsTmp);
            if(IsDebug.isDebug) { System.out.println("INFO: GenProps: classified ads loaded: "+v.size()); }
        }
    }
    classifiedAds = classifiedAdsTmp; // Store classified ads, if any.


    // Extract author details.
    // We iterate over all the properties, looking for suitable candidates.
    final TreeMap<String,AuthData> putativeAuthDB = new TreeMap<String, AuthData>();
    for(final Object keyO : props.keySet())
    {
        try
        {
            final String key = (String) keyO;
            if(!key.startsWith(PPREFIX_AUTH_DETAILS))
                { continue; /* Not the right prefix for authDB. */ }

            // We have a key that could be a valid author entry...
            final String auth = key.substring(PPREFIX_AUTH_DETAILS.length());
            // TODO: add warning for invalid entries?
            if(!ExhibitName.validAuthorSyntax(auth))
                { continue; /* Not a valid author initials string. */ }

            // TODO: add warning for duplicate entries?
            putativeAuthDB.put(MemoryTools.intern(auth),
                               MemoryTools.intern(new AuthData(auth, props.getProperty(key))));
        }
        catch(final Exception e)
        {
            // Absorb errors...
            e.printStackTrace();
        }
    }
    // Save immutable version of our authDB.
    // If empty, save space by storing a null instead.
    authDB = (putativeAuthDB.size() == 0) ? null :
        Collections.unmodifiableSortedMap(putativeAuthDB);


    // Extract supplied static "goodness"/popularity weightings, if any.
    // We iterate over all the properties, looking for suitable candidates.
    // Weights may be keyed by author, by exhibit type/extension or by attribute word;
    // the three key prefixes are loop-invariant so are computed once.
    final String aWPrefix = PPREFIX_POPWT_DETAILS + PCOMP_POPWR_BYAUTH;
    final String tWPrefix = PPREFIX_POPWT_DETAILS + PCOMP_POPWR_BYTYPE;
    final String attrWPrefix = PPREFIX_POPWT_DETAILS + PCOMP_POPWR_BYATTR;
    final HashMap<String,Byte> putativePopWeights = new HashMap<String, Byte>();
    for(final Object keyO : props.keySet())
    {
        try
        {
            final String key = (String) keyO;
            if(!key.startsWith(PPREFIX_POPWT_DETAILS))
                { continue; /* Not the right prefix for popWeight. */ }

            // Work out what is being weighted, skipping invalid names.
            // TODO: add warning for invalid entries?
            final String name;
            if(key.startsWith(aWPrefix))
            {
                // We have a key that could be a valid author weighting entry...
                name = key.substring(aWPrefix.length());
                if(!ExhibitName.validAuthorSyntax(name))
                    { continue; /* Not a valid author initials string. */ }
            }
            else if(key.startsWith(tWPrefix))
            {
                // We have a key that could be a valid type/extension weighting entry...
                name = key.substring(tWPrefix.length());
                if(ExhibitMIME.isValidInputExhibitNameExtension(name) == null)
                    { continue; /* Not a valid type string. */ }
            }
            else if(key.startsWith(attrWPrefix))
            {
                // We have a key that could be a valid attribute-word weighting entry...
                name = key.substring(attrWPrefix.length());
                if(!ExhibitName.validAttributeWord(name))
                    { continue; /* Not a valid attribute word string. */ }
            }
            else
                { continue; /* Not a recognised kind of weighting. */ }

            final Byte v = Byte.decode(props.getProperty(key));
            if((v < MIN_POPWT_VAL) || (v > MAX_POPWT_VAL))
                { continue; /* Not a valid weight. */ }
            putativePopWeights.put(MemoryTools.intern(name),
                                   MemoryTools.intern(v));
        }
        catch(final Exception e)
        {
            // Absorb errors...
            e.printStackTrace();
        }
    }
    // Save immutable version of our popWeights.
    // If empty, save space by storing a null instead.
    popWeights = (putativePopWeights.size() == 0) ? null :
        Collections.unmodifiableMap(putativePopWeights);


    // Hotlink limiter: an absent property defaults to 0,
    // an unparseable one leaves -1 which is then clamped up to 0;
    // the value is clamped to [0,255] and then narrowed to a byte.
    iTmp = -1;
    try { iTmp = Integer.parseInt(props.getProperty(PNAME_WEBSVR_EX_HOTLINK_LIMITER, "0"), 10); }
    catch(final Exception e) { } // Ignore errors.
    WEBSVR_EX_HOTLINK_LIMITER = (byte) Math.max(0, Math.min(255, iTmp));

    // Get alternative URL to divert unwanted exhibit hotlinks to, if any.
    // Invalid URLs are ignored and valid ones may be canonicalised.
    sTmp = null;
    try
    {
        final String p = props.getProperty(PNAME_WEBSVR_EX_HOTLINK_DIVERT_URL);
        if((p != null) && (p.startsWith("http:")))
            { sTmp = (new URL(p)).toExternalForm(); } // Parse and canonicalise.
    }
    catch(final MalformedURLException e) { } // Ignore malformed URLs.
    WEBSVR_EX_HOTLINK_DIVERT_URL = sTmp;

    // Parse lists of hotlinker allow/deny hosts, DNSBLs...
    final Set<String> rawAllowNames = _parseHostList(
        props.getProperty(PNAME_WEBSVR_EX_HOTLINK_LIMITER_ALLOW));
    hotLinkAllowHosts = _normaliseHostList(rawAllowNames, MAX_ALLOW_DISALLOW_HOTLINK_HOSTS);
    hotLinkAllowHostsRegex = _makeResidualRegexHostMatch(rawAllowNames, logger);
    final Set<String> rawDisallowNames = _parseHostList(
        props.getProperty(PNAME_WEBSVR_EX_HOTLINK_LIMITER_DISALLOW));
    hotLinkDisallowHosts = _normaliseHostList(rawDisallowNames, MAX_ALLOW_DISALLOW_HOTLINK_HOSTS);
    hotLinkDisallowHostsRegex = _makeResidualRegexHostMatch(rawDisallowNames, logger);
    DNSBLs = _normaliseHostList(_parseHostList(
        props.getProperty(PNAME_WEBSVR_DNSBLs)), MAX_DNSBLS);


    // Verify complete object state.
    try { validateObject(); }
    catch(final InvalidObjectException e)
        { throw new IllegalArgumentException(e); }
}
846
847
848 /**Parse UTC YYYYMMDD date, or zero if absent (null). */
849 private static long parseUTCyyyymmdd(final String property)
850 {
851 if(property == null) { return(0); }
852 // property = property.trim();
853 if(property.length() != 8) { throw new IllegalArgumentException("format required: YYYYMMDD"); }
854
855 // Get GMT/UTC timezone.
856 final TimeZone tz = TimeZone.getTimeZone("GMT");
857 final GregorianCalendar gc = new GregorianCalendar(tz);
858 // Set YMD from property value.
859 gc.set(Calendar.YEAR, Integer.parseInt(property.substring(0, 4), 10));
860 // Not that month is zero-based (January is zero).
861 gc.set(Calendar.MONTH, Integer.parseInt(property.substring(4, 6), 10) - 1);
862 gc.set(Calendar.DAY_OF_MONTH, Integer.parseInt(property.substring(6, 8), 10));
863
864 // Convert to timestamp and return if sensible...
865 final long t = gc.getTimeInMillis();
866 if(t < CoreConsts.GALLERY_EPOC_START)
867 { throw new IllegalArgumentException("classified ad date too early to be sensible"); }
868 return(t);
869 }
870
871 /**Timestamp of this properties set; zero for default/empty set. */
872 public final long timestamp;
873
874
875 /**Maximum number of general properties. */
876 public static final int MAX_GEN_PROPS = 1024;
877
878 /**Maximum length of general property key or value (chars). */
879 public static final int MAX_GEN_LEN = 1024;
880
881 /**The (immutable) generic properties map; never null. */
882 private Map<String,String> gen;
883
884 /**Get the immutable generic properties; never null.
885 * The keys in this Map here are stripped of the initial GEN_PREFIX
886 * that appears in the properties file.
887 */
888 public Map<String,String> getGen()
889 { return(gen); }
890
891
892 /**Name of Web server minimum exhibit-immutable-attributes check interval (ms) property. */
893 public static final String PNAME_WEBSVR_MIN_EX_IMATTR_RECHECK_MS = "pg2k.websvr.ex.imattr.minrecheckms";
894 /**Web server minimum exhibit-immutable-attributes check interval (ms). */
895 private final int WEBSVR_MIN_EX_IMATTR_RECHECK_MS;
896 /**Get the Web server minimum exhibit-immutable-attributes check interval (ms).
897 * Constrained to the range approx 1s to 6h, default approx 2m.
898 */
899 public final int getWEBSVR_MIN_EX_IMATTR_RECHECK_MS()
900 { return(Math.max(1001, Math.min(6 * 3600 * 1000, WEBSVR_MIN_EX_IMATTR_RECHECK_MS))); }
901
902 /**Name of Web server sysprops check interval (ms) property. */
903 public static final String PNAME_WEBSVR_SYSPROPS_RECHECK_MS = "pg2k.websvr.props.sys.recheckms";
904 /**Web server sysprops (ie most property files including GenProps) (re)check interval (ms). */
905 private final int WEBSVR_SYSPROPS_RECHECK_MS;
906 /**Get the Web server sysprops check interval (ms).
907 * Constrained to the range approx 1s to 1h, default approx 10s (which applies until first props set is fetched).
908 */
909 public final int getWEBSVR_SYSPROPS_RECHECK_MS()
910 { return(Math.max(1003, Math.min(1 * 3600 * 1000, WEBSVR_SYSPROPS_RECHECK_MS))); }
911
912 /**Name of Web server maximum-cached-exhibit-prefix-size (bytes) property. */
913 public static final String PNAME_WEBSVR_MAX_CACHEABLE_EX_BYTES = "pg2k.websvr.ex.maxcacheablesize";
914 /**Web server maximum-cached-exhibit-prefix-size (bytes). */
915 private final int WEBSVR_MAX_CACHEABLE_EX_BYTES;
916 /**Get the Web server maximum-cached-exhibit-prefix-size (bytes).
917 * Constrained to the range approx 64kB to 1GB, default approx 2MB.
918 */
919 public final int getWEBSVR_MAX_CACHEABLE_EX_BYTES()
920 { return(Math.max(1<<16, Math.min(1123456789, WEBSVR_MAX_CACHEABLE_EX_BYTES))); }
921
922 /**Name of Web server persistent thumbnail cache size limit as percentage of exhibit cache limit value. */
923 public static final String PNAME_WEBSVR_TN_CACHEPC = "pg2k.websvr.ex.tncachepercent";
924 /**Web server persistent thumbnail cache size limit as percentage of exhibit cache limit value. */
925 private final int WEBSVR_TN_CACHEPC;
926 /**Web server persistent thumbnail cache size limit as percentage of exhibit cache limit value.
927 * Constrained to the range 0 to 100, default approx 10.
928 * <p>
929 * A value of zero disables the persistent cache
930 * (though in-memory cacheing may continue).
931 * <p>
932 * Any additional cache of generated thumbnails (exhibit samples)
933 * is limited to a size at most this many percent of the exhibit cache size.
934 * This cache is managed separately from this exhibit cache, but yoking the
935 * values together makes for simpler system management.
936 */
937 public final int getWEBSVR_TN_CACHEPC()
938 { return(Math.max(0, Math.min(100, WEBSVR_TN_CACHEPC))); }
939
940 /**Name of Web server precacheing bandwidth-limiter value (max fraction of available bandwidth used on precacheing). */
941 public static final String PNAME_WEBSVR_BW_LIMITER = "pg2k.websvr.ex.bwlimiter";
942 /**Web server precacheing bandwidth-limiter value (max fraction of available bandwidth used on precacheing). */
943 private final int WEBSVR_BW_LIMITER;
944 /**Web server precacheing bandwidth-limiter value (max fraction of available bandwidth used on precacheing).
945 * Constrained to the range approx 1 to 1000, default approx 10.
946 * <p>
947 * If any precacheing operation takes N milliseconds the next one
948 * will not be started until at least N times this value later.
949 * <p>
950 * The effect is to limit resource consumption to at most about
951 * 1/N of the most constrained of slave and master resources
952 * (typically CPU and/or bandwidth).
953 */
954 public final int getWEBSVR_BW_LIMITER()
955 { return(Math.max(1, Math.min(1000, WEBSVR_BW_LIMITER))); }
956
957 /**Name of Web server maximum exhibit size (32-bit value, bytes). */
958 public static final String PNAME_WEBSVR_MAX_EX_BYTES = "pg2k.websvr.ex.maxexhibitsize";
959 /**Web server maximum exhibit size (32-bit value, bytes). */
960 private final int WEBSVR_MAX_EX_BYTES;
961 /**Get the Web server maximum exhibit size (32-bit value, bytes).
962 * Constrained to the range approx 1B to 1GB, default approx 1GB.
963 */
964 public final int getWEBSVR_MAX_EX_BYTES()
965 { return(Math.max(1, Math.min(Integer.MAX_VALUE/2, WEBSVR_MAX_EX_BYTES))); }
966
967 /**Name of keywords list for HTML meta header ("" if none; not null). */
968 public static final String PNAME_WEBSVR_META_KEYWORDS = "pg2k.websvr.meta.keywords";
969 /**Keywords list for HTML meta header ("" if none; not null). */
970 private final String WEBSVR_META_KEYWORDS;
971 /**Get the keywords list for HTML meta header ("" if none; never null).
972 * May be trimmed of excess whitespace and canonicalised; "" if none.
973 * <p>
974 * Guaranteed only to contain characters safe to put directly into
975 * an HTML description meta-header attribute value.
976 * <p>
977 * In particular this result contains no quote marks (" or ')
978 * nor ampersand nor angle-brackets, though may contain whitespace,
979 * so should be quoted.
980 */
981 public final String getWEBSVR_META_KEYWORDS()
982 {
983 final String result = WEBSVR_META_KEYWORDS;
984 if(result == null) { return(""); }
985 assert(result.indexOf('\'') == -1);
986 assert(result.indexOf('"') == -1);
987 assert(result.indexOf('&') == -1);
988 assert(result.indexOf('<') == -1);
989 assert(result.indexOf('>') == -1);
990 return(result);
991 }
992
993 /**Name of description for HTML meta header ("" if none; not null). */
994 public static final String PNAME_WEBSVR_META_DESCRIPTION = "pg2k.websvr.meta.description";
995 /**Description for HTML meta header ("" if none; not null). */
996 private final String WEBSVR_META_DESCRIPTION;
997 /**Get the description list for HTML meta header ("" if none; not null).
998 * May be trimmed of excess whitespace; "" if none.
999 */
1000 public final String getWEBSVR_META_DESCRIPTION()
1001 {
1002 final String result = WEBSVR_META_DESCRIPTION;
1003 if(result == null) { return(""); }
1004 return(result);
1005 }
1006
1007 /**Name of JPEG exhibit to use as a background image (null if none). */
1008 public static final String PNAME_WEBSVR_BG_IMAGE = "pg2k.websvr.livery.default.bg";
1009 /**JPEG exhibit to use as a background image (null if none).
1010 * Should be Name.ExhibitFull but could be String from deserialising old format.
1011 */
1012 private final CharSequence WEBSVR_BG_IMAGE;
1013 /**Get the JPEG exhibit to use as a background image (null if none).
1014 * Is a syntactically-valid JPEG exhibit name (or null).
1015 */
1016 public final Name.ExhibitFull getWEBSVR_BG_IMAGE()
1017 {
1018 final CharSequence result = WEBSVR_BG_IMAGE;
1019 if(null == result) { return(null); }
1020 if(result.getClass() == Name.ExhibitFull.class) { return((Name.ExhibitFull) result); }
1021 return(Name.ExhibitFull.create(result));
1022 }
1023
1024 /**Class holding details of one simple banner advertisement; immutable, serialisable.
1025 * Weighting must be strictly positive integer; text must be
1026 * non-zero-length, 7-bit printable (32--126) ASCII/HTML data,
1027 * though we also allow CRLF (\r\n) for tricky JavaScript/HTML issues...
1028 * <p>
1029 * We assume that the on-the-wire format for members is reasonably
1030 * efficient, especially if the transport layer includes compression.
1031 * <p>
1032 * This supports equals(), hashCode() and compareTo(); two objects
1033 * are equal if all their fields are, and the sort order is to
1034 * minimise ad-lookup and HTML-bandwidth (ie by highest weight
1035 * then smallest code, with ties being broken by code lexical ordering).
1036 */
1037 private static final class SimpleAd implements Serializable,
1038 ObjectInputValidation,
1039 MemoryTools.Internable,
1040 Comparable<SimpleAd>
1041 {
1042 /**Strictly positive weighting. */
1043 final int weight;
1044 /**HTML code as 7-bit ASCII; non-null, non-zero length. */
1045 final String code;
1046
1047 /**Maximum HTML code length in bytes/characters. */
1048 public static final int MAX_AD_CHARACTERS = 2048;
1049
1050 /**Equality depends on all fields. */
1051 @Override
1052 public boolean equals(final Object o)
1053 {
1054 if(!(o instanceof SimpleAd)) { return(false); }
1055 final SimpleAd other = (SimpleAd) o;
1056 return((weight == other.weight) && (code.equals(other.code)));
1057 }
1058
1059 /**Hash depends on text as good source of variability. */
1060 @Override
1061 public int hashCode() { return(code.hashCode()); }
1062
1063 /**Provides total ordering, with primary sort by decreasing weight.
1064 * Full sort order is:
1065 * <ol>
1066 * <li>decreasing weight, then
1067 * <li>increasing code length, then
1068 * <li>code lexical order.
1069 * </ol>
1070 */
1071 public int compareTo(final SimpleAd o)
1072 {
1073 final SimpleAd other = (SimpleAd) o;
1074
1075 if(weight > other.weight) { return(-1); } // Right order.
1076 if(weight < other.weight) { return( 1); } // Wrong order.
1077
1078 if(code.length() < other.code.length()) { return(-1); } // Right order.
1079 if(code.length() > other.code.length()) { return( 1); } // Wrong order.
1080
1081 // Break ties to make total ordering.
1082 return(code.compareTo(other.code));
1083 }
1084
1085 /**Construct new simple ad details. */
1086 SimpleAd(final int adRelativeWeight, final String HTMLCode)
1087 {
1088 weight = adRelativeWeight;
1089 code = MemoryTools.intern(HTMLCode);
1090 // Verify object state and thus validate parameters...
1091 try { validateObject(); }
1092 catch(final InvalidObjectException e)
1093 { throw new IllegalArgumentException(e.getMessage()); }
1094 }
1095
1096 /**Our serialisation version. */
1097 private static final long serialVersionUID = 6649356978055850149L;
1098
1099 // /**Deserialise. */
1100 // private void readObject(final ObjectInputStream in)
1101 // throws IOException, ClassNotFoundException
1102 // {
1103 // in.defaultReadObject();
1104 // validateObject(); // Validate state immediately.
1105 // }
1106
1107 /**Deserialise: validate and eliminate duplicates coming off the wire.
1108 * @return identical, de-duped, non-null instance
1109 */
1110 protected Object readResolve()
1111 // throws ObjectStreamException
1112 { return(MemoryTools.intern(new SimpleAd(weight, code))); }
1113
1114 /**Validate fields/state.
1115 * Called in the constructor and possibly after de-serialising.
1116 */
1117 public void validateObject()
1118 throws InvalidObjectException
1119 {
1120 // Check that all components are sane and safe.
1121 // Weight must be strictly positive.
1122 if(weight <= 0)
1123 { throw new InvalidObjectException("bad object: weight <= 0"); }
1124 // HTML code must be non-null, non-zero-length,
1125 // 7-bit printable ASCII (and CRLF).
1126 if((code == null) ||
1127 (code.length() == 0) || (code.length() > MAX_AD_CHARACTERS))
1128 { throw new InvalidObjectException("bad object: HTML code length silly"); }
1129 for(int i = code.length(); --i >= 0; )
1130 {
1131 final char c = code.charAt(i);
1132 if(((c < 32) || (c > 126)) &&
1133 ((c != '\r') && (c != '\n')))
1134 { throw new InvalidObjectException("bad object: HTML code contains bad char " + ((int) c)); }
1135 }
1136 }
1137 }
1138
1139 /**Class holding details of one classified advertisement; immutable, serialisable.
1140 * We assume that the on-the-wire format for members is reasonably efficient,
1141 * especially if the transport layer includes compression such as ZIP.
1142 */
1143 public static final class ClassifiedAd implements Serializable,
1144 ObjectInputValidation,
1145 MemoryTools.Internable
1146 {
1147 /**Construct new classified ad details.
1148 *
1149 * @param HTMLCode HTML code as 7-bit ASCII; non-null, non-zero length
1150 * @param URIregex valid regular expression to match a substring of a URI path;
1151 * never null nor empty
1152 */
1153 public ClassifiedAd(final String HTMLCode,
1154 final String URIregex,
1155 final long start, final long end,
1156 final String language,
1157 final String ccTLDs)
1158 {
1159 regex = MemoryTools.intern(URIregex);
1160 code = MemoryTools.intern(HTMLCode);
1161 lang = MemoryTools.intern(language);
1162 countries = MemoryTools.intern(ccTLDs);
1163 this.start = start;
1164 this.end = end;
1165 // Verify object state and thus validate parameters...
1166 try { validateObject(); }
1167 catch(final InvalidObjectException e)
1168 { throw new IllegalArgumentException(e.getMessage()); }
1169 }
1170
1171 /**Valid case-sensitive regular expression to match a substring of a URI path, never null nor empty. */
1172 public final String regex;
1173
1174 /**Private cache of compiled regex; null until first use.
1175 * Created on first use; not serialised.
1176 * <p>
1177 * Never set null once non-null.
1178 * <p>
1179 * Marked volatile for thread-safe lock-free access,
1180 */
1181 private transient volatile Pattern pattern;
1182 /**Get compiled regex; never null. */
1183 public Pattern getPattern()
1184 {
1185 Pattern p = pattern;
1186 if(p == null) { pattern = p = Pattern.compile(regex); }
1187 return(p);
1188 }
1189
1190 /**HTML code as 7-bit ASCII; non-null, non-zero length. */
1191 public final String code;
1192
1193 /**Maximum HTML code length in bytes/characters, as per SimpleAs; strictly positive. */
1194 public static final int MAX_AD_CHARACTERS = SimpleAd.MAX_AD_CHARACTERS;
1195
1196 /**Start time (+ve UTC ms) or zero if no start time. */
1197 public final long start;
1198 /**Start time (+ve UTC ms greater than 'start') or zero if no end time. */
1199 public final long end;
1200
1201 /**Language/locale filter; two-letter lower-case language code or null if none. */
1202 public final String lang;
1203
1204 /**Returns true if the supplied (visitor's) locale is inappropriate for this ad.
1205 * If the lang value is null then this returns false.
1206 * Else if the visitor's locale/language is null (unknown) then this returns true.
1207 * Else this returns true iff the visitor's locale language matches lang.
1208 */
1209 public boolean wrongLanguage(final Locale visitorLocale)
1210 {
1211 if(lang == null) { return(false); }
1212 if(visitorLocale == null) { return(true); }
1213 return(!lang.equals(visitorLocale.getLanguage()));
1214 }
1215
1216 /**Country code; comma-separated list of lower-case ccTLD codes or null if none. */
1217 public final String countries;
1218
1219 /**Returns true if the supplied (visitor's) country is inappropriate for this ad.
1220 * If the countries list is null then this returns false.
1221 * Else if the visitor's ccTLD (country code) is null (unknown) then this returns true.
1222 * Else this returns true iff the visitor's ccTLD is found on our countries list.
1223 */
1224 public boolean wrongCountry(final CCTLD visitorCcTLD)
1225 {
1226 if(countries == null) { return(false); }
1227 if(visitorCcTLD == null) { return(true); }
1228 // Check for presense in the 'allowed' list.
1229 // If the list is correctly normalised then a simple substring search suffices
1230 // (though might be inefficient for VERY long lists).
1231 // We search backwards since common values may be late (us, uk)
1232 // and we may have normalised to natural sorted (lexical/ASCII) order,
1233 // but the search direction is not necessary for correctness.
1234 return(countries.lastIndexOf(visitorCcTLD.code) == -1);
1235 }
1236
1237 /**Equality depends on all fields. */
1238 @Override
1239 public boolean equals(final Object o)
1240 {
1241 if(!(o instanceof ClassifiedAd)) { return(false); }
1242 final ClassifiedAd other = (ClassifiedAd) o;
1243 if(start != other.start) { return(false); }
1244 if(end != other.end) { return(false); }
1245 if(!code.equals(other.code)) { return(false); }
1246 if(lang == null) { if(other.lang != null) { return(false); } }
1247 else if(!lang.equals(other.lang)) { return(false); }
1248 if(countries == null) { if(other.countries != null) { return(false); } }
1249 else if(!countries.equals(other.countries)) { return(false); }
1250 return(true); // Identical.
1251 }
1252
1253 /**Hash depends on the regex and the end date as good/quick sources of variability. */
1254 @Override
1255 public int hashCode() { return(regex.hashCode() ^ (int)(end >> 26)); }
1256
1257 /**Validate fields/state.
1258 * Called in the constructor and possibly after de-serialising.
1259 */
1260 public void validateObject()
1261 throws InvalidObjectException
1262 {
1263 // HTML code must be non-null, non-zero-length,
1264 // 7-bit printable ASCII (and CRLF).
1265 if((code == null) ||
1266 (code.length() == 0) || (code.length() > MAX_AD_CHARACTERS))
1267 { throw new InvalidObjectException("bad object: HTML code length silly"); }
1268 for(int i = code.length(); --i >= 0; )
1269 {
1270 final char c = code.charAt(i);
1271 if(((c < 32) || (c > 126)) &&
1272 ((c != '\r') && (c != '\n')))
1273 { throw new InvalidObjectException("bad object: HTML code contains bad char " + ((int) c)); }
1274 }
1275 if((regex == null) || (regex.length() == 0) || (regex.length() > 2*ExhibitName.MAX_NAME_LENGTH))
1276 { throw new InvalidObjectException("bad object: regex null or length invalid"); }
1277 // Validate the regex pattern (and cache it too; naughty but efficient).
1278 try { pattern = Pattern.compile(regex); }
1279 catch(final PatternSyntaxException e)
1280 { throw new InvalidObjectException("bad object: regex invalid"); }
1281 if((start < 0) || (end < 0))
1282 { throw new InvalidObjectException("bad object: negative start/end"); }
1283 if((start != 0) && (start < CoreConsts.GALLERY_EPOC_START))
1284 { throw new InvalidObjectException("bad object: start date invalid: " + new Date(start)); }
1285 if((end != 0) && (end <= start))
1286 { throw new InvalidObjectException("bad object: end < start"); }
1287 if((lang != null) && !lang.matches("^[a-z][a-z]$"))
1288 { throw new InvalidObjectException("bad object: bad language not [a-z][a-z]"); }
1289 if((countries != null) && !countries.matches("^[a-z][a-z](,[a-z][a-z])*$"))
1290 { throw new InvalidObjectException("bad object: bad countries list not [a-z][a-z](,[a-z][a-z])*"); }
1291 // Avoid huge countries list with arbitrary limit...
1292 if((countries != null) && (countries.length() > 128))
1293 { throw new InvalidObjectException("bad object: bad countries list too long"); }
1294 }
1295
1296 /**Deserialise: validate and eliminate duplicates coming off the wire.
1297 * @return identical, de-duped, non-null instance
1298 */
1299 protected Object readResolve()
1300 // throws ObjectStreamException
1301 { return(MemoryTools.intern(new ClassifiedAd(code, regex, start, end, lang, countries))); }
1302
1303 /**Unique serialisation UID. */
1304 private static final long serialVersionUID = 8848447664623542985L;
1305 }
1306
1307
1308 /**Immutable Set of String tokens to replace with random values in banner HTML.
1309 * These tokens, when encountered in HTML banner ad text,
1310 * are replaced with random (positive, decimal) integers.
1311 * These are to help with "cache-busting".
1312 * <p>
1313 * These tokens must be chosen so as to be very unlikely to appear
1314 * where they are <em>not</em> intended for replacement.
1315 * <p>
1316 * This set may become replaceable at run-time in future.
1317 */
1318 public static final Set<String> AD_BANNER_RND_TOKENS = Collections.unmodifiableSet(
1319 new HashSet<String>(Arrays.asList(new String[]{
1320 "$RND$", // TribalFusion cache-buster token.
1321 })));
1322
1323 /**Name of prefix for simple banner ad code.
1324 * There is one sub-property ({prefix}.allow, either "true" or "false")
1325 * that allows or disallows all simple ads at once.
1326 * <p>
1327 * Then for N starting at 1, and stopping at the first gap,
1328 * there must be a non-empty {prefix}.{N}.HTML containing
1329 * non-empty, pure-printable 7-bit ASCII (32--126, +CRLF) HTML ad code.
1330 * There may be an optional {prefix}.{N}.wt strictly-positive
1331 * integer relative weighting (default 100) of this ad to others,
1332 * the higher the weighting the more of the available space taken.
1333 */
1334 public static final String PNAME_WEBSVR_SIMPLEAD_PREFIX = "pg2k.websvr.ad.";
1335 /**Name of on/off switch for simple and classified ad code, enabled by default. */
1336 public static final String PNAME_WEBSVR_SIMPLEAD_ALLOW = PNAME_WEBSVR_SIMPLEAD_PREFIX + "allow";
1337 /**On/off switch for simple banner ad code, enabled by default. */
1338 private final boolean WEBSVR_SIMPLEAD_ALLOW;
1339 /**Get on/off switch for simple banner ad code, enabled by default.
1340 */
1341 public final boolean getWEBSVR_SIMPLEAD_ALLOW() { return(WEBSVR_SIMPLEAD_ALLOW); }
1342 /**Get HTML code for a simple ad at random; null if none available.
1343 * This returns ads with frequencies in proportion to their weightings.
1344 * <p>
1345 * This will always return null if getWEBSVR_SIMPLEAD_ALLOW()
1346 * returns false.
1347 * <p>
1348 * This algorithm will take O(n) time to return one of n available ads.
1349 * <p>
1350 * Will be faster when ads are unevenly weigted.
1351 * <p>
1352 * This will automatically do substitution of random "cache-buster" tokens
1353 * if required.
1354 * (Any of the recognised tokens will be substituted with a
1355 * random (positive decimal) integer value, different each time,
1356 * with all occurrences of any one token the same in any one call.)
1357 */
1358 public final String getSimpleAd()
1359 {
1360 if(/* !WEBSVR_SIMPLEAD_ALLOW || */ /* Test is redundant. */
1361 /* (simpleAds == null) || */ /* Test is redundant. */
1362 (totalSimpleAdWeight == 0)) { return(null); }
1363
1364 // Pick uniformly-distributed selector value.
1365 // We primarily require speed from this generator,
1366 // not perfection nor security.
1367 final int r = Rnd.fastRnd.nextInt(totalSimpleAdWeight);
1368
1369 // Total weight so far.
1370 int tw = 0;
1371
1372 // Look until cumulative sum of weights passes selector,
1373 // then take the ad that passes that threshold.
1374 // If performance proves to be a problem we could
1375 // replace this with a binary chop by storing an auxillary
1376 // array containing the sum of the weights.
1377 // In the interim, if we discover that this array is not
1378 // sorted biggest weight first (which makes for fastest
1379 // linear search on average) then we can sort it when
1380 // we have finished selecting our ad this time.
1381 for(int i = 0; i < simpleAds.length; ++i)
1382 {
1383 tw += simpleAds[i].weight;
1384 if(r < tw)
1385 {
1386 String code = simpleAds[i].code;
1387 // Substitute cache-buster tokens as needed.
1388 if((code != null) && (AD_BANNER_RND_TOKENS.size() > 0))
1389 {
1390 for(final String token : AD_BANNER_RND_TOKENS)
1391 {
1392 // If the cache-buster token exists then replace all occurrences.
1393 if(code.indexOf(token) != -1)
1394 {
1395 final int rnd = (Rnd.fastRnd.nextInt() >>> 1);
1396 code = code.replace(token, String.valueOf(rnd));
1397 }
1398 }
1399 }
1400 return(code);
1401 }
1402 }
1403
1404 return(null); // No ad available. (Probably should not be able to happen.)
1405 }
1406
1407 /**Total weight of all simple ads; non-negative.
1408 * Strictly positive if there are some simple ads.
1409 */
1410 private final int totalSimpleAdWeight;
1411
1412 /**Recompute totalSimpleAdWeight; zero if arg is null.
1413 * @throws IllegalArgumentException if sum of weights would
1414 * exceed Integer.MAX_VALUE.
1415 */
1416 private static int _compute_totalSimpleAdWeight(final SimpleAd _simpleAds[])
1417 {
1418 if(_simpleAds == null) { return(0); }
1419 int result = 0;
1420 for(int i = _simpleAds.length; --i >= 0; )
1421 {
1422 final long r = result + (long) (_simpleAds[i].weight);
1423 if(r > Integer.MAX_VALUE)
1424 { throw new IllegalArgumentException("weights too large"); }
1425 result = (int) r;
1426 }
1427 return(result);
1428 }
1429
1430 /**Sorted array of simple ads, no empty slots, non-zero length; or null if no simple ads. */
1431 private /* final */ SimpleAd simpleAds[];
1432
1433 /**The maximum number of simple ads that we will entertain at once. */
1434 public static final int MAX_SIMPLE_ADS = 64;
1435
1436
1437 /**Name of prefix for classified ad code. */
1438 public static final String PNAME_WEBSVR_CLASSIFIEDAD_PREFIX = "pg2k.websvr.clad.";
1439
1440 /**Array of classified ads, no empty slots or duplicates, non-zero length; or null if no classified ads. */
1441 private /* final */ ClassifiedAd classifiedAds[];
1442
1443 /**The maximum number of classified ads that we will entertain at once. */
1444 public static final int MAX_CLASSIFIED_ADS = 64;
1445
1446 /**Overall URI regex to check for match against any classified ad; null iff no classified ads.
1447 * Not serialised; created on first use.
1448 * Never set null once non-null.
1449 * <p>
1450 * Marked volatile for thread-safe lock-free access.
1451 */
1452 private transient volatile Pattern _masterClassifiedMatchPattern;
1453 /**Returns private mutable List of possible classifieds that match the filters; null if no such ads.
1454 * The URI passed should usually be absolute, ie starting with '/'.
1455 * <p>
1456 * This inspects and filters by:
1457 * <ul>
1458 * <li>the URI regex,
1459 * <li>the current date vs any start/end dates,
1460 * <li>the user's ccTLD if supplied vs the countries list if supplied,
1461 * <li>the user's locale language if supplied vs the ad language if supplied.
1462 * </ul>
1463 * <p>
1464 * It may be desirable to shuffle the result
1465 * to avoid any unwanted bias/ordering
1466 * and to minimise ad-blindness.
1467 *
1468 * @param uri the display URI; must be non-null
1469 * @param locale if non-null then ads are filtered by the ad language,
1470 * if null then we do not filter by locale/language
1471 * @param addr if non-null then ads are filtered by the country indicated
1472 * by this client IP address,
1473 * if null then we do not filter by country
1474 *
1475 * @return undefined-order non-empty private mutable List of possible matching classifieds, else null.
1476 */
1477 public List<ClassifiedAd> getClassifieds(final URI uri,
1478 final Locale locale,
1479 final InetAddress addr)
1480 {
1481 // If no classifieds then always return null immediately.
1482 if(classifiedAds == null) { return(null); }
1483
1484 // Match against URI path component only.
1485 final String path = uri.getPath();
1486 // No match is possible if there is no path component.
1487 if(path == null) { return(null); }
1488
1489 // If there is a matcher filter/matcher
1490 // to check against all the URI patterns in one go
1491 // then test against it now.
1492 final Pattern master = _masterClassifiedMatchPattern;
1493 if((master != null) && !master.matcher(path).find()) { return(null); }
1494
1495 // Prepare to filter by current time/date.
1496 final long now = System.currentTimeMillis();
1497
1498 final List<ClassifiedAd> result = new ArrayList<ClassifiedAd>(classifiedAds.length);
1499 for(final ClassifiedAd clad : classifiedAds)
1500 {
1501 // Filter by (optional) start/end dates. Should be very fast.
1502 if((clad.start != 0) && (now < clad.start)) { continue; }
1503 if((clad.end != 0) && (now >= clad.end)) { continue; }
1504 // Filter by (optional) language, if provided. Should be quick.
1505 if((locale != null) && clad.wrongLanguage(locale)) { continue; }
1506 // Filter by URI regex. May be moderately slow.
1507 if(!clad.getPattern().matcher(path).find()) { continue; }
1508 // Filter by (optional) allowed countries. May be (very) slow, so do last.
1509 if((addr != null) && clad.wrongCountry(GeoUtils.getCCTLDByAddress(addr, false))) { continue; }
1510 // Add the successfully-matched ad to the result.
1511 result.add(clad);
1512 }
1513 if(result.isEmpty())
1514 {
1515 // If there are many classifieds but none matched this time
1516 // (and we don't already have a master matcher)
1517 // then it may be worthwhile for subsequent calls
1518 // to have available an overall master matcher
1519 // to quickly reject any URI that will never match any current ad.
1520 if((master == null) && (classifiedAds.length > 2))
1521 {
1522 // Build a compound expression accepting the union of the matches.
1523 final StringBuilder sb = new StringBuilder();
1524 for(final ClassifiedAd clad : classifiedAds)
1525 {
1526 if(sb.length() == 0) { sb.append('('); }
1527 else { sb.append("|("); }
1528 sb.append(clad.regex);
1529 sb.append(')');
1530 }
1531 _masterClassifiedMatchPattern = Pattern.compile(sb.toString());
1532 }
1533
1534 return(null);
1535 }
1536
1537 return(result);
1538 }
1539
1540
1541 /**Get AuthData for specified author, or null if none.
1542 * @param auth author initials of desired data.
1543 */
1544 public synchronized AuthData getAuthData(final CharSequence auth)
1545 {
1546 if(authDB == null) { return(null); }
1547 return(authDB.get(auth.toString()));
1548 }
1549
1550 /**Property name prefix for author details. */
1551 public static final String PPREFIX_AUTH_DETAILS = "pg2k.authdb.";
1552
1553 /**Immutable SortedMap of author details, no empty or duplicate slots; non-zero length, or null if none. */
1554 private /* final */ SortedMap<String,AuthData> authDB;
1555
1556 /**The maximum number of author database entries. */
1557 public static final int MAX_AUTH_ENTRIES = 1024;
1558
1559 /**Class holding details of one author; immutable, serialisable.
1560 * Author initial must be valid syntactically;
1561 * text must be 7-bit printable (32--126) ASCII/HTML data.
1562 * <p>
1563 * We assume that the on-the-wire format for members is reasonably
1564 * efficient, especially if the transport layer includes compression.
1565 * <p>
1566 * This supports equals(), hashCode() and compareTo(); two objects
1567 * are equal iff all the fields are, and the sort order is by author.
1568 */
1569 public static final class AuthData implements Serializable,
1570 ObjectInputValidation,
1571 Comparable<AuthData>,
1572 MemoryTools.Internable
1573 {
1574 /**Author initials: must be syntactically valid; non-null. */
1575 public final String auth;
1576
1577 /**Author name: must be pure 7-bit printable ASCII HTML with possible entities; not empty, non-null. */
1578 public final String name;
1579
1580 /**Author home-page URL: should be syntactically valid URL or null. */
1581 public final String www;
1582
1583 /**Author email: should be syntactically valid email address or null. */
1584 public final String email;
1585
1586 /**Description HTML code as 7-bit ASCII; never empty but can be null. */
1587 public final String desc;
1588
1589 /**Maximum HTML description length in bytes/characters. */
1590 public static final int MAX_DESC_CHARACTERS = 2048;
1591
1592 /**Equality depends on all fields. */
1593 @Override
1594 public boolean equals(final Object o)
1595 {
1596 if(!(o instanceof AuthData)) { return(false); }
1597 final AuthData other = (AuthData) o;
1598 if(!auth.equals(other.auth)) { return(false); }
1599 if(!name.equals(other.name)) { return(false); }
1600 if((www == null) ? (other.www != null) : !www.equals(other.www)) { return(false); }
1601 if((email == null) ? (other.email != null) : !email.equals(other.email)) { return(false); }
1602 if((desc == null) ? (other.desc != null) : !desc.equals(other.desc)) { return(false); }
1603 return(true);
1604 }
1605
1606 /**Hash depends on author. */
1607 @Override
1608 public int hashCode() { return(auth.hashCode()); }
1609
1610 /**Provides total ordering, by author. */
1611 public int compareTo(final AuthData o)
1612 { return(auth.compareTo(((AuthData) o).auth)); }
1613
1614 /**Construct new author details from single string.
1615 * The String should be of the form:
1616 * <samp>Real Name|HomePageURL|email|description</samp>
1617 * where any item can be blank except the name.
1618 */
1619 AuthData(final String authInitials,
1620 final String pipeDelimitedString)
1621 {
1622 this(authInitials,
1623 _getSection(pipeDelimitedString, 0),
1624 _getSection(pipeDelimitedString, 1),
1625 _getSection(pipeDelimitedString, 2),
1626 _getSection(pipeDelimitedString, 3));
1627 }
1628
1629 /**Extract given section from pipe-delimited String.
1630 * Skip given number of pipe symbols ("|")
1631 * then return next up to following one if present.
1632 * Returns null if section not present.
1633 */
1634 private static String _getSection(final String s, final int section)
1635 {
1636 int prevPipePos = -1;
1637 for(int i = section; --i >= 0; )
1638 {
1639 final int nextPipe = s.indexOf('|', prevPipePos+1);
1640 // Return null if requested section not present.
1641 if(nextPipe == -1) { return(null); }
1642 // Else adjust start marker.
1643 prevPipePos = nextPipe;
1644 }
1645
1646 // Find the start of the following section, if any.
1647 final int nextPipePos = s.indexOf('|', prevPipePos+1);
1648
1649 // No following section; return the rest of the string.
1650 if(nextPipePos == -1)
1651 { return(s.substring(prevPipePos+1)); }
1652
1653 // Return the requested section.
1654 return(s.substring(prevPipePos+1, nextPipePos));
1655 }
1656
1657 /**Construct new author details from individial components.
1658 * @param authInitials the syntactally-valid author initials; non-null
1659 * @param authName real name of author; not empty nor null
1660 * @param homePageURL valid http URL, will be stripped,
1661 * empty text is converted to null
1662 * @param emailAddress valid email address, will be stripped,
1663 * empty text is converted to null
1664 * @param descriptionText the descriptive text, will be stripped,
1665 * empty text is converted to null
1666 *
1667 * Not given public access as only GenProps should need to construct.
1668 */
1669 AuthData(final String authInitials,
1670 final String authName,
1671 String homePageURL,
1672 String emailAddress,
1673 String descriptionText)
1674 {
1675 // Canonicalise URL.
1676 if(homePageURL != null)
1677 {
1678 homePageURL = homePageURL.trim();
1679 if(homePageURL.length() == 0)
1680 { homePageURL = null; }
1681 }
1682
1683 // Canonicalise email address.
1684 if(emailAddress != null)
1685 {
1686 emailAddress = emailAddress.trim();
1687 if(emailAddress.length() == 0)
1688 { emailAddress = null; }
1689 }
1690
1691 // Canonicalise description text.
1692 if(descriptionText != null)
1693 {
1694 descriptionText = descriptionText.trim();
1695 if(descriptionText.length() == 0)
1696 { descriptionText = null; }
1697 }
1698
1699 // Use intern() mainly to reduce old-heap memory churn.
1700 auth = MemoryTools.intern(authInitials);
1701 name = MemoryTools.intern(authName.trim());
1702 www = MemoryTools.intern(homePageURL);
1703 email = MemoryTools.intern(emailAddress);
1704 desc = MemoryTools.intern(descriptionText);
1705
1706 // Verify object state and thus validate parameters...
1707 try { validateObject(); }
1708 catch(final InvalidObjectException e)
1709 { throw new IllegalArgumentException(e.getMessage()); }
1710 }
1711
1712 /**Our serialisation version. */
1713 private static final long serialVersionUID = 2647433856891011944L;
1714
1715 /**Deserialise. */
1716 private void readObject(final ObjectInputStream in)
1717 throws IOException, ClassNotFoundException
1718 {
1719 in.defaultReadObject();
1720 validateObject(); // Validate state immediately.
1721 }
1722
1723 /**Validate fields/state.
1724 * Called in the constructor and possibly after de-serialising.
1725 * <p>
1726 * Barf if something bad is found.
1727 * (Maybe allow some extra info in debug version.)
1728 */
1729 public void validateObject()
1730 throws InvalidObjectException
1731 {
1732 // Validate author initials...
1733 if(!ExhibitName.validAuthorSyntax(auth))
1734 { throw new InvalidObjectException("bad object: author initials invalid"); }
1735
1736 if((name == null) || (name.length() == 0) ||
1737 !name.equals(name.trim()))
1738 { throw new InvalidObjectException("bad object: author name missing or too short or non-canonical"); }
1739 for(int i = name.length(); --i >= 0; )
1740 {
1741 final char c = name.charAt(i);
1742 if((c < 32) || (c > 126))
1743 { throw new InvalidObjectException("bad object: author name contains bad char " + ((int) c)); }
1744 }
1745
1746 // Validate URL if not null.
1747 // * Start with "http://".
1748 // * Parseable...
1749 if(www != null)
1750 {
1751 if(!www.equals(www.trim()))
1752 { throw new InvalidObjectException("bad object: WWW address non-canonical"); }
1753 if(!www.startsWith("http://"))
1754 { throw new InvalidObjectException("bad object: WWW address seems invalid"); }
1755 try { new URL(www); }
1756 catch(final MalformedURLException e)
1757 { throw new InvalidObjectException("bad object: WWW address unparseable"); }
1758 }
1759
1760 // Validate email address if not null.
1761 // * At least 5 characters (eg "a@b.c").
1762 // * Contains exactly one "@".
1763 if(email != null)
1764 {
1765 if(!email.equals(email.trim()))
1766 { throw new InvalidObjectException("bad object: email address non-canonical"); }
1767 if(email.length() < 5)
1768 { throw new InvalidObjectException("bad object: email address too short"); }
1769 final int firstAt = email.indexOf('@');
1770 if(firstAt < 1)
1771 { throw new InvalidObjectException("bad object: email address user portion missing"); }
1772 if(firstAt != email.lastIndexOf('@'))
1773 { throw new InvalidObjectException("bad object: email address contains more than one @"); }
1774 }
1775
1776 // HTML code must be non-empty (but can be null),
1777 // 7-bit printable ASCII (and CRLF).
1778 if(desc != null)
1779 {
1780 if(!desc.equals(desc.trim()))
1781 { throw new InvalidObjectException("bad object: HTML desc text non-canonical"); }
1782 if((desc.length() == 0) || (desc.length() > MAX_DESC_CHARACTERS))
1783 { throw new InvalidObjectException("bad object: HTML desc text length silly"); }
1784 for(int i = desc.length(); --i >= 0; )
1785 {
1786 final char c = desc.charAt(i);
1787 if(((c < 32) || (c > 126)) &&
1788 ((c != '\r') && (c != '\n')))
1789 { throw new InvalidObjectException("bad object: HTML desc text contains bad char " + ((int) c)); }
1790 }
1791 }
1792 }
1793 }
1794
1795 /**Prefix for static "goodness"/popularity weighting values. */
1796 public static final String PPREFIX_POPWT_DETAILS = "pg2k.popweight.";
1797
1798 /**Goodness subcomponent by author; includes trailing dot. */
1799 public static final String PCOMP_POPWR_BYAUTH = "byAuth.";
1800
1801 /**Goodness subcomponent by type/extension; includes trailing dot. */
1802 public static final String PCOMP_POPWR_BYTYPE = "byType.";
1803
1804 /**Goodness subcomponent by attribute; includes trailing dot. */
1805 public static final String PCOMP_POPWR_BYATTR = "byAttr.";
1806
1807 /**The maximum number of static "goodness"/popularity database entries. */
1808 public static final int MAX_POPWT_ENTRIES = 256;
1809
/**The maximum allowed "goodness"/popularity weight (maximally good); strictly positive. */
1811 public static final byte MAX_POPWT_VAL = 100;
1812
1813 /**The minimum allowed "goodness"/popularity weight (maximally bad); strictly negative. */
1814 public static final byte MIN_POPWT_VAL = -MAX_POPWT_VAL;
1815
1816 /**Map from String token to Byte ([-100,+100]) weighting.
 * We cheat and assume that the sets of authors, attribute words and extensions
1818 * are disjoint, and so we keep all the mappings in a single table!
1819 */
1820 private /* final */ Map<String,Byte> popWeights;
1821
1822 /**Get weighting by author in range ([-100,+100]); null if no weighting for specified author.
1823 * Author initials specified must be syntactically valid.
1824 * <p>
1825 * Note that this should be treated as if a value of +1.0
1826 * with a correlation of the returned value divided by MAX_POPWT_VAL.
1827 */
1828 public synchronized Byte getPopWeightForAuth(final CharSequence auth)
1829 {
1830 if(!ExhibitName.validAuthorSyntax(auth))
1831 { throw new IllegalArgumentException(); }
1832 final Map<String,Byte> p = popWeights;
1833 if(p == null) { return(null); }
1834 return(p.get(auth.toString()));
1835 }
1836
1837 /**Get weighting by type/extension in range ([-100,+100]); null if no weighting for specified type.
1838 * Extension/type specified must be syntactically valid and known to the system.
1839 * <p>
1840 * Note that this should be treated as if a value of +1.0
1841 * with a correlation of the returned value divided by MAX_POPWT_VAL.
1842 */
1843 public synchronized Byte getPopWeightForType(final CharSequence type)
1844 {
1845 if(ExhibitMIME.isValidInputExhibitNameExtension(type) == null)
1846 { throw new IllegalArgumentException(); }
1847 final Map<String,Byte> p = popWeights;
1848 if(p == null) { return(null); }
1849 return(p.get(type.toString()));
1850 }
1851
1852 /**Get weighting by attribute word in range ([-100,+100]); null if no weighting for specified attribute.
1853 * Attribute word specified must be syntactically valid and known to the system.
1854 * <p>
1855 * Note that this should be treated as if a value of +1.0
1856 * with a correlation of the returned value divided by MAX_POPWT_VAL.
1857 */
1858 public synchronized Byte getPopWeightForAttr(final String attrWord)
1859 {
1860 if(!ExhibitName.validAttributeWord(attrWord))
1861 { throw new IllegalArgumentException(); }
1862 final Map<String,Byte> p = popWeights;
1863 if(p == null) { return(null); }
1864 return(p.get(attrWord));
1865 }
1866
1867
/**Name of Web server hotlinked-download-limiter value (as percentage of all exhibit downloads). */
1869 public static final String PNAME_WEBSVR_EX_HOTLINK_LIMITER = "pg2k.websvr.ex.hotlinklimiter";
/**Web server hotlinked-download-limiter value (as percentage of all exhibit downloads). */
1871 private final byte WEBSVR_EX_HOTLINK_LIMITER;
1872 /**Web server hotlinked-download-limiter value (as percentange of all exhibit downloads).
1873 * Constrained to the range approx 0 to 255, default 0.
1874 * <p>
1875 * This limit is too prevent too much (lazy or bandwidth-theft) direct hot-linking
1876 * to exhibits (and/or thumbnails) by external Web sites.
1877 * <p>
1878 * There <em>are</em> bona fide reasons for some direct hotlinks, eg:
1879 * <ul>
1880 * <li>From some image search engines.
1881 * <li>From small Web sites and/or from contributors' sites.
1882 * </ul>
1883 */
1884 public final int getWEBSVR_EX_HOTLINK_LIMITER()
1885 { return(WEBSVR_EX_HOTLINK_LIMITER & 0xff); }
1886
1887 /**Name of Web server hotlinked-download-limiter divert graphic URL value. */
1888 public static final String PNAME_WEBSVR_EX_HOTLINK_DIVERT_URL = "pg2k.websvr.ex.hotlinklimiter.altURL";
1889 /**Web server hotlinked-download-limiter divert graphic URL value. */
1890 private final String WEBSVR_EX_HOTLINK_DIVERT_URL;
1891 /**Web server hotlinked-download-limiter value divert graphic URL value, or null if none.
1892 * Absolute URL of an alternate graphic to divert a user's brower to
1893 * when they have been hotlinked to one of our exhibits,
1894 * or null if the user is simply to get a 5XX error.
1895 * <p>
1896 * Preferably a small image with a long cache time.
1897 */
1898 public final String getWEBSVR_EX_HOTLINK_DIVERT_URL()
1899 { return(WEBSVR_EX_HOTLINK_DIVERT_URL); }
1900
1901 /**Maximum number of allow/disallow hotlink hosts that may be specified. */
1902 public static final int MAX_ALLOW_DISALLOW_HOTLINK_HOSTS = 64;
1903
1904 /**Name of Web server hotlinked-download-host-allow set. */
1905 public static final String PNAME_WEBSVR_EX_HOTLINK_LIMITER_ALLOW = "pg2k.websvr.ex.hotlinklimiter.allowhosts";
1906
1907 /**Name of Web server hotlinked-download-host-disallow set. */
1908 public static final String PNAME_WEBSVR_EX_HOTLINK_LIMITER_DISALLOW = "pg2k.websvr.ex.hotlinklimiter.disallowhosts";
1909
1910 /**Immutable Set of "allow" hotlink hosts (normalised host names); can be null for no such hosts. */
1911 private /* final */ Set<String> hotLinkAllowHosts;
1912
1913 /**Immutable compiled case-insensitive regex pattern for hotlink allow hosts not in literal list; can be null if no such "regex-match" hosts. */
1914 private final java.util.regex.Pattern hotLinkAllowHostsRegex;
1915
1916 /**Immutable Set of "disallow" hotlink hosts (normalised/stripped/lower-cased host names); can be null for no such hosts. */
1917 private /* final */ Set<String> hotLinkDisallowHosts;
1918
1919 /**Immutable compiled case-insensitive regex pattern for hotlink disallow hosts not in literal list; can be null if no such "regex-match" hosts. */
1920 private final java.util.regex.Pattern hotLinkDisallowHostsRegex;
1921
1922 /**Get immutable Set of "allow" hotlink hosts (normalised host names); never null. */
1923 public final Set<String> getHotLinkAllowHosts()
1924 {
1925 final Set<String> r = hotLinkAllowHosts;
1926 if(r == null)
1927 {
1928 final Set<String> noHosts = Collections.emptySet();
1929 return(noHosts);
1930 }
1931 return(r);
1932 }
1933
1934 /**Get immutable compiled case-insensitive regex pattern for hotlink allow hosts not in literal list; can be null if no such "regex-match" hosts. */
1935 public final Pattern getHotLinkAllowHostsRegex()
1936 {
1937 return(hotLinkAllowHostsRegex);
1938 }
1939
1940 /**Get immutable Set of "disallow" hotlink hosts (normalised host names); never null. */
1941 public final Set<String> getHotLinkDisallowHosts()
1942 {
1943 final Set<String> r = hotLinkDisallowHosts;
1944 if(r == null)
1945 {
1946 final Set<String> noHosts = Collections.emptySet();
1947 return(noHosts);
1948 }
1949 return(r);
1950 }
1951
1952 /**Get immutable compiled case-insensitive regex pattern for hotlink disallow hosts not in literal list; can be null if no such "regex-match" hosts. */
1953 public final Pattern getHotLinkDisallowHostsRegex()
1954 {
1955 return(hotLinkDisallowHostsRegex);
1956 }
1957
1958 /**Returns true if this may be a regex (ie contains non-DNS-safe chars).
1959 * If a user-specified hostname contains characters
1960 * other than [-a-zA-Z0-9.]
1961 * then we assume that it may be intended as a regex.
1962 *
1963 * @param n non-null non-empty user-specified non-normalised hostname
1964 */
1965 private static boolean _mayBeRegexHostname(final String n)
1966 {
1967 assert((n != null) && (n.length() != 0));
1968 for(int i = n.length(); --i >= 0; )
1969 {
1970 final char c = n.charAt(i);
1971 if((c >= 'a') && (c <= 'z')) { continue; }
1972 if((c >= 'A') && (c <= 'Z')) { continue; }
1973 if((c >= '0') && (c <= '9')) { continue; }
1974 if((c == '.') || (c == '-')) { continue; }
1975 return(true); // Not a plain DNS-safe name.
1976 }
1977 return(false); // Seems to be a plain name.
1978 }
1979
1980 /**Make single compiled pattern from all regex-match hostname expressions; null if no such expressions.
1981 * The pattern will match if any of the sub-patterns match.
1982 * <p>
1983 * This will whinge about and drop any indiviual non-compilable patterns.
1984 * No whingeing is done, however, if the passed log is null!
1985 */
1986 private static Pattern _makeResidualRegexHostMatch(final Set<String> rawNames,
1987 final SimpleLoggerIF logger)
1988 {
1989 if(rawNames == null) { return(null); }
1990
1991 final StringBuilder compoundExpr = new StringBuilder();
1992
1993 for(final String rawName : rawNames)
1994 {
1995 try
1996 {
1997 // Ignore plain host names that are not regexes...
1998 if(!_mayBeRegexHostname(rawName)) { continue; }
1999
2000 // See if this expression can be compiled, ie is valid...
2001 try { Pattern.compile(rawName, Pattern.CASE_INSENSITIVE); }
2002 catch(final Exception e)
2003 {
2004 if(logger != null)
2005 { logger.log("WARNING: GenProps: ignoring unusable/bad hostname regex: `"+rawName+"'"); }
2006 continue; // Skip this unusable pattern.
2007 }
2008
2009 // Append this pattern to the final set...
2010 if(compoundExpr.length() != 0) { compoundExpr.append('|'); }
2011 compoundExpr.append('(').append(rawName).append(')');
2012 }
2013 catch(final IllegalArgumentException e) { } // Drop this name if it seems to be bad.
2014 }
2015
2016 // If no regexes then return null.
2017 if(compoundExpr.length() == 0) { return(null); }
2018
2019 // Return compiled expression...
2020 return(Pattern.compile(compoundExpr.toString(), Pattern.CASE_INSENSITIVE));
2021 }
2022
2023 /**Normalise Set of normalised host names; result is immutable and not empty, or is null.
2024 * Normalises the set of names provided,
2025 * discarding any that seem to be invalid,
2026 * and stopping when/if we reach the size limit specified.
2027 * <p>
2028 * This skips any that seem to be regexes.
2029 *
2030 * @param rawNames set of raw host names; if null then null is returned
2031 * @param maxSize maximum size of result set; strictly positive
2032 */
2033 private static Set<String> _normaliseHostList(final Set<String> rawNames,
2034 final int maxSize)
2035 {
2036 if((maxSize < 1))
2037 { throw new IllegalArgumentException(); }
2038
2039 if(rawNames == null) { return(null); }
2040
2041 final Set<String> rawResult = new HashSet<String>(1 + 2*Math.min(maxSize, rawNames.size()));
2042
2043 for(final String rawName : rawNames)
2044 {
2045 try
2046 {
2047 if(_mayBeRegexHostname(rawName)) { continue; }
2048 final String normName = MemoryTools.intern(HostUtils.normaliseVirtualHostName(rawName));
2049 rawResult.add(normName); // Add to the result...
2050 if(rawResult.size() >= maxSize) { break; /* Stop when we have enough. */ }
2051 }
2052 catch(final IllegalArgumentException e) { } // Drop this name if it seems to be bad.
2053 }
2054
2055 // Return null rather than an empty set.
2056 if(rawResult.size() == 0) { return(null); }
2057
2058 // Make sure that the result is immutable.
2059 return(Collections.unmodifiableSet(rawResult));
2060 }
2061
2062 /**Parse set of (space-separated) hostnames; returns null for an empty list.
2063 * This does not validate or normalise the names supplied.
2064 *
2065 * @param hostList space-separate list of hostnames (or null for none)
2066 */
2067 private Set<String> _parseHostList(final String hostList)
2068 {
2069 if((hostList == null) || (hostList.length() == 0)) { return(null); }
2070
2071 final StringTokenizer st = new StringTokenizer(hostList);
2072 if(st.countTokens() == 0) { return(null); }
2073
2074 final Set<String> result = new HashSet<String>(1 + 2*st.countTokens());
2075 while(st.hasMoreTokens())
2076 { result.add(st.nextToken()); }
2077
2078 return(result);
2079 }
2080
2081
2082 /**Maximum number of DNSBLs that may be specified. */
2083 public static final int MAX_DNSBLS = 8;
2084
2085 /**Name of Web server DNSRBLs set. */
2086 public static final String PNAME_WEBSVR_DNSBLs = "org.hd.d.pg2k.DNSBLs";
2087
2088 /**Immutable Set of DNS BLs (normalised DNS zone names); can be null for no such hosts. */
2089 private /* final */ Set<String> DNSBLs;
2090
2091 /**Get immutable Set of DNSBLs (normalised DNS zone names); never null. */
2092 public final Set<String> getDNSBLs()
2093 {
2094 final Set<String> r = DNSBLs;
2095 if(r == null)
2096 {
2097 // Return empty set if none.
2098 final Set<String> noHosts = Collections.emptySet();
2099 return(noHosts);
2100 }
2101 return(r);
2102 }
2103 }