001    /*
002    Copyright (c) 1996-2012, Damon Hart-Davis
003    All rights reserved.
004    
005    Redistribution and use in source and binary forms, with or without
006    modification, are permitted provided that the following conditions are
007    met:
008    
009      * Redistributions of source code must retain the above copyright
010        notice, this list of conditions and the following disclaimer.
011    
012      * Redistributions in binary form must reproduce the above copyright
013        notice, this list of conditions and the following disclaimer in the
014        documentation and/or other materials provided with the
015        distribution.
016    
017    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
018    IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
019    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
020    PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
021    OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
022    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
023    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
024    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
025    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
026    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
027    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
028    */
029    package org.hd.d.pg2k.svrCore.props;
030    
031    import java.io.IOException;
032    import java.io.InvalidObjectException;
033    import java.io.ObjectInputStream;
034    import java.io.ObjectInputValidation;
035    import java.io.Serializable;
036    import java.net.InetAddress;
037    import java.net.MalformedURLException;
038    import java.net.URI;
039    import java.net.URL;
040    import java.util.ArrayList;
041    import java.util.Arrays;
042    import java.util.Calendar;
043    import java.util.Collections;
044    import java.util.Date;
045    import java.util.GregorianCalendar;
046    import java.util.HashMap;
047    import java.util.HashSet;
048    import java.util.Iterator;
049    import java.util.List;
050    import java.util.Locale;
051    import java.util.Map;
052    import java.util.Properties;
053    import java.util.Set;
054    import java.util.SortedMap;
055    import java.util.SortedSet;
056    import java.util.StringTokenizer;
057    import java.util.TimeZone;
058    import java.util.TreeMap;
059    import java.util.TreeSet;
060    import java.util.regex.Pattern;
061    import java.util.regex.PatternSyntaxException;
062    
063    import org.hd.d.pg2k.svrCore.CoreConsts;
064    import org.hd.d.pg2k.svrCore.ExhibitName;
065    import org.hd.d.pg2k.svrCore.GenUtils;
066    import org.hd.d.pg2k.svrCore.HostUtils;
067    import org.hd.d.pg2k.svrCore.MemoryTools;
068    import org.hd.d.pg2k.svrCore.Name;
069    import org.hd.d.pg2k.svrCore.Rnd;
070    import org.hd.d.pg2k.svrCore.SimpleLoggerIF;
071    import org.hd.d.pg2k.svrCore.MIME.ExhibitMIME;
072    import org.hd.d.pg2k.svrCore.location.GeoUtils;
073    import org.hd.d.pg2k.svrCore.location.GeoUtils.CCTLD;
074    
075    import ORG.hd.d.IsDebug;
076    
077    /**This holds run-time settable properties for the master and mirrors.
078     * It is immutable, and can be persisted or sent over RMI-IIOP.
079     * <p>
080     * All values are accessed through getXXX() methods
081     * (this object can be used as a bean)
082     * to allow us to always constrain values to sensible limits
083     * (and, for example, to patch up any values that have `gone funny'
084     * in serialisation because (say) the class version changed
085     * and fields were added)
086     * and to allow for local overrides from properties.
087     * This means that some of the getXXXX() methods have to be synchronized.
088     * <p>
089     * We also extensively check object state at construction and
090     * deserialisation; and we try to impose explicit or implicit limits
091     * on the amount of space that an instance of this object can consume,
092     * especially because at a transition between an old version and a new one
093     * there may be multiple instances floating about in memory.
094     * <p>
095     * We don't retain the original raw properties internally, but parse them at
096     * construction time, to do most of the expensive work once if possible,
097     * and to keep the serialised form of the object small.
098     * Defaults are set at parse/construction time,
099     * whereas limits are imposed at getXXX() time which makes for
100     * a lot of robustness if the object is damaged in transit or if
101     * the class details change slightly.
102     * <p>
103     * The version manufactured with a default constructor has a zero timestamp.
104     */
105    public final class GenProps implements Serializable, ObjectInputValidation
106        {
107        /**Our serialisation version. */
108        private static final long serialVersionUID = 8715644348905699663L;
109    
110        /**Deserialise. */
111        private void readObject(final ObjectInputStream in)
112            throws IOException, ClassNotFoundException
113            {
114            in.defaultReadObject();
115    
116            // Take defensive immutable copy of gen, if present, else make it empty.
117            if(gen == null)
118                { gen = Collections.emptyMap(); }
119            else
120                { gen = Collections.unmodifiableMap(new HashMap<String,String>(gen)); }
121    
122            // Take defensive copy of simpleAds[] if present, and re-sort.
123            if(simpleAds != null)
124                {
125                simpleAds = simpleAds.clone();
126                Arrays.sort(simpleAds);
127                }
128            // Take defensive copy of classifiedAds[] if present.
129            if(classifiedAds != null)
130                { classifiedAds = classifiedAds.clone(); }
131    
132            // Take defensive copy of authDB if present.
133            if(authDB != null)
134                { authDB = Collections.unmodifiableSortedMap(new TreeMap<String, AuthData>(authDB)); }
135    
136            // Take defensive copy of popWeights if present.
137            if(popWeights != null)
138                { popWeights = Collections.unmodifiableSortedMap(new TreeMap<String, Byte>(popWeights)); }
139    
140            // Take defensive copies of hotlink hot/cold sets and DNSBLs,
141            // but renormalise hostnames and truncate to size if need be.
142            if(hotLinkAllowHosts != null)
143                { hotLinkAllowHosts = _normaliseHostList(hotLinkAllowHosts, MAX_ALLOW_DISALLOW_HOTLINK_HOSTS); }
144            if(hotLinkDisallowHosts != null)
145                { hotLinkDisallowHosts = _normaliseHostList(hotLinkDisallowHosts, MAX_ALLOW_DISALLOW_HOTLINK_HOSTS); }
146            if(DNSBLs != null)
147                { DNSBLs = _normaliseHostList(DNSBLs, MAX_DNSBLS); }
148    
149    
150            validateObject(); // Validate state immediately.
151            }
152    
153        /**Validate fields/state.
154         * Called in the constructor and possibly after de-serialising.
155         * <p>
156         * Barf if something bad is found.
157         * (Maybe allow some extra info in debug version.)
158         */
159        public void validateObject()
160            throws InvalidObjectException
161            {
162            // Check that all components are sane and safe.
163            if(timestamp < 0)
164                { throw new InvalidObjectException("bad object: timestamp < 0"); }
165    
166            // Verify generic properties.
167            if(gen == null)
168                { throw new InvalidObjectException("bad object: gen == null"); }
169            if(gen.size() > MAX_GEN_PROPS)
170                { throw new InvalidObjectException("bad object: gen too large"); }
171            for(final Object kO : gen.keySet())
172                {
173                if(!(kO instanceof String))
174                    { throw new InvalidObjectException("bad object: gen key not a String"); }
175                final String k = (String) kO;
176                if(!isSafeGenPropValue(k))
177                    { throw new InvalidObjectException("bad object: gen key not valid"); }
178    
179                final Object vO = gen.get(k);
180                if(!(vO instanceof String))
181                    { throw new InvalidObjectException("bad object: gen value not a String"); }
182                final String v = (String) vO;
183                if(!isSafeGenPropValue(v))
184                    { throw new InvalidObjectException("bad object: gen value not valid"); }
185                }
186    
187            // Check that HTML meta-header text is still printable ASCII excluding ".
188            if(!isSafeHTMLMetaHeaderString(WEBSVR_META_KEYWORDS))
189                { throw new InvalidObjectException("bad object: unsafe " + PNAME_WEBSVR_META_KEYWORDS); }
190            if(!isSafeHTMLMetaHeaderString(WEBSVR_META_DESCRIPTION))
191                { throw new InvalidObjectException("bad object: unsafe " + PNAME_WEBSVR_META_DESCRIPTION); }
192            // Check that background image name, if set, is syntactically valid.
193            // Check that it is a CharSequence or String.
194            // We also check that it is a JPEG image.
195            if(WEBSVR_BG_IMAGE != null)
196                {
197                final boolean isString = (WEBSVR_BG_IMAGE instanceof String);
198                final boolean isExhibitFull = (!isString) && (WEBSVR_BG_IMAGE instanceof Name.ExhibitFull);
199                if((!isString) && (!isExhibitFull))
200                    { throw new InvalidObjectException("bad object: unsafe type " + WEBSVR_BG_IMAGE.getClass().getName()); }
201                if((!isExhibitFull) && (!ExhibitName.validNameSyntax(WEBSVR_BG_IMAGE)))
202                    { throw new InvalidObjectException("bad object: unsafe name " + PNAME_WEBSVR_BG_IMAGE); }
203                final ExhibitMIME.ExhibitTypeParameters et =
204                    ExhibitMIME.getInputFileType(WEBSVR_BG_IMAGE);
205                if((et == null) || (et.type != ExhibitMIME.ET_JPEG))
206                    { throw new InvalidObjectException("bad object: unsafe exhibit type " + PNAME_WEBSVR_BG_IMAGE); }
207                }
208    
209            // Check authDB for consistency.
210            if(authDB != null)
211                {
212                if((authDB.size() == 0) || (authDB.size() > MAX_AUTH_ENTRIES))
213                    { throw new InvalidObjectException("bad object: bad-length authDB"); }
214    
215                // Check all entries are of the correct type (AuthData).
216                for(final Iterator it = authDB.keySet().iterator(); it.hasNext(); )
217                    {
218                    final Object key = it.next();
219                    if(!(key instanceof String))
220                        { throw new InvalidObjectException("bad object: bad key in authDB"); }
221                    if(!(authDB.get(key) instanceof AuthData))
222                        { throw new InvalidObjectException("bad object: bad value in authDB"); }
223                    }
224                }
225    
226            // Check "goodness"/popularity weights for consistency.
227            if(popWeights != null)
228                {
229                if((popWeights.size() == 0) || (popWeights.size() > MAX_POPWT_ENTRIES))
230                    { throw new InvalidObjectException("bad object: bad-length popWeights"); }
231    
232                // Check all entries are of the correct type (AuthData).
233                for(final Iterator<String> it = popWeights.keySet().iterator(); it.hasNext(); )
234                    {
235                    final String key = it.next();
236                    if(!ExhibitName.validAuthorSyntax(key) &&
237                       !ExhibitName.validAttributeWord(key) &&
238                       (ExhibitMIME.isValidInputExhibitNameExtension(key) == null))
239                        { throw new InvalidObjectException("bad object: bad key in popWeights"); }
240                    final Byte val = popWeights.get(key);
241                    if((val == null) ||
242                       (val.byteValue() < MIN_POPWT_VAL) ||
243                       (val.byteValue() > MAX_POPWT_VAL))
244                        { throw new InvalidObjectException("bad object: bad value in popWeights"); }
245                    }
246                }
247    
248            // Check ads for consistency.
249            // Don't waste space with zero-length simpleAds.
250            if((simpleAds != null) &&
251                ((simpleAds.length == 0) || (simpleAds.length > MAX_SIMPLE_ADS)))
252                { throw new InvalidObjectException("bad object: bad-length simpleAds[]"); }
253            // Ensure no nulls in simpleAds,
254            // and that ads are sorted.
255            if(simpleAds != null)
256                {
257                for(int i = simpleAds.length; --i >= 0; )
258                    {
259                    if(simpleAds[i] == null)
260                        { throw new InvalidObjectException("bad object: simpleAds[] contains null"); }
261                    // Check pair-wise sort order...
262                    // The ordering should be total,
263                    // but we'll live with monotonic.
264                    if(i > 0)
265                        {
266                        if(simpleAds[i-1].compareTo(simpleAds[i]) > 0)
267                            { throw new InvalidObjectException("bad object: simpleAds[] not ordered"); }
268                        }
269                    }
270                }
271            // Check correct total for simpleAds.
272            if(totalSimpleAdWeight != _compute_totalSimpleAdWeight(simpleAds))
273                { throw new InvalidObjectException("bad object: totalSimpleAdWeight inconsistent"); }
274            if(totalSimpleAdWeight < 0)
275                { throw new InvalidObjectException("bad object: totalSimpleAdWeight < 0"); }
276            // Don't waste space with zero-length classifiedAds.
277            if((classifiedAds != null) &&
278                ((classifiedAds.length == 0) || (classifiedAds.length > MAX_CLASSIFIED_ADS)))
279                { throw new InvalidObjectException("bad object: bad-length classifiedAds[]"); }
280            // Ensure no nulls in classifiedAds,
281            // TODO: and that ads are unique.
282            if(classifiedAds != null)
283                {
284                for(int i = classifiedAds.length; --i >= 0; )
285                    {
286                    if(classifiedAds[i] == null)
287                        { throw new InvalidObjectException("bad object: classifiedAds[] contains null"); }
288                    }
289                }
290            // There must be no ads held internally if ads are switched off.
291            if(!WEBSVR_SIMPLEAD_ALLOW &&
292                ((totalSimpleAdWeight != 0) || (simpleAds != null) || (classifiedAds != null)))
293                { throw new InvalidObjectException("bad object: simple ads loaded though disabled"); }
294    
295            // Verify hotlinker diversion URL is valid (http) URL.
296            if(WEBSVR_EX_HOTLINK_DIVERT_URL != null)
297                {
298                try {
299                    if(!"http".equals((new URL(WEBSVR_EX_HOTLINK_DIVERT_URL)).getProtocol()))
300                        { throw new InvalidObjectException("bad object: invalid non-HTTP hotlink divert URL"); }
301                    }
302                catch(final MalformedURLException e)
303                    {
304                    throw new InvalidObjectException("bad object: invalid unparseable hotlink divert URL");
305                    }
306                }
307    
308            // FIXME: Should check content to be normalised host names too...
309            if(hotLinkAllowHosts != null)
310                {
311                if(hotLinkAllowHosts.size() > MAX_ALLOW_DISALLOW_HOTLINK_HOSTS)
312                    { throw new InvalidObjectException("bad object: too many allow hosts"); }
313                }
314            if(hotLinkDisallowHosts != null)
315                {
316                if(hotLinkDisallowHosts.size() > MAX_ALLOW_DISALLOW_HOTLINK_HOSTS)
317                    { throw new InvalidObjectException("bad object: too many disallow hosts"); }
318                }
319            if(DNSBLs != null)
320                {
321                if(DNSBLs.size() > MAX_DNSBLS)
322                    { throw new InvalidObjectException("bad object: too many DNSBLs"); }
323                }
324            }
325    
326        /**Check if String is save as generic property key or value.
327         * Null and zero-length values are not permitted.
328         * <p>
329         * Only ASCII values in the range 32 to 126 (plus \r\n\t) are allowed.
330         *
331         * @param s  the putative generic property key or value
332         * @return true iff the argument is a non-null, non-empty, not-too-long
333         *     pure-ASCII value
334         */
335        private static final boolean isSafeGenPropValue(final String s)
336            {
337            // Check basic constraints.
338            if(s == null) { return(false); }
339            final int length = s.length();
340            if(length == 0) { return(false); }
341            if(length > MAX_GEN_LEN) { return(false); }
342    
343            for(int i = length; --i >= 0; )
344                {
345                final char c = s.charAt(i);
346                if((c >= 32) || (c <= 126)) { continue; } // Usual case.
347                if((c == '\n') || (c == '\r') || (c == '\t')) { continue; } // Allowed whitespace.
348                return(false); // Bad character.
349                }
350    
351            return(true); // Benign string.
352            }
353    
354        /**Check if String is safe to use as HTML meta-header; return false if not.
355         * Null and empty string are regarded as OK.
356         */
357        private static final boolean isSafeHTMLMetaHeaderString(final String s)
358            {
359            if(s == null) { return(true); }
360            for(int i = s.length(); --i >= 0; )
361                {
362                if(!isSafeHTMLMetaHeaderChar(s.charAt(i)))
363                    { return(false); }
364                }
365            return(true); // All seems OK!
366            }
367    
368    //    /**Flags for User-Agent pattern matching checking for mobile phones. */
369    //    private static final int REGEX_FLAGS = Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE | Pattern.CANON_EQ;
370    //
371    //    /**Regex expression for one character safe for an HTML meta-header.
372    //     * Basically such a character must be printable ASCII, and
373    //     * one of:
374    //     * <ul>
375    //     * <li>a letter
376    //     * <li>a digit
377    //     * <li>a space
378    //     * <li>one of the punctuation characters -+?!()[]{}/_.;:,@
379    //     * </ul>
380    //     */
381    //    private static final String SAFE_HTML_META_HEADER_CHAR = "[a-zA-Z0-9+?!()\\[\\]{}/_.;:,@-]";
382    //
383    //    /**Single 'safe meta header character' match. */
384    //    private static final Pattern SAFE_HTML_META_HEADER_CHAR_PATTERN = Pattern.compile(SAFE_HTML_META_HEADER_CHAR, REGEX_FLAGS);
385    
386        /**Check that character is safe for an HTML meta-header; return false if not.
387         * Basically such a character must be printable ASCII, and
388         * one of:
389         * <ul>
390         * <li>a letter
391         * <li>a digit
392         * <li>a space
393         * <li>one of the punctuation characters -+?!()[]{}/_.;:,@
394         * </ul>
395         */
396        private static final boolean isSafeHTMLMetaHeaderChar(final char c)
397            {
398            switch(c)
399                {
400                // Letters...
401                case 'a': case 'A':
402                case 'b': case 'B':
403                case 'c': case 'C':
404                case 'd': case 'D':
405                case 'e': case 'E':
406                case 'f': case 'F':
407                case 'g': case 'G':
408                case 'h': case 'H':
409                case 'i': case 'I':
410                case 'j': case 'J':
411                case 'k': case 'K':
412                case 'l': case 'L':
413                case 'm': case 'M':
414                case 'n': case 'N':
415                case 'o': case 'O':
416                case 'p': case 'P':
417                case 'q': case 'Q':
418                case 'r': case 'R':
419                case 's': case 'S':
420                case 't': case 'T':
421                case 'u': case 'U':
422                case 'v': case 'V':
423                case 'w': case 'W':
424                case 'x': case 'X':
425                case 'y': case 'Y':
426                case 'z': case 'Z':
427                // Digits.
428                case '0': case '1': case '2': case '3': case '4':
429                case '5': case '6': case '7': case '8': case '9':
430                // Safe punctuation and space.
431                case ' ':
432                case ',': case '.': case ';': case ':':
433                case '-': case '+':
434                case '!': case '?':
435                case '(': case ')':
436                case '[': case ']':
437                case '{': case '}':
438                case '/': case '_': case '@':
439                    { return(true); } // OK!
440    
441                default:
442                    { return(false); } // Not safe.
443                }
444            }
445    
446        /**Construct a default, zero-timestamp set of generic properties.
447         */
448        public GenProps()
449            { this(new Properties(), 0, null); }
450    
451        /**Construct a new, immutable, properties set.
452         * The properties must be non-null
453         * and the timestamp must be non-negative.
454         * <p>
455         * Minor problems with the properties themselves will
456         * be silently ignored or logged, and defaults substituted
457         * for broken or missing values.
458         */
459        public GenProps(final Properties props,
460                        final long _timestamp)
461            { this(props, _timestamp, GenUtils.systemErrLogger); }
462    
463        /**Construct a new, immutable, properties set.
464         * The properties must be non-null
465         * and the timestamp must be non-negative.
466         * <p>
467         * Minor problems with the properties themselves will
468         * be silently ignored or logged, and defaults substituted
469         * for broken or missing values.
470         *
471         * @param logger if non-null then non-fatal problems will be logged here
472         */
473        public GenProps(final Properties props,
474                        final long _timestamp,
475                        final SimpleLoggerIF logger)
476            {
477            if((props == null) || (_timestamp < 0))
478                { throw new IllegalArgumentException(); }
479            timestamp = _timestamp;
480    
481            // Parse properties.
482            // Set up some working variables to help with parsing.
483            int iTmp;
484    //        long lTmp;
485            String sTmp;
486            Boolean bTmp;
487    
488            // Extract the generic key/value properties, if any.
489            final Map<String,String> g = new HashMap<String, String>();
490            for(final Object keyO : props.keySet())
491                {
492                if(!(keyO instanceof String))
493                    { throw new IllegalArgumentException("bad properties key: not String"); }
494                final String key = (String) keyO;
495    
496                if(!key.startsWith(GenPropsGenNames.GEN_PREFIX)) { continue; }
497    
498                final String gKey = key.substring(GenPropsGenNames.GEN_PREFIX.length());
499                final String gVal = props.getProperty(key);
500    
501                // Validate the values later.
502                g.put(gKey, gVal);
503                }
504            // If no generic keys then save space with shared empty Map.
505            if(g.size() == 0)
506                { gen = Collections.emptyMap(); }
507            else
508                { gen = Collections.unmodifiableMap(g); }
509    
510            iTmp = -1;
511            try { iTmp = Integer.parseInt(props.getProperty(PNAME_WEBSVR_MIN_EX_IMATTR_RECHECK_MS, "120000"), 10); }
512            catch(final Exception e) { } // Ignore errors.
513            WEBSVR_MIN_EX_IMATTR_RECHECK_MS = iTmp;
514    
515            iTmp = -1;
516            try { iTmp = Integer.parseInt(props.getProperty(PNAME_WEBSVR_SYSPROPS_RECHECK_MS, "10013"), 10); }
517            catch(final Exception e) { } // Ignore errors.
518            WEBSVR_SYSPROPS_RECHECK_MS = iTmp;
519    
520            iTmp = -1;
521            try { iTmp = Integer.parseInt(props.getProperty(PNAME_WEBSVR_MAX_CACHEABLE_EX_BYTES, "2123456"), 10); }
522            catch(final Exception e) { } // Ignore errors.
523            WEBSVR_MAX_CACHEABLE_EX_BYTES = iTmp;
524    
525            iTmp = -1;
526            try { iTmp = Integer.parseInt(props.getProperty(PNAME_WEBSVR_BW_LIMITER, "10"), 10); }
527            catch(final Exception e) { } // Ignore errors.
528            WEBSVR_BW_LIMITER = iTmp;
529    
530            iTmp = -1;
531            try { iTmp = Integer.parseInt(props.getProperty(PNAME_WEBSVR_TN_CACHEPC, "10"), 10); }
532            catch(final Exception e) { } // Ignore errors.
533            WEBSVR_TN_CACHEPC = iTmp;
534    
535            iTmp = -1;
536            try { iTmp = Integer.parseInt(props.getProperty(PNAME_WEBSVR_MAX_EX_BYTES, "1123456789"), 10); }
537            catch(final Exception e) { } // Ignore errors.
538            WEBSVR_MAX_EX_BYTES = iTmp;
539    
540            // We canonicalise this keyword list to reduce the
541            // space it consumes (especially after compression), by:
542            //   * Converting to lower-case (helps compression; helps drop dups).
543            //   * Tokenize with "," and trim whitespace around each token.
544            //   * We could eliminate duplicate whitespace within a single token too.
545            //   * Eliminate duplicates.
546            //   * Sort (to help compression).
547            //   * Glue back together again without whitespace.
548            //   * Eliminate any quote (" or ') characters.
549            // We object strongly to non-ASCII text (we remove it!).
550            sTmp = null;
551            try {
552                sTmp = props.getProperty(PNAME_WEBSVR_META_KEYWORDS, "").trim().toLowerCase();
553                final SortedSet<String> words = new TreeSet<String>();
554                final StringTokenizer st = new StringTokenizer(sTmp, ",");
555                while(st.hasMoreTokens())
556                    {
557                    // Take each token,
558                    // zap any non-ASCII printable (and unsafe) chars,
559                    // and trim of any obvious excess whitespace.
560                    String s = st.nextToken();
561                    for(int i = s.length(); --i >= 0; )
562                        {
563                        final char c = s.charAt(i);
564                        if(!isSafeHTMLMetaHeaderChar(c)) { s = s.replace(c, ' '); }
565                        }
566                    s = s.trim();
567                    words.add(s);
568                    }
569                final StringBuilder sb = new StringBuilder(sTmp.length());
570                for(final Iterator<String> it = words.iterator(); it.hasNext(); )
571                    {
572                    final String s = it.next();
573                    sb.append(s);
574                    if(it.hasNext()) { sb.append(','); }
575                    }
576                sTmp = sb.toString();
577                }
578            catch(final Exception e) { } // Ignore errors.
579            WEBSVR_META_KEYWORDS = sTmp;
580    
581            sTmp = null;
582            try {
583                sTmp = props.getProperty(PNAME_WEBSVR_META_DESCRIPTION, "");
584                // Discard any unsafe characters for a meta header.
585                for(int i = sTmp.length(); --i >= 0; )
586                    {
587                    final char c = sTmp.charAt(i);
588                    if(!isSafeHTMLMetaHeaderChar(c)) { sTmp = sTmp.replace(c, ' '); }
589                    }
590                sTmp = sTmp.trim();
591                }
592            catch(final Exception e) { } // Ignore errors.
593            WEBSVR_META_DESCRIPTION = sTmp;
594    
595            sTmp = null;
596            try {
597                sTmp = props.getProperty(PNAME_WEBSVR_BG_IMAGE);
598    
599                // Drop if not a valid-syntax JPEG exhibit name.
600                // We *do not* attempt to check that it exists here.
601                if(!ExhibitName.validNameSyntax(sTmp))
602                    { sTmp = null; }
603                else
604                    {
605                    final ExhibitMIME.ExhibitTypeParameters tp =
606                        ExhibitMIME.getInputFileType(sTmp);
607                    if((tp == null) || (tp.type != ExhibitMIME.ET_JPEG))
608                        { sTmp = null; }
609                    }
610    
611                }
612            catch(final Exception e) { } // Ignore errors.
613            WEBSVR_BG_IMAGE = (sTmp == null) ? null : Name.ExhibitFull.create(sTmp);
614    
615            bTmp = null;
616            try { bTmp = Boolean.valueOf(props.getProperty(PNAME_WEBSVR_SIMPLEAD_ALLOW)); }
617            catch(final Exception e) { } // Ignore errors.
618            WEBSVR_SIMPLEAD_ALLOW = (bTmp == null) ? true : bTmp.booleanValue();
619            SimpleAd[] simpleAdsTmp = null;
620            if(WEBSVR_SIMPLEAD_ALLOW)
621                {
622                final List<SimpleAd> v = new ArrayList<SimpleAd>(); // Collect ads here...
623                // Only even look for simple ads if they are allowed.
624                for(int N = 1; N < Integer.MAX_VALUE; ++N)
625                    {
626                    final String prefix = PNAME_WEBSVR_SIMPLEAD_PREFIX + N + ".";
627                    final String rawCode = props.getProperty(prefix + "HTML");
628                    if(rawCode == null) { break; } // End of ads.
629    
630                    final String code = rawCode.trim();
631                    int weight = 100; // Default;
632                    try { weight = Integer.parseInt(props.getProperty(prefix + "wt"), 10); }
633                    catch(final Exception e) { } // Ignore errors.
634                    if(weight <= 0) { continue; } // Temporarily disabled; skip.
635    
636                    // Attempt to construct new ad item and save it,
637                    // but quietly skip any difficulties.
638                    try { v.add(MemoryTools.intern(new SimpleAd(weight, code))); }
639                    catch(final Exception e) { } // Ignore errors.
640                    }
641                // If we found some ads, extract and sort them, ready to store.
642                if(v.size() > 0)
643                    {
644                    simpleAdsTmp = new SimpleAd[v.size()];
645                    v.toArray(simpleAdsTmp);
646                    Arrays.sort(simpleAdsTmp);
647                    }
648                }
649            simpleAds = simpleAdsTmp; // Store simple ads, if any.
650            totalSimpleAdWeight = _compute_totalSimpleAdWeight(simpleAds);
651    
652            // Parse classified ads...
653            ClassifiedAd[] classifiedAdsTmp = null;
654            // Disable classified ads when we disable simple ads.
655            if(WEBSVR_SIMPLEAD_ALLOW)
656                {
657                final Set<ClassifiedAd> v = new HashSet<ClassifiedAd>(); // Collect ads here...
658                // Trawl through all properties looking for the mandatory regex value(s).
659                for(final Object keyO: props.keySet())
660                    {
661                    // Skip keys nothing to do with classified ads.
662                    assert(keyO instanceof String);
663                    final String regexKey = (String) keyO;
664                    if(!regexKey.startsWith(PNAME_WEBSVR_CLASSIFIEDAD_PREFIX)) { continue; }
665                    // Skip everything but the URI regex key initially.
666                    if(!regexKey.endsWith(".URIregex")) { continue; }
667                    // Compute the stub up to and including the final '.'
668                    // to allow us to verify other keys.
669                    final int lastDot = regexKey.lastIndexOf('.');
670                    final String stub = regexKey.substring(0, lastDot+1);
671                    // Try to create an instance for the classified ad found,
672                    // logging any problems from invalid data
673                    // (and dropping/ignoring any such 'bad' ad).
674                   try {
675                       // We interpret/parse start/end UTC dates (if any) here.
676                       final long start = parseUTCyyyymmdd(props.getProperty(stub+"start"));
677                       final long end = parseUTCyyyymmdd(props.getProperty(stub+"end"));
678                       // We omit ads whose end date has already expired by the timestamp
679                       // as a minor system-wide optimisation.
680                       if((end != 0) && (end < _timestamp)) { continue; }
681                       v.add(new ClassifiedAd(props.getProperty(stub+"HTML"),
682                                               props.getProperty(regexKey),
683                                               start,
684                                               end,
685                                               props.getProperty(stub+"lang"),
686                                               props.getProperty(stub+"countries")));
687    if(IsDebug.isDebug) { System.out.println("INFO: GenProps: classified ad loaded: "+stub); }
688                        }
689                    catch(final Exception e)
690                        {
691                        logger.log("ERROR: GenProps: unable to load classified ad "+stub+"... "+e.getMessage());
692    if(IsDebug.isDebug) { System.err.println("ERROR: GenProps: classified ad BROKEN: "+stub); }
693                        }
694                    }
695    
696                // If we found some ads, extract and store.
697                if(v.size() > 0)
698                    {
699                    classifiedAdsTmp = new ClassifiedAd[v.size()];
700                    v.toArray(classifiedAdsTmp);
701    if(IsDebug.isDebug) { System.out.println("INFO: GenProps: classified ads loaded: "+v.size()); }
702                    }
703                }
704            classifiedAds = classifiedAdsTmp; // Store classified ads, if any.
705    
706    
707            // Extract author details.
708            // We iterate over all the properties, looking for suitable candidates.
709            final TreeMap<String,AuthData> putativeAuthDB = new TreeMap<String, AuthData>();
710            for(final Iterator it = props.keySet().iterator(); it.hasNext(); )
711                {
712                try
713                    {
714                    final String key = (String) it.next();
715                    if(!key.startsWith(PPREFIX_AUTH_DETAILS))
716                        { continue; /* Not the right prefix for authDB. */ }
717    
718                    // We have a key that could be a valid author entry...
719                    final String auth = key.substring(PPREFIX_AUTH_DETAILS.length());
720                    // TODO: add warning for invalid entries?
721                    if(!ExhibitName.validAuthorSyntax(auth))
722                        { continue; /* Not a valid author initials string. */ }
723    
724                    // TODO: add warning for duplicate entries?
725                    putativeAuthDB.put(MemoryTools.intern(auth),
726                        MemoryTools.intern(new AuthData(auth, props.getProperty(key))));
727                    }
728                catch(final Exception e)
729                    {
730                    // Absorb errors...
731                    e.printStackTrace();
732                    }
733                }
734            // Save immutable version of our authDB.
735            // If empty, save space by storing a null instead.
736            authDB = (putativeAuthDB.size() == 0) ? null :
737                Collections.unmodifiableSortedMap(putativeAuthDB);
738    
739    
740            // Extract supplied static "goodness"/popularity weightings, if any.
        // We iterate over all the properties, looking for suitable candidates.
742            final HashMap<String,Byte> putativePopWeights = new HashMap<String, Byte>();
743            for(final Iterator it = props.keySet().iterator(); it.hasNext(); )
744                {
745                try
746                    {
747                    final String key = (String) it.next();
748                    if(!key.startsWith(PPREFIX_POPWT_DETAILS))
749                        { continue; /* Not the right prefix for popWeight. */ }
750    
751                    final String aWPrefix = PPREFIX_POPWT_DETAILS + PCOMP_POPWR_BYAUTH;
752                    if(key.startsWith(aWPrefix))
753                        {
754                        // We have a key that could be a valid author weighting entry...
755                        final String auth = key.substring(aWPrefix.length());
756                        // TODO: add warning for invalid entries?
757                        if(!ExhibitName.validAuthorSyntax(auth))
758                            { continue; /* Not a valid author initials string. */ }
759                        final Byte v = Byte.decode(props.getProperty(key));
760                        if((v < MIN_POPWT_VAL) || (v > MAX_POPWT_VAL))
761                            { continue; /* Not a valid weight. */ }
762                        putativePopWeights.put(MemoryTools.intern(auth),
763                                               MemoryTools.intern(v));
764                        continue;
765                        }
766    
767                    final String tWPrefix = PPREFIX_POPWT_DETAILS + PCOMP_POPWR_BYTYPE;
768                    if(key.startsWith(tWPrefix))
769                        {
770                        // We have a key that could be a valid type/extension weighting entry...
771                        final String type = key.substring(tWPrefix.length());
772                        // TODO: add warning for invalid entries?
773                        if(ExhibitMIME.isValidInputExhibitNameExtension(type) == null)
774                            { continue; /* Not a valid type string. */ }
775                        final Byte v = Byte.decode(props.getProperty(key));
776                        if((v < MIN_POPWT_VAL) || (v > MAX_POPWT_VAL))
777                            { continue; /* Not a valid weight. */ }
778                        putativePopWeights.put(MemoryTools.intern(type),
779                                               MemoryTools.intern(v));
780                        continue;
781                        }
782    
783                    final String attrWPrefix = PPREFIX_POPWT_DETAILS + PCOMP_POPWR_BYATTR;
784                    if(key.startsWith(attrWPrefix))
785                        {
786                        // We have a key that could be a valid attribute-word weighting entry...
787                        final String attrWord = key.substring(attrWPrefix.length());
788                        // TODO: add warning for invalid entries?
789                        if(!ExhibitName.validAttributeWord(attrWord))
790                            { continue; /* Not a valid attribute word string. */ }
791                        final Byte v = Byte.decode(props.getProperty(key));
792                        if((v < MIN_POPWT_VAL) || (v > MAX_POPWT_VAL))
793                            { continue; /* Not a valid weight. */ }
794                        putativePopWeights.put(MemoryTools.intern(attrWord),
795                                               MemoryTools.intern(v));
796                        continue;
797                        }
798                    }
799                catch(final Exception e)
800                    {
801                    // Absorb errors...
802                    e.printStackTrace();
803                    }
804                }
        // Save immutable version of our popWeights.
806            // If empty, save space by storing a null instead.
807            popWeights = (putativePopWeights.size() == 0) ? null :
808                Collections.unmodifiableMap(putativePopWeights);
809    
810    
811            iTmp = -1;
812            try { iTmp = Integer.parseInt(props.getProperty(PNAME_WEBSVR_EX_HOTLINK_LIMITER, "0"), 10); }
813            catch(final Exception e) { } // Ignore errors.
814            WEBSVR_EX_HOTLINK_LIMITER = (byte) Math.max(0, Math.min(255, iTmp));
815    
816            // Get alternative URL to divert unwanted exhibit hotlinks to, if any.
817            // Invalid URLs are ignored and valid ones may be canonicalised.
818            sTmp = null;
819            try
820                {
821                final String p = props.getProperty(PNAME_WEBSVR_EX_HOTLINK_DIVERT_URL);
822                if((p != null) && (p.startsWith("http:")))
823                    { sTmp = (new URL(p)).toExternalForm(); } // Parse and canonicalise.
824                }
825            catch(final MalformedURLException e) { } // Ignore malformed URLs.
826            WEBSVR_EX_HOTLINK_DIVERT_URL = sTmp;
827    
828            // Parse lists of hotlinker allow/deny hosts, DNSBLs...
829            final Set<String> rawAllowNames = _parseHostList(
830                            props.getProperty(PNAME_WEBSVR_EX_HOTLINK_LIMITER_ALLOW));
831            hotLinkAllowHosts = _normaliseHostList(rawAllowNames, MAX_ALLOW_DISALLOW_HOTLINK_HOSTS);
832            hotLinkAllowHostsRegex = _makeResidualRegexHostMatch(rawAllowNames, logger);
833            final Set<String> rawDisallowNames = _parseHostList(
834                            props.getProperty(PNAME_WEBSVR_EX_HOTLINK_LIMITER_DISALLOW));
835            hotLinkDisallowHosts = _normaliseHostList(rawDisallowNames, MAX_ALLOW_DISALLOW_HOTLINK_HOSTS);
836            hotLinkDisallowHostsRegex = _makeResidualRegexHostMatch(rawDisallowNames, logger);
837            DNSBLs = _normaliseHostList(_parseHostList(
838                    props.getProperty(PNAME_WEBSVR_DNSBLs)), MAX_DNSBLS);
839    
840    
841            // Verify complete object state.
842            try { validateObject(); }
843            catch(final InvalidObjectException e)
844                { throw new IllegalArgumentException(e); }
845            }
846    
847    
848        /**Parse UTC YYYYMMDD date, or zero if absent (null). */
849        private static long parseUTCyyyymmdd(final String property)
850            {
851            if(property == null) { return(0); }
852    //        property = property.trim();
853            if(property.length() != 8) { throw new IllegalArgumentException("format required: YYYYMMDD"); }
854    
855            // Get GMT/UTC timezone.
856            final TimeZone tz = TimeZone.getTimeZone("GMT");
857            final GregorianCalendar gc = new GregorianCalendar(tz);
858            // Set YMD from property value.
859            gc.set(Calendar.YEAR, Integer.parseInt(property.substring(0, 4), 10));
860            // Not that month is zero-based (January is zero).
861            gc.set(Calendar.MONTH, Integer.parseInt(property.substring(4, 6), 10) - 1);
862            gc.set(Calendar.DAY_OF_MONTH, Integer.parseInt(property.substring(6, 8), 10));
863    
864            // Convert to timestamp and return if sensible...
865            final long t = gc.getTimeInMillis();
866            if(t < CoreConsts.GALLERY_EPOC_START)
867                { throw new IllegalArgumentException("classified ad date too early to be sensible"); }
868            return(t);
869            }
870    
871        /**Timestamp of this properties set; zero for default/empty set. */
872        public final long timestamp;
873    
874    
875        /**Maximum number of general properties. */
876        public static final int MAX_GEN_PROPS = 1024;
877    
878        /**Maximum length of general property key or value (chars). */
879        public static final int MAX_GEN_LEN = 1024;
880    
881        /**The (immutable) generic properties map; never null. */
882        private Map<String,String> gen;
883    
884        /**Get the immutable generic properties; never null.
885         * The keys in this Map here are stripped of the initial GEN_PREFIX
886         * that appears in the properties file.
887         */
888        public Map<String,String> getGen()
889            { return(gen); }
890    
891    
892        /**Name of Web server minimum exhibit-immutable-attributes check interval (ms) property. */
893        public static final String PNAME_WEBSVR_MIN_EX_IMATTR_RECHECK_MS = "pg2k.websvr.ex.imattr.minrecheckms";
894        /**Web server minimum exhibit-immutable-attributes check interval (ms). */
895        private final int WEBSVR_MIN_EX_IMATTR_RECHECK_MS;
896        /**Get the Web server minimum exhibit-immutable-attributes check interval (ms).
897         * Constrained to the range approx 1s to 6h, default approx 2m.
898         */
899        public final int getWEBSVR_MIN_EX_IMATTR_RECHECK_MS()
900            { return(Math.max(1001, Math.min(6 * 3600 * 1000, WEBSVR_MIN_EX_IMATTR_RECHECK_MS))); }
901    
902        /**Name of Web server sysprops check interval (ms) property. */
903        public static final String PNAME_WEBSVR_SYSPROPS_RECHECK_MS = "pg2k.websvr.props.sys.recheckms";
904        /**Web server sysprops (ie most property files including GenProps) (re)check interval (ms). */
905        private final int WEBSVR_SYSPROPS_RECHECK_MS;
906        /**Get the Web server sysprops check interval (ms).
907         * Constrained to the range approx 1s to 1h, default approx 10s (which applies until first props set is fetched).
908         */
909        public final int getWEBSVR_SYSPROPS_RECHECK_MS()
910            { return(Math.max(1003, Math.min(1 * 3600 * 1000, WEBSVR_SYSPROPS_RECHECK_MS))); }
911    
912        /**Name of Web server maximum-cached-exhibit-prefix-size (bytes) property. */
913        public static final String PNAME_WEBSVR_MAX_CACHEABLE_EX_BYTES = "pg2k.websvr.ex.maxcacheablesize";
914        /**Web server maximum-cached-exhibit-prefix-size (bytes). */
915        private final int WEBSVR_MAX_CACHEABLE_EX_BYTES;
916        /**Get the Web server maximum-cached-exhibit-prefix-size (bytes).
917         * Constrained to the range approx 64kB to 1GB, default approx 2MB.
918         */
919        public final int getWEBSVR_MAX_CACHEABLE_EX_BYTES()
920            { return(Math.max(1<<16, Math.min(1123456789, WEBSVR_MAX_CACHEABLE_EX_BYTES))); }
921    
922        /**Name of Web server persistent thumbnail cache size limit as percentage of exhibit cache limit value. */
923        public static final String PNAME_WEBSVR_TN_CACHEPC = "pg2k.websvr.ex.tncachepercent";
924        /**Web server persistent thumbnail cache size limit as percentage of exhibit cache limit value. */
925        private final int WEBSVR_TN_CACHEPC;
926        /**Web server persistent thumbnail cache size limit as percentage of exhibit cache limit value.
927         * Constrained to the range 0 to 100, default approx 10.
928         * <p>
929         * A value of zero disables the persistent cache
930         * (though in-memory cacheing may continue).
931         * <p>
932         * Any additional cache of generated thumbnails (exhibit samples)
933         * is limited to a size at most this many percent of the exhibit cache size.
934         * This cache is managed separately from this exhibit cache, but yoking the
935         * values together makes for simpler system management.
936         */
937        public final int getWEBSVR_TN_CACHEPC()
938            { return(Math.max(0, Math.min(100, WEBSVR_TN_CACHEPC))); }
939    
940        /**Name of Web server precacheing bandwidth-limiter value (max fraction of available bandwidth used on precacheing). */
941        public static final String PNAME_WEBSVR_BW_LIMITER = "pg2k.websvr.ex.bwlimiter";
942        /**Web server precacheing bandwidth-limiter value (max fraction of available bandwidth used on precacheing). */
943        private final int WEBSVR_BW_LIMITER;
944        /**Web server precacheing bandwidth-limiter value (max fraction of available bandwidth used on precacheing).
945         * Constrained to the range approx 1 to 1000, default approx 10.
946         * <p>
947         * If any precacheing operation takes N milliseconds the next one
948         * will not be started until at least N times this value later.
949         * <p>
950         * The effect is to limit resource consumption to at most about
951         * 1/N of the most constrained of slave and master resources
952         * (typically CPU and/or bandwidth).
953         */
954        public final int getWEBSVR_BW_LIMITER()
955            { return(Math.max(1, Math.min(1000, WEBSVR_BW_LIMITER))); }
956    
957        /**Name of Web server maximum exhibit size (32-bit value, bytes). */
958        public static final String PNAME_WEBSVR_MAX_EX_BYTES = "pg2k.websvr.ex.maxexhibitsize";
959        /**Web server maximum exhibit size (32-bit value, bytes). */
960        private final int WEBSVR_MAX_EX_BYTES;
961        /**Get the Web server maximum exhibit size (32-bit value, bytes).
962         * Constrained to the range approx 1B to 1GB, default approx 1GB.
963         */
964        public final int getWEBSVR_MAX_EX_BYTES()
965            { return(Math.max(1, Math.min(Integer.MAX_VALUE/2, WEBSVR_MAX_EX_BYTES))); }
966    
967        /**Name of keywords list for HTML meta header ("" if none; not null). */
968        public static final String PNAME_WEBSVR_META_KEYWORDS = "pg2k.websvr.meta.keywords";
969        /**Keywords list for HTML meta header ("" if none; not null). */
970        private final String WEBSVR_META_KEYWORDS;
971        /**Get the keywords list for HTML meta header ("" if none; never null).
972         * May be trimmed of excess whitespace and canonicalised; "" if none.
973         * <p>
974         * Guaranteed only to contain characters safe to put directly into
975         * an HTML description meta-header attribute value.
976         * <p>
977         * In particular this result contains no quote marks (" or ')
978         * nor ampersand nor angle-brackets, though may contain whitespace,
979         * so should be quoted.
980         */
981        public final String getWEBSVR_META_KEYWORDS()
982            {
983            final String result = WEBSVR_META_KEYWORDS;
984            if(result == null) { return(""); }
985            assert(result.indexOf('\'') == -1);
986            assert(result.indexOf('"') == -1);
987            assert(result.indexOf('&') == -1);
988            assert(result.indexOf('<') == -1);
989            assert(result.indexOf('>') == -1);
990            return(result);
991            }
992    
993        /**Name of description for HTML meta header ("" if none; not null). */
994        public static final String PNAME_WEBSVR_META_DESCRIPTION = "pg2k.websvr.meta.description";
995        /**Description for HTML meta header ("" if none; not null). */
996        private final String WEBSVR_META_DESCRIPTION;
997        /**Get the description list for HTML meta header ("" if none; not null).
998         * May be trimmed of excess whitespace; "" if none.
999         */
1000        public final String getWEBSVR_META_DESCRIPTION()
1001            {
1002            final String result = WEBSVR_META_DESCRIPTION;
1003            if(result == null) { return(""); }
1004            return(result);
1005            }
1006    
1007        /**Name of JPEG exhibit to use as a background image (null if none). */
1008        public static final String PNAME_WEBSVR_BG_IMAGE = "pg2k.websvr.livery.default.bg";
1009        /**JPEG exhibit to use as a background image (null if none).
1010         * Should be Name.ExhibitFull but could be String from deserialising old format.
1011         */
1012        private final CharSequence WEBSVR_BG_IMAGE;
1013        /**Get the JPEG exhibit to use as a background image (null if none).
1014         * Is a syntactically-valid JPEG exhibit name (or null).
1015         */
1016        public final Name.ExhibitFull getWEBSVR_BG_IMAGE()
1017            {
1018            final CharSequence result = WEBSVR_BG_IMAGE;
1019            if(null == result) { return(null); }
1020            if(result.getClass() == Name.ExhibitFull.class) { return((Name.ExhibitFull) result); }
1021            return(Name.ExhibitFull.create(result));
1022            }
1023    
1024        /**Class holding details of one simple banner advertisement; immutable, serialisable.
1025         * Weighting must be strictly positive integer; text must be
1026         * non-zero-length, 7-bit printable (32--126) ASCII/HTML data,
1027         * though we also allow CRLF (\r\n) for tricky JavaScript/HTML issues...
1028         * <p>
1029         * We assume that the on-the-wire format for members is reasonably
1030         * efficient, especially if the transport layer includes compression.
1031         * <p>
1032         * This supports equals(), hashCode() and compareTo(); two objects
1033         * are equal if all their fields are, and the sort order is to
1034         * minimise ad-lookup and HTML-bandwidth (ie by highest weight
1035         * then smallest code, with ties being broken by code lexical ordering).
1036         */
1037        private static final class SimpleAd implements Serializable,
1038                                                       ObjectInputValidation,
1039                                                       MemoryTools.Internable,
1040                                                       Comparable<SimpleAd>
1041            {
1042            /**Strictly positive weighting. */
1043            final int weight;
1044            /**HTML code as 7-bit ASCII; non-null, non-zero length. */
1045            final String code;
1046    
1047            /**Maximum HTML code length in bytes/characters. */
1048            public static final int MAX_AD_CHARACTERS = 2048;
1049    
1050            /**Equality depends on all fields. */
1051            @Override
1052            public boolean equals(final Object o)
1053                {
1054                if(!(o instanceof SimpleAd)) { return(false); }
1055                final SimpleAd other = (SimpleAd) o;
1056                return((weight == other.weight) && (code.equals(other.code)));
1057                }
1058    
1059            /**Hash depends on text as good source of variability. */
1060            @Override
1061            public int hashCode() { return(code.hashCode()); }
1062    
1063            /**Provides total ordering, with primary sort by decreasing weight.
1064             * Full sort order is:
1065             * <ol>
1066             * <li>decreasing weight, then
1067             * <li>increasing code length, then
1068             * <li>code lexical order.
1069             * </ol>
1070             */
1071            public int compareTo(final SimpleAd o)
1072                {
1073                final SimpleAd other = (SimpleAd) o;
1074    
1075                if(weight > other.weight) { return(-1); } // Right order.
1076                if(weight < other.weight) { return( 1); } // Wrong order.
1077    
1078                if(code.length() < other.code.length()) { return(-1); } // Right order.
1079                if(code.length() > other.code.length()) { return( 1); } // Wrong order.
1080    
1081                // Break ties to make total ordering.
1082                return(code.compareTo(other.code));
1083                }
1084    
1085            /**Construct new simple ad details. */
1086            SimpleAd(final int adRelativeWeight, final String HTMLCode)
1087                {
1088                weight = adRelativeWeight;
1089                code = MemoryTools.intern(HTMLCode);
1090                // Verify object state and thus validate parameters...
1091                try { validateObject(); }
1092                catch(final InvalidObjectException e)
1093                    { throw new IllegalArgumentException(e.getMessage()); }
1094                }
1095    
1096            /**Our serialisation version. */
1097            private static final long serialVersionUID = 6649356978055850149L;
1098    
1099    //        /**Deserialise. */
1100    //        private void readObject(final ObjectInputStream in)
1101    //            throws IOException, ClassNotFoundException
1102    //            {
1103    //            in.defaultReadObject();
1104    //            validateObject(); // Validate state immediately.
1105    //            }
1106    
1107            /**Deserialise: validate and eliminate duplicates coming off the wire.
1108             * @return identical, de-duped, non-null instance
1109             */
1110            protected Object readResolve()
1111                // throws ObjectStreamException
1112                { return(MemoryTools.intern(new SimpleAd(weight, code))); }
1113    
1114            /**Validate fields/state.
1115             * Called in the constructor and possibly after de-serialising.
1116             */
1117            public void validateObject()
1118                throws InvalidObjectException
1119                {
1120                // Check that all components are sane and safe.
1121                // Weight must be strictly positive.
1122                if(weight <= 0)
1123                    { throw new InvalidObjectException("bad object: weight <= 0"); }
1124                // HTML code must be non-null, non-zero-length,
1125                // 7-bit printable ASCII (and CRLF).
1126                if((code == null) ||
1127                   (code.length() == 0) || (code.length() > MAX_AD_CHARACTERS))
1128                    { throw new InvalidObjectException("bad object: HTML code length silly"); }
1129                for(int i = code.length(); --i >= 0; )
1130                    {
1131                    final char c = code.charAt(i);
1132                    if(((c < 32) || (c > 126)) &&
1133                       ((c != '\r') && (c != '\n')))
1134                        { throw new InvalidObjectException("bad object: HTML code contains bad char " + ((int) c)); }
1135                    }
1136                }
1137            }
1138    
1139        /**Class holding details of one classified advertisement; immutable, serialisable.
1140         * We assume that the on-the-wire format for members is reasonably efficient,
1141         * especially if the transport layer includes compression such as ZIP.
1142         */
1143        public static final class ClassifiedAd implements Serializable,
1144                                                          ObjectInputValidation,
1145                                                          MemoryTools.Internable
1146            {
1147            /**Construct new classified ad details.
1148             *
1149             * @param HTMLCode  HTML code as 7-bit ASCII; non-null, non-zero length
1150             * @param URIregex  valid regular expression to match a substring of a URI path;
1151             *     never null nor empty
1152             */
1153            public ClassifiedAd(final String HTMLCode,
1154                                final String URIregex,
1155                                final long start, final long end,
1156                                final String language,
1157                                final String ccTLDs)
1158                {
1159                regex = MemoryTools.intern(URIregex);
1160                code = MemoryTools.intern(HTMLCode);
1161                lang = MemoryTools.intern(language);
1162                countries = MemoryTools.intern(ccTLDs);
1163                this.start = start;
1164                this.end = end;
1165                // Verify object state and thus validate parameters...
1166                try { validateObject(); }
1167                catch(final InvalidObjectException e)
1168                    { throw new IllegalArgumentException(e.getMessage()); }
1169                }
1170    
1171            /**Valid case-sensitive regular expression to match a substring of a URI path, never null nor empty. */
1172            public final String regex;
1173    
1174            /**Private cache of compiled regex; null until first use.
1175             * Created on first use; not serialised.
1176             * <p>
1177             * Never set null once non-null.
1178             * <p>
1179             * Marked volatile for thread-safe lock-free access,
1180             */
1181            private transient volatile Pattern pattern;
1182            /**Get compiled regex; never null. */
1183            public Pattern getPattern()
1184                {
1185                Pattern p = pattern;
1186                if(p == null) { pattern = p = Pattern.compile(regex); }
1187                return(p);
1188                }
1189    
1190            /**HTML code as 7-bit ASCII; non-null, non-zero length. */
1191            public final String code;
1192    
        /**Maximum HTML code length in bytes/characters, as per SimpleAd; strictly positive. */
1194            public static final int MAX_AD_CHARACTERS = SimpleAd.MAX_AD_CHARACTERS;
1195    
1196            /**Start time (+ve UTC ms) or zero if no start time. */
1197            public final long start;
        /**End time (+ve UTC ms greater than 'start') or zero if no end time. */
1199            public final long end;
1200    
1201            /**Language/locale filter; two-letter lower-case language code or null if none. */
1202            public final String lang;
1203    
1204            /**Returns true if the supplied (visitor's) locale is inappropriate for this ad.
1205             * If the lang value is null then this returns false.
1206             * Else if the visitor's locale/language is null (unknown) then this returns true.
1207             * Else this returns true iff the visitor's locale language matches lang.
1208             */
1209            public boolean wrongLanguage(final Locale visitorLocale)
1210                {
1211                if(lang == null) { return(false); }
1212                if(visitorLocale == null) { return(true); }
1213                return(!lang.equals(visitorLocale.getLanguage()));
1214                }
1215    
1216            /**Country code; comma-separated list of lower-case ccTLD codes or null if none. */
1217            public final String countries;
1218    
1219            /**Returns true if the supplied (visitor's) country is inappropriate for this ad.
1220             * If the countries list is null then this returns false.
1221             * Else if the visitor's ccTLD (country code) is null (unknown) then this returns true.
1222             * Else this returns true iff the visitor's ccTLD is found on our countries list.
1223             */
1224            public boolean wrongCountry(final CCTLD visitorCcTLD)
1225                {
1226                if(countries == null) { return(false); }
1227                if(visitorCcTLD == null) { return(true); }
1228                // Check for presense in the 'allowed' list.
1229                // If the list is correctly normalised then a simple substring search suffices
1230                // (though might be inefficient for VERY long lists).
1231                // We search backwards since common values may be late (us, uk)
1232                // and we may have normalised to natural sorted (lexical/ASCII) order,
1233                // but the search direction is not necessary for correctness.
1234                return(countries.lastIndexOf(visitorCcTLD.code) == -1);
1235                }
1236    
1237            /**Equality depends on all fields. */
1238            @Override
1239            public boolean equals(final Object o)
1240                {
1241                if(!(o instanceof ClassifiedAd)) { return(false); }
1242                final ClassifiedAd other = (ClassifiedAd) o;
1243                if(start != other.start) { return(false); }
1244                if(end != other.end) { return(false); }
1245                if(!code.equals(other.code)) { return(false); }
1246                if(lang == null) { if(other.lang != null) { return(false); } }
1247                else if(!lang.equals(other.lang)) { return(false); }
1248                if(countries == null) { if(other.countries != null) { return(false); } }
1249                else if(!countries.equals(other.countries)) { return(false); }
1250                return(true); // Identical.
1251                }
1252    
1253            /**Hash depends on the regex and the end date as good/quick sources of variability. */
1254            @Override
1255            public int hashCode() { return(regex.hashCode() ^ (int)(end >> 26)); }
1256    
1257            /**Validate fields/state.
1258             * Called in the constructor and possibly after de-serialising.
1259             */
1260            public void validateObject()
1261                throws InvalidObjectException
1262                {
1263                // HTML code must be non-null, non-zero-length,
1264                // 7-bit printable ASCII (and CRLF).
1265                if((code == null) ||
1266                   (code.length() == 0) || (code.length() > MAX_AD_CHARACTERS))
1267                    { throw new InvalidObjectException("bad object: HTML code length silly"); }
1268                for(int i = code.length(); --i >= 0; )
1269                    {
1270                    final char c = code.charAt(i);
1271                    if(((c < 32) || (c > 126)) &&
1272                       ((c != '\r') && (c != '\n')))
1273                        { throw new InvalidObjectException("bad object: HTML code contains bad char " + ((int) c)); }
1274                    }
1275                if((regex == null) || (regex.length() == 0) || (regex.length() > 2*ExhibitName.MAX_NAME_LENGTH))
1276                    { throw new InvalidObjectException("bad object: regex null or length invalid"); }
1277                // Validate the regex pattern (and cache it too; naughty but efficient).
1278                try { pattern = Pattern.compile(regex); }
1279                catch(final PatternSyntaxException e)
1280                    { throw new InvalidObjectException("bad object: regex invalid"); }
1281                if((start < 0) || (end < 0))
1282                    { throw new InvalidObjectException("bad object: negative start/end"); }
1283                if((start != 0) && (start < CoreConsts.GALLERY_EPOC_START))
1284                    { throw new InvalidObjectException("bad object: start date invalid: " + new Date(start)); }
1285                if((end != 0) && (end <= start))
1286                    { throw new InvalidObjectException("bad object: end < start"); }
1287                if((lang != null) && !lang.matches("^[a-z][a-z]$"))
1288                    { throw new InvalidObjectException("bad object: bad language not [a-z][a-z]"); }
1289                if((countries != null) && !countries.matches("^[a-z][a-z](,[a-z][a-z])*$"))
1290                    { throw new InvalidObjectException("bad object: bad countries list not [a-z][a-z](,[a-z][a-z])*"); }
1291                // Avoid huge countries list with arbitrary limit...
1292                if((countries != null) && (countries.length() > 128))
1293                    { throw new InvalidObjectException("bad object: bad countries list too long"); }
1294                }
1295    
1296            /**Deserialise: validate and eliminate duplicates coming off the wire.
1297             * @return identical, de-duped, non-null instance
1298             */
1299            protected Object readResolve()
1300                // throws ObjectStreamException
1301                { return(MemoryTools.intern(new ClassifiedAd(code, regex, start, end, lang, countries))); }
1302    
1303            /**Unique serialisation UID. */
1304            private static final long serialVersionUID = 8848447664623542985L;
1305            }
1306    
1307    
1308        /**Immutable Set of String tokens to replace with random values in banner HTML.
1309         * These tokens, when encountered in HTML banner ad text,
1310         * are replaced with random (positive, decimal) integers.
1311         * These are to help with "cache-busting".
1312         * <p>
1313         * These tokens must be chosen so as to be very unlikely to appear
1314         * where they are <em>not</em> intended for replacement.
1315         * <p>
1316         * This set may become replaceable at run-time in future.
1317         */
1318        public static final Set<String> AD_BANNER_RND_TOKENS = Collections.unmodifiableSet(
1319            new HashSet<String>(Arrays.asList(new String[]{
1320                "$RND$", // TribalFusion cache-buster token.
1321            })));
1322    
1323        /**Name of prefix for simple banner ad code.
1324         * There is one sub-property ({prefix}.allow, either "true" or "false")
1325         * that allows or disallows all simple ads at once.
1326         * <p>
1327         * Then for N starting at 1, and stopping at the first gap,
1328         * there must be a non-empty {prefix}.{N}.HTML containing
1329         * non-empty, pure-printable 7-bit ASCII (32--126, +CRLF) HTML ad code.
1330         * There may be an optional {prefix}.{N}.wt strictly-positive
1331         * integer relative weighting (default 100) of this ad to others,
1332         * the higher the weighting the more of the available space taken.
1333         */
1334        public static final String PNAME_WEBSVR_SIMPLEAD_PREFIX = "pg2k.websvr.ad.";
1335        /**Name of on/off switch for simple and classified ad code, enabled by default. */
1336        public static final String PNAME_WEBSVR_SIMPLEAD_ALLOW = PNAME_WEBSVR_SIMPLEAD_PREFIX + "allow";
1337        /**On/off switch for simple banner ad code, enabled by default. */
1338        private final boolean WEBSVR_SIMPLEAD_ALLOW;
1339        /**Get on/off switch for simple banner ad code, enabled by default.
1340         */
1341        public final boolean getWEBSVR_SIMPLEAD_ALLOW() { return(WEBSVR_SIMPLEAD_ALLOW); }
1342        /**Get HTML code for a simple ad at random; null if none available.
1343         * This returns ads with frequencies in proportion to their weightings.
1344         * <p>
1345         * This will always return null if getWEBSVR_SIMPLEAD_ALLOW()
1346         * returns false.
1347         * <p>
1348         * This algorithm will take O(n) time to return one of n available ads.
1349         * <p>
1350         * Will be faster when ads are unevenly weigted.
1351         * <p>
1352         * This will automatically do substitution of random "cache-buster" tokens
1353         * if required.
1354         * (Any of the recognised tokens will be substituted with a
1355         * random (positive decimal) integer value, different each time,
1356         * with all occurrences of any one token the same in any one call.)
1357         */
1358        public final String getSimpleAd()
1359            {
1360            if(/* !WEBSVR_SIMPLEAD_ALLOW || */ /* Test is redundant. */
1361               /* (simpleAds == null) || */ /* Test is redundant. */
1362               (totalSimpleAdWeight == 0)) { return(null); }
1363    
1364            // Pick uniformly-distributed selector value.
1365            // We primarily require speed from this generator,
1366            // not perfection nor security.
1367            final int r = Rnd.fastRnd.nextInt(totalSimpleAdWeight);
1368    
1369            // Total weight so far.
1370            int tw = 0;
1371    
1372            // Look until cumulative sum of weights passes selector,
1373            // then take the ad that passes that threshold.
1374            // If performance proves to be a problem we could
1375            // replace this with a binary chop by storing an auxillary
1376            // array containing the sum of the weights.
1377            // In the interim, if we discover that this array is not
1378            // sorted biggest weight first (which makes for fastest
1379            // linear search on average) then we can sort it when
1380            // we have finished selecting our ad this time.
1381            for(int i = 0; i < simpleAds.length; ++i)
1382                {
1383                tw += simpleAds[i].weight;
1384                if(r < tw)
1385                    {
1386                    String code = simpleAds[i].code;
1387                    // Substitute cache-buster tokens as needed.
1388                    if((code != null) && (AD_BANNER_RND_TOKENS.size() > 0))
1389                        {
1390                        for(final String token : AD_BANNER_RND_TOKENS)
1391                            {
1392                            // If the cache-buster token exists then replace all occurrences.
1393                            if(code.indexOf(token) != -1)
1394                                {
1395                                final int rnd = (Rnd.fastRnd.nextInt() >>> 1);
1396                                code = code.replace(token, String.valueOf(rnd));
1397                                }
1398                            }
1399                        }
1400                    return(code);
1401                    }
1402                }
1403    
1404            return(null); // No ad available.  (Probably should not be able to happen.)
1405            }
1406    
1407        /**Total weight of all simple ads; non-negative.
1408         * Strictly positive if there are some simple ads.
1409         */
1410        private final int totalSimpleAdWeight;
1411    
1412        /**Recompute totalSimpleAdWeight; zero if arg is null.
1413         * @throws IllegalArgumentException  if sum of weights would
1414         *     exceed Integer.MAX_VALUE.
1415         */
1416        private static int _compute_totalSimpleAdWeight(final SimpleAd _simpleAds[])
1417            {
1418            if(_simpleAds == null) { return(0); }
1419            int result = 0;
1420            for(int i = _simpleAds.length; --i >= 0; )
1421                {
1422                final long r = result + (long) (_simpleAds[i].weight);
1423                if(r > Integer.MAX_VALUE)
1424                    { throw new IllegalArgumentException("weights too large"); }
1425                result = (int) r;
1426                }
1427            return(result);
1428            }
1429    
1430        /**Sorted array of simple ads, no empty slots, non-zero length; or null if no simple ads. */
1431        private /* final */ SimpleAd simpleAds[];
1432    
1433        /**The maximum number of simple ads that we will entertain at once. */
1434        public static final int MAX_SIMPLE_ADS = 64;
1435    
1436    
1437        /**Name of prefix for classified ad code. */
1438        public static final String PNAME_WEBSVR_CLASSIFIEDAD_PREFIX = "pg2k.websvr.clad.";
1439    
1440        /**Array of classified ads, no empty slots or duplicates, non-zero length; or null if no classified ads. */
1441        private /* final */ ClassifiedAd classifiedAds[];
1442    
1443        /**The maximum number of classified ads that we will entertain at once. */
1444        public static final int MAX_CLASSIFIED_ADS = 64;
1445    
    /**Overall URI regex to check for match against any classified ad; null until lazily built (and never built while there are too few classified ads to make it worthwhile).
1447         * Not serialised; created on first use.
1448         * Never set null once non-null.
1449         * <p>
1450         * Marked volatile for thread-safe lock-free access.
1451         */
1452        private transient volatile Pattern _masterClassifiedMatchPattern;
1453        /**Returns private mutable List of possible classifieds that match the filters; null if no such ads.
1454         * The URI passed should usually be absolute, ie starting with '/'.
1455         * <p>
1456         * This inspects and filters by:
1457         * <ul>
1458         * <li>the URI regex,
1459         * <li>the current date vs any start/end dates,
1460         * <li>the user's ccTLD if supplied vs the countries list if supplied,
1461         * <li>the user's locale language if supplied vs the ad language if supplied.
1462         * </ul>
1463         * <p>
1464         * It may be desirable to shuffle the result
1465         * to avoid any unwanted bias/ordering
1466         * and to minimise ad-blindness.
1467         *
1468         * @param  uri  the display URI; must be non-null
1469         * @param  locale  if non-null then ads are filtered by the ad language,
1470         *     if null then we do not filter by locale/language
1471         * @param  addr  if non-null then ads are filtered by the country indicated
1472         *     by this client IP address,
1473         *     if null then we do not filter by country
1474         *
1475         * @return  undefined-order non-empty private mutable List of possible matching classifieds, else null.
1476         */
1477        public List<ClassifiedAd> getClassifieds(final URI uri,
1478                                                 final Locale locale,
1479                                                 final InetAddress addr)
1480            {
1481            // If no classifieds then always return null immediately.
1482            if(classifiedAds == null) { return(null); }
1483    
1484            // Match against URI path component only.
1485            final String path = uri.getPath();
1486            // No match is possible if there is no path component.
1487            if(path == null) { return(null); }
1488    
1489            // If there is a matcher filter/matcher
1490            // to check against all the URI patterns in one go
1491            // then test against it now.
1492            final Pattern master = _masterClassifiedMatchPattern;
1493            if((master != null) && !master.matcher(path).find()) { return(null); }
1494    
1495            // Prepare to filter by current time/date.
1496            final long now = System.currentTimeMillis();
1497    
1498            final List<ClassifiedAd> result = new ArrayList<ClassifiedAd>(classifiedAds.length);
1499            for(final ClassifiedAd clad : classifiedAds)
1500                {
1501                // Filter by (optional) start/end dates.  Should be very fast.
1502                if((clad.start != 0) && (now < clad.start)) { continue; }
1503                if((clad.end != 0) && (now >= clad.end)) { continue; }
1504                // Filter by (optional) language, if provided.  Should be quick.
1505                if((locale != null) && clad.wrongLanguage(locale)) { continue; }
1506                // Filter by URI regex.  May be moderately slow.
1507                if(!clad.getPattern().matcher(path).find()) { continue; }
1508                // Filter by (optional) allowed countries.  May be (very) slow, so do last.
1509                if((addr != null) && clad.wrongCountry(GeoUtils.getCCTLDByAddress(addr, false))) { continue; }
1510                // Add the successfully-matched ad to the result.
1511                result.add(clad);
1512                }
1513            if(result.isEmpty())
1514                {
1515                // If there are many classifieds but none matched this time
1516                // (and we don't already have a master matcher)
1517                // then it may be worthwhile for subsequent calls
1518                // to have available an overall master matcher
1519                // to quickly reject any URI that will never match any current ad.
1520                if((master == null) && (classifiedAds.length > 2))
1521                    {
1522                    // Build a compound expression accepting the union of the matches.
1523                    final StringBuilder sb = new StringBuilder();
1524                    for(final ClassifiedAd clad : classifiedAds)
1525                        {
1526                        if(sb.length() == 0) { sb.append('('); }
1527                        else { sb.append("|("); }
1528                        sb.append(clad.regex);
1529                        sb.append(')');
1530                        }
1531                    _masterClassifiedMatchPattern = Pattern.compile(sb.toString());
1532                    }
1533    
1534                return(null);
1535                }
1536    
1537            return(result);
1538            }
1539    
1540    
1541        /**Get AuthData for specified author, or null if none.
1542         * @param auth  author initials of desired data.
1543         */
1544        public synchronized AuthData getAuthData(final CharSequence auth)
1545            {
1546            if(authDB == null) { return(null); }
1547            return(authDB.get(auth.toString()));
1548            }
1549    
1550        /**Property name prefix for author details. */
1551        public static final String PPREFIX_AUTH_DETAILS = "pg2k.authdb.";
1552    
1553        /**Immutable SortedMap of author details, no empty or duplicate slots; non-zero length, or null if none. */
1554        private /* final */ SortedMap<String,AuthData> authDB;
1555    
1556        /**The maximum number of author database entries. */
1557        public static final int MAX_AUTH_ENTRIES = 1024;
1558    
1559        /**Class holding details of one author; immutable, serialisable.
1560         * Author initial must be valid syntactically;
1561         * text must be 7-bit printable (32--126) ASCII/HTML data.
1562         * <p>
1563         * We assume that the on-the-wire format for members is reasonably
1564         * efficient, especially if the transport layer includes compression.
1565         * <p>
1566         * This supports equals(), hashCode() and compareTo(); two objects
1567         * are equal iff all the fields are, and the sort order is by author.
1568         */
1569        public static final class AuthData implements Serializable,
1570                                                      ObjectInputValidation,
1571                                                      Comparable<AuthData>,
1572                                                      MemoryTools.Internable
1573            {
1574            /**Author initials: must be syntactically valid; non-null. */
1575            public final String auth;
1576    
1577            /**Author name: must be pure 7-bit printable ASCII HTML with possible entities; not empty, non-null. */
1578            public final String name;
1579    
1580            /**Author home-page URL: should be syntactically valid URL or null. */
1581            public final String www;
1582    
1583            /**Author email: should be syntactically valid email address or null. */
1584            public final String email;
1585    
1586            /**Description HTML code as 7-bit ASCII; never empty but can be null. */
1587            public final String desc;
1588    
1589            /**Maximum HTML description length in bytes/characters. */
1590            public static final int MAX_DESC_CHARACTERS = 2048;
1591    
1592            /**Equality depends on all fields. */
1593            @Override
1594            public boolean equals(final Object o)
1595                {
1596                if(!(o instanceof AuthData)) { return(false); }
1597                final AuthData other = (AuthData) o;
1598                if(!auth.equals(other.auth)) { return(false); }
1599                if(!name.equals(other.name)) { return(false); }
1600                if((www == null) ? (other.www != null) : !www.equals(other.www)) { return(false); }
1601                if((email == null) ? (other.email != null) : !email.equals(other.email)) { return(false); }
1602                if((desc == null) ? (other.desc != null) : !desc.equals(other.desc)) { return(false); }
1603                return(true);
1604                }
1605    
1606            /**Hash depends on author. */
1607            @Override
1608            public int hashCode() { return(auth.hashCode()); }
1609    
1610            /**Provides total ordering, by author. */
1611            public int compareTo(final AuthData o)
1612                { return(auth.compareTo(((AuthData) o).auth)); }
1613    
1614            /**Construct new author details from single string.
1615             * The String should be of the form:
1616             * <samp>Real Name|HomePageURL|email|description</samp>
1617             * where any item can be blank except the name.
1618             */
1619            AuthData(final String authInitials,
1620                     final String pipeDelimitedString)
1621                {
1622                this(authInitials,
1623                     _getSection(pipeDelimitedString, 0),
1624                     _getSection(pipeDelimitedString, 1),
1625                     _getSection(pipeDelimitedString, 2),
1626                     _getSection(pipeDelimitedString, 3));
1627                }
1628    
1629            /**Extract given section from pipe-delimited String.
1630             * Skip given number of pipe symbols ("|")
1631             * then return next up to following one if present.
1632             * Returns null if section not present.
1633             */
1634            private static String _getSection(final String s, final int section)
1635                {
1636                int prevPipePos = -1;
1637                for(int i = section; --i >= 0; )
1638                    {
1639                    final int nextPipe = s.indexOf('|', prevPipePos+1);
1640                    // Return null if requested section not present.
1641                    if(nextPipe == -1) { return(null); }
1642                    // Else adjust start marker.
1643                    prevPipePos = nextPipe;
1644                    }
1645    
1646                // Find the start of the following section, if any.
1647                final int nextPipePos = s.indexOf('|', prevPipePos+1);
1648    
1649                // No following section; return the rest of the string.
1650                if(nextPipePos == -1)
1651                    { return(s.substring(prevPipePos+1)); }
1652    
1653                // Return the requested section.
1654                return(s.substring(prevPipePos+1, nextPipePos));
1655                }
1656    
1657            /**Construct new author details from individial components.
1658             * @param authInitials  the syntactally-valid author initials; non-null
1659             * @param authName  real name of author; not empty nor null
1660             * @param homePageURL  valid http URL, will be stripped,
1661             *     empty text is converted to null
1662             * @param emailAddress  valid email address, will be stripped,
1663             *     empty text is converted to null
1664             * @param descriptionText  the descriptive text, will be stripped,
1665             *     empty text is converted to null
1666             *
1667             * Not given public access as only GenProps should need to construct.
1668             */
1669            AuthData(final String authInitials,
1670                     final String authName,
1671                     String homePageURL,
1672                     String emailAddress,
1673                     String descriptionText)
1674                {
1675                // Canonicalise URL.
1676                if(homePageURL != null)
1677                    {
1678                    homePageURL = homePageURL.trim();
1679                    if(homePageURL.length() == 0)
1680                        { homePageURL = null; }
1681                    }
1682    
1683                // Canonicalise email address.
1684                if(emailAddress != null)
1685                    {
1686                    emailAddress = emailAddress.trim();
1687                    if(emailAddress.length() == 0)
1688                        { emailAddress = null; }
1689                    }
1690    
1691                // Canonicalise description text.
1692                if(descriptionText != null)
1693                    {
1694                    descriptionText = descriptionText.trim();
1695                    if(descriptionText.length() == 0)
1696                        { descriptionText = null; }
1697                    }
1698    
1699                // Use intern() mainly to reduce old-heap memory churn.
1700                auth = MemoryTools.intern(authInitials);
1701                name = MemoryTools.intern(authName.trim());
1702                www = MemoryTools.intern(homePageURL);
1703                email = MemoryTools.intern(emailAddress);
1704                desc = MemoryTools.intern(descriptionText);
1705    
1706                // Verify object state and thus validate parameters...
1707                try { validateObject(); }
1708                catch(final InvalidObjectException e)
1709                    { throw new IllegalArgumentException(e.getMessage()); }
1710                }
1711    
1712            /**Our serialisation version. */
1713            private static final long serialVersionUID = 2647433856891011944L;
1714    
1715            /**Deserialise. */
1716            private void readObject(final ObjectInputStream in)
1717                throws IOException, ClassNotFoundException
1718                {
1719                in.defaultReadObject();
1720                validateObject(); // Validate state immediately.
1721                }
1722    
1723            /**Validate fields/state.
1724             * Called in the constructor and possibly after de-serialising.
1725             * <p>
1726             * Barf if something bad is found.
1727             * (Maybe allow some extra info in debug version.)
1728             */
1729            public void validateObject()
1730                throws InvalidObjectException
1731                {
1732                // Validate author initials...
1733                if(!ExhibitName.validAuthorSyntax(auth))
1734                    { throw new InvalidObjectException("bad object: author initials invalid"); }
1735    
1736                if((name == null) || (name.length() == 0) ||
1737                    !name.equals(name.trim()))
1738                    { throw new InvalidObjectException("bad object: author name missing or too short or non-canonical"); }
1739                for(int i = name.length(); --i >= 0; )
1740                    {
1741                    final char c = name.charAt(i);
1742                    if((c < 32) || (c > 126))
1743                        { throw new InvalidObjectException("bad object: author name contains bad char " + ((int) c)); }
1744                    }
1745    
1746                // Validate URL if not null.
1747                //   * Start with "http://".
1748                //   * Parseable...
1749                if(www != null)
1750                    {
1751                    if(!www.equals(www.trim()))
1752                        { throw new InvalidObjectException("bad object: WWW address non-canonical"); }
1753                    if(!www.startsWith("http://"))
1754                        { throw new InvalidObjectException("bad object: WWW address seems invalid"); }
1755                    try { new URL(www); }
1756                    catch(final MalformedURLException e)
1757                        { throw new InvalidObjectException("bad object: WWW address unparseable"); }
1758                    }
1759    
1760                // Validate email address if not null.
1761                //   * At least 5 characters (eg "a@b.c").
1762                //   * Contains exactly one "@".
1763                if(email != null)
1764                    {
1765                    if(!email.equals(email.trim()))
1766                        { throw new InvalidObjectException("bad object: email address non-canonical"); }
1767                    if(email.length() < 5)
1768                        { throw new InvalidObjectException("bad object: email address too short"); }
1769                    final int firstAt = email.indexOf('@');
1770                    if(firstAt < 1)
1771                        { throw new InvalidObjectException("bad object: email address user portion missing"); }
1772                    if(firstAt != email.lastIndexOf('@'))
1773                        { throw new InvalidObjectException("bad object: email address contains more than one @"); }
1774                    }
1775    
1776                // HTML code must be non-empty (but can be null),
1777                // 7-bit printable ASCII (and CRLF).
1778                if(desc != null)
1779                    {
1780                    if(!desc.equals(desc.trim()))
1781                        { throw new InvalidObjectException("bad object: HTML desc text non-canonical"); }
1782                    if((desc.length() == 0) || (desc.length() > MAX_DESC_CHARACTERS))
1783                        { throw new InvalidObjectException("bad object: HTML desc text length silly"); }
1784                    for(int i = desc.length(); --i >= 0; )
1785                        {
1786                        final char c = desc.charAt(i);
1787                        if(((c < 32) || (c > 126)) &&
1788                           ((c != '\r') && (c != '\n')))
1789                            { throw new InvalidObjectException("bad object: HTML desc text contains bad char " + ((int) c)); }
1790                        }
1791                    }
1792                }
1793            }
1794    
1795        /**Prefix for static "goodness"/popularity weighting values. */
1796        public static final String PPREFIX_POPWT_DETAILS = "pg2k.popweight.";
1797    
1798        /**Goodness subcomponent by author; includes trailing dot. */
1799        public static final String PCOMP_POPWR_BYAUTH = "byAuth.";
1800    
1801        /**Goodness subcomponent by type/extension; includes trailing dot. */
1802        public static final String PCOMP_POPWR_BYTYPE = "byType.";
1803    
1804        /**Goodness subcomponent by attribute; includes trailing dot. */
1805        public static final String PCOMP_POPWR_BYATTR = "byAttr.";
1806    
1807        /**The maximum number of static "goodness"/popularity database entries. */
1808        public static final int MAX_POPWT_ENTRIES = 256;
1809    
    /**The maximum allowed "goodness"/popularity weight (maximally good); strictly positive. */
1811        public static final byte MAX_POPWT_VAL = 100;
1812    
1813        /**The minimum allowed "goodness"/popularity weight (maximally bad); strictly negative. */
1814        public static final byte MIN_POPWT_VAL = -MAX_POPWT_VAL;
1815    
1816        /**Map from String token to Byte ([-100,+100]) weighting.
     * We cheat and assume that the sets of author, attribute words and extensions
1818         * are disjoint, and so we keep all the mappings in a single table!
1819         */
1820        private /* final */ Map<String,Byte> popWeights;
1821    
1822        /**Get weighting by author in range ([-100,+100]); null if no weighting for specified author.
1823         * Author initials specified must be syntactically valid.
1824         * <p>
1825         * Note that this should be treated as if a value of +1.0
1826         * with a correlation of the returned value divided by MAX_POPWT_VAL.
1827         */
1828        public synchronized Byte getPopWeightForAuth(final CharSequence auth)
1829            {
1830            if(!ExhibitName.validAuthorSyntax(auth))
1831                { throw new IllegalArgumentException(); }
1832            final Map<String,Byte> p = popWeights;
1833            if(p == null) { return(null); }
1834            return(p.get(auth.toString()));
1835            }
1836    
1837        /**Get weighting by type/extension in range ([-100,+100]); null if no weighting for specified type.
1838         * Extension/type specified must be syntactically valid and known to the system.
1839         * <p>
1840         * Note that this should be treated as if a value of +1.0
1841         * with a correlation of the returned value divided by MAX_POPWT_VAL.
1842         */
1843        public synchronized Byte getPopWeightForType(final CharSequence type)
1844            {
1845            if(ExhibitMIME.isValidInputExhibitNameExtension(type) == null)
1846                { throw new IllegalArgumentException(); }
1847            final Map<String,Byte> p = popWeights;
1848            if(p == null) { return(null); }
1849            return(p.get(type.toString()));
1850            }
1851    
1852        /**Get weighting by attribute word in range ([-100,+100]); null if no weighting for specified attribute.
1853         * Attribute word specified must be syntactically valid and known to the system.
1854         * <p>
1855         * Note that this should be treated as if a value of +1.0
1856         * with a correlation of the returned value divided by MAX_POPWT_VAL.
1857         */
1858        public synchronized Byte getPopWeightForAttr(final String attrWord)
1859            {
1860            if(!ExhibitName.validAttributeWord(attrWord))
1861                { throw new IllegalArgumentException(); }
1862            final Map<String,Byte> p = popWeights;
1863            if(p == null) { return(null); }
1864            return(p.get(attrWord));
1865            }
1866    
1867    
    /**Name of Web server hotlinked-download-limiter value (as percentage of all exhibit downloads). */
    public static final String PNAME_WEBSVR_EX_HOTLINK_LIMITER = "pg2k.websvr.ex.hotlinklimiter";
    /**Web server hotlinked-download-limiter value (as percentage of all exhibit downloads).
     * Held in a (signed) byte; the accessor masks it to yield a non-negative int.
     */
    private final byte WEBSVR_EX_HOTLINK_LIMITER;
1872        /**Web server hotlinked-download-limiter value (as percentange of all exhibit downloads).
1873         * Constrained to the range approx 0 to 255, default 0.
1874         * <p>
1875         * This limit is too prevent too much (lazy or bandwidth-theft) direct hot-linking
1876         * to exhibits (and/or thumbnails) by external Web sites.
1877         * <p>
1878         * There <em>are</em> bona fide reasons for some direct hotlinks, eg:
1879         * <ul>
1880         * <li>From some image search engines.
1881         * <li>From small Web sites and/or from contributors' sites.
1882         * </ul>
1883         */
1884        public final int getWEBSVR_EX_HOTLINK_LIMITER()
1885            { return(WEBSVR_EX_HOTLINK_LIMITER & 0xff); }
1886    
    /**Name of Web server hotlinked-download-limiter divert graphic URL value. */
    public static final String PNAME_WEBSVR_EX_HOTLINK_DIVERT_URL = "pg2k.websvr.ex.hotlinklimiter.altURL";
    /**Web server hotlinked-download-limiter divert graphic URL value; returned as-is by the accessor. */
    private final String WEBSVR_EX_HOTLINK_DIVERT_URL;
1891        /**Web server hotlinked-download-limiter value divert graphic URL value, or null if none.
1892         * Absolute URL of an alternate graphic to divert a user's brower to
1893         * when they have been hotlinked to one of our exhibits,
1894         * or null if the user is simply to get a 5XX error.
1895         * <p>
1896         * Preferably a small image with a long cache time.
1897         */
1898        public final String getWEBSVR_EX_HOTLINK_DIVERT_URL()
1899            { return(WEBSVR_EX_HOTLINK_DIVERT_URL); }
1900    
    /**Maximum number of allow/disallow hotlink hosts that may be specified. */
    public static final int MAX_ALLOW_DISALLOW_HOTLINK_HOSTS = 64;

    /**Name of Web server hotlinked-download-host-allow set. */
    public static final String PNAME_WEBSVR_EX_HOTLINK_LIMITER_ALLOW = "pg2k.websvr.ex.hotlinklimiter.allowhosts";

    /**Name of Web server hotlinked-download-host-disallow set. */
    public static final String PNAME_WEBSVR_EX_HOTLINK_LIMITER_DISALLOW = "pg2k.websvr.ex.hotlinklimiter.disallowhosts";

    /**Immutable Set of "allow" hotlink hosts (normalised host names); can be null for no such hosts.
     * The public accessor substitutes an empty set for null.
     */
    private /* final */ Set<String> hotLinkAllowHosts;

    /**Immutable compiled case-insensitive regex pattern for hotlink allow hosts not in literal list; can be null if no such "regex-match" hosts. */
    private final java.util.regex.Pattern hotLinkAllowHostsRegex;

    /**Immutable Set of "disallow" hotlink hosts (normalised/stripped/lower-cased host names); can be null for no such hosts.
     * The public accessor substitutes an empty set for null.
     */
    private /* final */ Set<String> hotLinkDisallowHosts;

    /**Immutable compiled case-insensitive regex pattern for hotlink disallow hosts not in literal list; can be null if no such "regex-match" hosts. */
    private final java.util.regex.Pattern hotLinkDisallowHostsRegex;
1921    
1922        /**Get immutable Set of "allow" hotlink hosts (normalised host names); never null. */
1923        public final Set<String> getHotLinkAllowHosts()
1924            {
1925            final Set<String> r = hotLinkAllowHosts;
1926            if(r == null)
1927                {
1928                final Set<String> noHosts = Collections.emptySet();
1929                return(noHosts);
1930                }
1931            return(r);
1932            }
1933    
1934        /**Get immutable compiled case-insensitive regex pattern for hotlink allow hosts not in literal list; can be null if no such "regex-match" hosts. */
1935        public final Pattern getHotLinkAllowHostsRegex()
1936            {
1937            return(hotLinkAllowHostsRegex);
1938            }
1939    
1940        /**Get immutable Set of "disallow" hotlink hosts (normalised host names); never null. */
1941        public final Set<String> getHotLinkDisallowHosts()
1942            {
1943            final Set<String> r = hotLinkDisallowHosts;
1944            if(r == null)
1945                {
1946                final Set<String> noHosts = Collections.emptySet();
1947                return(noHosts);
1948                }
1949            return(r);
1950            }
1951    
1952        /**Get immutable compiled case-insensitive regex pattern for hotlink disallow hosts not in literal list; can be null if no such "regex-match" hosts. */
1953        public final Pattern getHotLinkDisallowHostsRegex()
1954            {
1955            return(hotLinkDisallowHostsRegex);
1956            }
1957    
1958        /**Returns true if this may be a regex (ie contains non-DNS-safe chars).
1959         * If a user-specified hostname contains characters
1960         * other than [-a-zA-Z0-9.]
1961         * then we assume that it may be intended as a regex.
1962         *
1963         * @param n  non-null non-empty user-specified non-normalised hostname
1964         */
1965        private static boolean _mayBeRegexHostname(final String n)
1966            {
1967            assert((n != null) && (n.length() != 0));
1968            for(int i = n.length(); --i >= 0; )
1969                {
1970                final char c = n.charAt(i);
1971                if((c >= 'a') && (c <= 'z')) { continue; }
1972                if((c >= 'A') && (c <= 'Z')) { continue; }
1973                if((c >= '0') && (c <= '9')) { continue; }
1974                if((c == '.') || (c == '-')) { continue; }
1975                return(true); // Not a plain DNS-safe name.
1976                }
1977            return(false); // Seems to be a plain name.
1978            }
1979    
1980        /**Make single compiled pattern from all regex-match hostname expressions; null if no such expressions.
1981         * The pattern will match if any of the sub-patterns match.
1982         * <p>
1983         * This will whinge about and drop any indiviual non-compilable patterns.
1984         * No whingeing is done, however, if the passed log is null!
1985         */
1986        private static Pattern _makeResidualRegexHostMatch(final Set<String> rawNames,
1987                                                           final SimpleLoggerIF logger)
1988            {
1989            if(rawNames == null) { return(null); }
1990    
1991            final StringBuilder compoundExpr = new StringBuilder();
1992    
1993            for(final String rawName : rawNames)
1994                {
1995                try
1996                    {
1997                    // Ignore plain host names that are not regexes...
1998                    if(!_mayBeRegexHostname(rawName)) { continue; }
1999    
2000                    // See if this expression can be compiled, ie is valid...
2001                    try { Pattern.compile(rawName, Pattern.CASE_INSENSITIVE); }
2002                    catch(final Exception e)
2003                        {
2004                        if(logger != null)
2005                            { logger.log("WARNING: GenProps: ignoring unusable/bad hostname regex: `"+rawName+"'"); }
2006                        continue; // Skip this unusable pattern.
2007                        }
2008    
2009                    // Append this pattern to the final set...
2010                    if(compoundExpr.length() != 0) { compoundExpr.append('|'); }
2011                    compoundExpr.append('(').append(rawName).append(')');
2012                    }
2013                catch(final IllegalArgumentException e) { } // Drop this name if it seems to be bad.
2014                }
2015    
2016            // If no regexes then return null.
2017            if(compoundExpr.length() == 0) { return(null); }
2018    
2019            // Return compiled expression...
2020            return(Pattern.compile(compoundExpr.toString(), Pattern.CASE_INSENSITIVE));
2021            }
2022    
2023        /**Normalise Set of normalised host names; result is immutable and not empty, or is null.
2024         * Normalises the set of names provided,
2025         * discarding any that seem to be invalid,
2026         * and stopping when/if we reach the size limit specified.
2027         * <p>
2028         * This skips any that seem to be regexes.
2029         *
2030         * @param rawNames  set of raw host names; if null then null is returned
2031         * @param maxSize  maximum size of result set; strictly positive
2032         */
2033        private static Set<String> _normaliseHostList(final Set<String> rawNames,
2034                                                      final int maxSize)
2035            {
2036            if((maxSize < 1))
2037                { throw new IllegalArgumentException(); }
2038    
2039            if(rawNames == null) { return(null); }
2040    
2041            final Set<String> rawResult = new HashSet<String>(1 + 2*Math.min(maxSize, rawNames.size()));
2042    
2043            for(final String rawName : rawNames)
2044                {
2045                try
2046                    {
2047                    if(_mayBeRegexHostname(rawName)) { continue; }
2048                    final String normName = MemoryTools.intern(HostUtils.normaliseVirtualHostName(rawName));
2049                    rawResult.add(normName); // Add to the result...
2050                    if(rawResult.size() >= maxSize) { break; /* Stop when we have enough. */ }
2051                    }
2052                catch(final IllegalArgumentException e) { } // Drop this name if it seems to be bad.
2053                }
2054    
2055            // Return null rather than an empty set.
2056            if(rawResult.size() == 0) { return(null); }
2057    
2058            // Make sure that the result is immutable.
2059            return(Collections.unmodifiableSet(rawResult));
2060            }
2061    
2062        /**Parse set of (space-separated) hostnames; returns null for an empty list.
2063         * This does not validate or normalise the names supplied.
2064         *
2065         * @param hostList  space-separate list of hostnames (or null for none)
2066         */
2067        private Set<String> _parseHostList(final String hostList)
2068            {
2069            if((hostList == null) || (hostList.length() == 0)) { return(null); }
2070    
2071            final StringTokenizer st = new StringTokenizer(hostList);
2072            if(st.countTokens() == 0) { return(null); }
2073    
2074            final Set<String> result = new HashSet<String>(1 + 2*st.countTokens());
2075            while(st.hasMoreTokens())
2076                { result.add(st.nextToken()); }
2077    
2078            return(result);
2079            }
2080    
2081    
    /**Maximum number of DNSBLs that may be specified. */
    public static final int MAX_DNSBLS = 8;

    /**Name of Web server DNSBLs set. */
    public static final String PNAME_WEBSVR_DNSBLs = "org.hd.d.pg2k.DNSBLs";

    /**Immutable Set of DNSBLs (normalised DNS zone names); can be null for no such hosts.
     * The public accessor substitutes an empty set for null.
     */
    private /* final */ Set<String> DNSBLs;
2090    
2091        /**Get immutable Set of DNSBLs (normalised DNS zone names); never null. */
2092        public final Set<String> getDNSBLs()
2093            {
2094            final Set<String> r = DNSBLs;
2095            if(r == null)
2096                {
2097                // Return empty set if none.
2098                final Set<String> noHosts = Collections.emptySet();
2099                return(noHosts);
2100                }
2101            return(r);
2102            }
2103        }