001    /*
002    Copyright (c) 1996-2011, Damon Hart-Davis
003    All rights reserved.
004    
005    Redistribution and use in source and binary forms, with or without
006    modification, are permitted provided that the following conditions are
007    met:
008    
009      * Redistributions of source code must retain the above copyright
010        notice, this list of conditions and the following disclaimer.
011    
012      * Redistributions in binary form must reproduce the above copyright
013        notice, this list of conditions and the following disclaimer in the
014        documentation and/or other materials provided with the
015        distribution.
016    
017    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
018    IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
019    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
020    PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
021    OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
022    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
023    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
024    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
025    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
026    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
027    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
028    */
029    package org.hd.d.pg2k.svrCore.props;
030    
031    import java.io.IOException;
032    import java.io.InvalidObjectException;
033    import java.io.ObjectInputStream;
034    import java.io.ObjectInputValidation;
035    import java.io.Serializable;
036    import java.net.InetAddress;
037    import java.net.MalformedURLException;
038    import java.net.URI;
039    import java.net.URL;
040    import java.util.ArrayList;
041    import java.util.Arrays;
042    import java.util.Calendar;
043    import java.util.Collections;
044    import java.util.Date;
045    import java.util.GregorianCalendar;
046    import java.util.HashMap;
047    import java.util.HashSet;
048    import java.util.Iterator;
049    import java.util.List;
050    import java.util.Locale;
051    import java.util.Map;
052    import java.util.Properties;
053    import java.util.Set;
054    import java.util.SortedMap;
055    import java.util.SortedSet;
056    import java.util.StringTokenizer;
057    import java.util.TimeZone;
058    import java.util.TreeMap;
059    import java.util.TreeSet;
060    import java.util.regex.Pattern;
061    import java.util.regex.PatternSyntaxException;
062    
063    import org.hd.d.pg2k.svrCore.CoreConsts;
064    import org.hd.d.pg2k.svrCore.ExhibitName;
065    import org.hd.d.pg2k.svrCore.GenUtils;
066    import org.hd.d.pg2k.svrCore.HostUtils;
067    import org.hd.d.pg2k.svrCore.MemoryTools;
068    import org.hd.d.pg2k.svrCore.Name;
069    import org.hd.d.pg2k.svrCore.Rnd;
070    import org.hd.d.pg2k.svrCore.SimpleLoggerIF;
071    import org.hd.d.pg2k.svrCore.MIME.ExhibitMIME;
072    import org.hd.d.pg2k.svrCore.location.GeoUtils;
073    import org.hd.d.pg2k.svrCore.location.GeoUtils.CCTLD;
074    
075    import ORG.hd.d.IsDebug;
076    
077    /**This holds run-time settable properties for the master and mirrors.
078     * It is immutable, and can be persisted or sent over RMI-IIOP.
079     * <p>
080     * All values are accessed through getXXX() methods
081     * (this object can be used as a bean)
082     * to allow us to always constrain values to sensible limits
083     * (and, for example, to patch up any values that have `gone funny'
084     * in serialization because (say) the class version changed
085     * and fields were added)
086     * and to allow for local overrides from properties.
087     * This means that some of the getXXXX() methods have to be synchronized.
088     * <p>
089     * We also extensively check object state at construction and
090     * deserialisation; and we try to impose explicit or implicit limits
091     * on the amount of space that an instance of this object can consume,
092     * especially because at a transition between an old version and a new one
093     * there may be multiple instances floating about in memory.
094     * <p>
095     * We don't retain the original raw properties internally, but parse them at
096     * construction time, to do most of the expensive work once if possible,
097     * and to keep the serialised form of the object small.
098     * Defaults are set at parse/construction time,
099     * whereas limits are imposed at getXXX() time which makes for
100     * a lot of robustness if the object is damaged in transit or if
101     * the class details change slightly.
102     * <p>
103     * The version manufactured with a default constructor has a zero timestamp.
104     */
105    public final class GenProps implements Serializable, ObjectInputValidation
106        {
107        /**Our serialisation version. */
108        private static final long serialVersionUID = 8715644348905699663L;
109    
110        /**Deserialise. */
111        private void readObject(final ObjectInputStream in)
112            throws IOException, ClassNotFoundException
113            {
114            in.defaultReadObject();
115    
116            // Take defensive immutable copy of gen, if present, else make it empty.
117            if(gen == null)
118                { gen = Collections.emptyMap(); }
119            else
120                { gen = Collections.unmodifiableMap(new HashMap<String,String>(gen)); }
121    
122            // Take defensive copy of simpleAds[] if present, and re-sort.
123            if(simpleAds != null)
124                {
125                simpleAds = simpleAds.clone();
126                Arrays.sort(simpleAds);
127                }
128            // Take defensive copy of classifiedAds[] if present.
129            if(classifiedAds != null)
130                { classifiedAds = classifiedAds.clone(); }
131    
132            // Take defensive copy of authDB if present.
133            if(authDB != null)
134                { authDB = Collections.unmodifiableSortedMap(new TreeMap<String, AuthData>(authDB)); }
135    
136            // Take defensive copy of popWeights if present.
137            if(popWeights != null)
138                { popWeights = Collections.unmodifiableSortedMap(new TreeMap<String, Byte>(popWeights)); }
139    
140            // Take defensive copies of hotlink hot/cold sets and DNSBLs,
141            // but renormalise hostnames and truncate to size if need be.
142            if(hotLinkAllowHosts != null)
143                { hotLinkAllowHosts = _normaliseHostList(hotLinkAllowHosts, MAX_ALLOW_DISALLOW_HOTLINK_HOSTS); }
144            if(hotLinkDisallowHosts != null)
145                { hotLinkDisallowHosts = _normaliseHostList(hotLinkDisallowHosts, MAX_ALLOW_DISALLOW_HOTLINK_HOSTS); }
146            if(DNSBLs != null)
147                { DNSBLs = _normaliseHostList(DNSBLs, MAX_DNSBLS); }
148    
149    
150            validateObject(); // Validate state immediately.
151            }
152    
153        /**Validate fields/state.
154         * Called in the constructor and possibly after de-serialising.
155         * <p>
156         * Barf if something bad is found.
157         * (Maybe allow some extra info in debug version.)
158         */
159        public void validateObject()
160            throws InvalidObjectException
161            {
162            // Check that all components are sane and safe.
163            if(timestamp < 0)
164                { throw new InvalidObjectException("bad object: timestamp < 0"); }
165    
166            // Verify generic properties.
167            if(gen == null)
168                { throw new InvalidObjectException("bad object: gen == null"); }
169            if(gen.size() > MAX_GEN_PROPS)
170                { throw new InvalidObjectException("bad object: gen too large"); }
171            for(final Object kO : gen.keySet())
172                {
173                if(!(kO instanceof String))
174                    { throw new InvalidObjectException("bad object: gen key not a String"); }
175                final String k = (String) kO;
176                if(!isSafeGenPropValue(k))
177                    { throw new InvalidObjectException("bad object: gen key not valid"); }
178    
179                final Object vO = gen.get(k);
180                if(!(vO instanceof String))
181                    { throw new InvalidObjectException("bad object: gen value not a String"); }
182                final String v = (String) vO;
183                if(!isSafeGenPropValue(v))
184                    { throw new InvalidObjectException("bad object: gen value not valid"); }
185                }
186    
187            // Check that HTML meta-header text is still printable ASCII excluding ".
188            if(!isSafeHTMLMetaHeaderString(WEBSVR_META_KEYWORDS))
189                { throw new InvalidObjectException("bad object: unsafe " + PNAME_WEBSVR_META_KEYWORDS); }
190            if(!isSafeHTMLMetaHeaderString(WEBSVR_META_DESCRIPTION))
191                { throw new InvalidObjectException("bad object: unsafe " + PNAME_WEBSVR_META_DESCRIPTION); }
192            // Check that background image name, if set, is syntactically valid.
193            // Check that it is a CharSequence or String.
194            // We also check that it is a JPEG image.
195            if(WEBSVR_BG_IMAGE != null)
196                {
197                final boolean isString = (WEBSVR_BG_IMAGE instanceof String);
198                final boolean isExhibitFull = (!isString) && (WEBSVR_BG_IMAGE instanceof Name.ExhibitFull);
199                if((!isString) && (!isExhibitFull))
200                    { throw new InvalidObjectException("bad object: unsafe type " + WEBSVR_BG_IMAGE.getClass().getName()); }
201                if((!isExhibitFull) && (!ExhibitName.validNameSyntax(WEBSVR_BG_IMAGE)))
202                    { throw new InvalidObjectException("bad object: unsafe name " + PNAME_WEBSVR_BG_IMAGE); }
203                final ExhibitMIME.ExhibitTypeParameters et =
204                    ExhibitMIME.getInputFileType(WEBSVR_BG_IMAGE);
205                if((et == null) || (et.type != ExhibitMIME.ET_JPEG))
206                    { throw new InvalidObjectException("bad object: unsafe exhibit type " + PNAME_WEBSVR_BG_IMAGE); }
207                }
208    
209            // Check authDB for consistency.
210            if(authDB != null)
211                {
212                if((authDB.size() == 0) || (authDB.size() > MAX_AUTH_ENTRIES))
213                    { throw new InvalidObjectException("bad object: bad-length authDB"); }
214    
215                // Check all entries are of the correct type (AuthData).
216                for(final Iterator it = authDB.keySet().iterator(); it.hasNext(); )
217                    {
218                    final Object key = it.next();
219                    if(!(key instanceof String))
220                        { throw new InvalidObjectException("bad object: bad key in authDB"); }
221                    if(!(authDB.get(key) instanceof AuthData))
222                        { throw new InvalidObjectException("bad object: bad value in authDB"); }
223                    }
224                }
225    
226            // Check "goodness"/popularity weights for consistency.
227            if(popWeights != null)
228                {
229                if((popWeights.size() == 0) || (popWeights.size() > MAX_POPWT_ENTRIES))
230                    { throw new InvalidObjectException("bad object: bad-length popWeights"); }
231    
232                // Check all entries are of the correct type (AuthData).
233                for(final Iterator<String> it = popWeights.keySet().iterator(); it.hasNext(); )
234                    {
235                    final String key = it.next();
236                    if(!ExhibitName.validAuthorSyntax(key) &&
237                       !ExhibitName.validAttributeWord(key) &&
238                       (ExhibitMIME.isValidInputExhibitNameExtension(key) == null))
239                        { throw new InvalidObjectException("bad object: bad key in popWeights"); }
240                    final Byte val = popWeights.get(key);
241                    if((val == null) ||
242                       (val.byteValue() < MIN_POPWT_VAL) ||
243                       (val.byteValue() > MAX_POPWT_VAL))
244                        { throw new InvalidObjectException("bad object: bad value in popWeights"); }
245                    }
246                }
247    
248            // Check ads for consistency.
249            // Don't waste space with zero-length simpleAds.
250            if((simpleAds != null) &&
251                ((simpleAds.length == 0) || (simpleAds.length > MAX_SIMPLE_ADS)))
252                { throw new InvalidObjectException("bad object: bad-length simpleAds[]"); }
253            // Ensure no nulls in simpleAds,
254            // and that ads are sorted.
255            if(simpleAds != null)
256                {
257                for(int i = simpleAds.length; --i >= 0; )
258                    {
259                    if(simpleAds[i] == null)
260                        { throw new InvalidObjectException("bad object: simpleAds[] contains null"); }
261                    // Check pair-wise sort order...
262                    // The ordering should be total,
263                    // but we'll live with monotonic.
264                    if(i > 0)
265                        {
266                        if(simpleAds[i-1].compareTo(simpleAds[i]) > 0)
267                            { throw new InvalidObjectException("bad object: simpleAds[] not ordered"); }
268                        }
269                    }
270                }
271            // Check correct total for simpleAds.
272            if(totalSimpleAdWeight != _compute_totalSimpleAdWeight(simpleAds))
273                { throw new InvalidObjectException("bad object: totalSimpleAdWeight inconsistent"); }
274            if(totalSimpleAdWeight < 0)
275                { throw new InvalidObjectException("bad object: totalSimpleAdWeight < 0"); }
276            // Don't waste space with zero-length classifiedAds.
277            if((classifiedAds != null) &&
278                ((classifiedAds.length == 0) || (classifiedAds.length > MAX_CLASSIFIED_ADS)))
279                { throw new InvalidObjectException("bad object: bad-length classifiedAds[]"); }
280            // Ensure no nulls in classifiedAds,
281            // TODO: and that ads are unique.
282            if(classifiedAds != null)
283                {
284                for(int i = classifiedAds.length; --i >= 0; )
285                    {
286                    if(classifiedAds[i] == null)
287                        { throw new InvalidObjectException("bad object: classifiedAds[] contains null"); }
288                    }
289                }
290            // There must be no ads held internally if ads are switched off.
291            if(!WEBSVR_SIMPLEAD_ALLOW &&
292                ((totalSimpleAdWeight != 0) || (simpleAds != null) || (classifiedAds != null)))
293                { throw new InvalidObjectException("bad object: simple ads loaded though disabled"); }
294    
295            // Verify hotlinker diversion URL is valid (http) URL.
296            if(WEBSVR_EX_HOTLINK_DIVERT_URL != null)
297                {
298                try {
299                    if(!"http".equals((new URL(WEBSVR_EX_HOTLINK_DIVERT_URL)).getProtocol()))
300                        { throw new InvalidObjectException("bad object: invalid non-HTTP hotlink divert URL"); }
301                    }
302                catch(final MalformedURLException e)
303                    {
304                    throw new InvalidObjectException("bad object: invalid unparseable hotlink divert URL");
305                    }
306                }
307    
308            // FIXME: Should check content to be normalised host names too...
309            if(hotLinkAllowHosts != null)
310                {
311                if(hotLinkAllowHosts.size() > MAX_ALLOW_DISALLOW_HOTLINK_HOSTS)
312                    { throw new InvalidObjectException("bad object: too many allow hosts"); }
313                }
314            if(hotLinkDisallowHosts != null)
315                {
316                if(hotLinkDisallowHosts.size() > MAX_ALLOW_DISALLOW_HOTLINK_HOSTS)
317                    { throw new InvalidObjectException("bad object: too many disallow hosts"); }
318                }
319            if(DNSBLs != null)
320                {
321                if(DNSBLs.size() > MAX_DNSBLS)
322                    { throw new InvalidObjectException("bad object: too many DNSBLs"); }
323                }
324            }
325    
326        /**Check if String is save as generic property key or value.
327         * Null and zero-length values are not permitted.
328         * <p>
329         * Only ASCII values in the range 32 to 126 are allowed.
330         *
331         * @param s  the putative generic property key or value
332         * @return true iff the argument is a non-null, non-empty, not-too-long
333         *     pure-ASCII value
334         */
335        private static final boolean isSafeGenPropValue(final String s)
336            {
337            // Check basic constraints.
338            if(s == null) { return(false); }
339            final int length = s.length();
340            if(length == 0) { return(false); }
341            if(length > MAX_GEN_LEN) { return(false); }
342    
343            for(int i = length; --i >= 0; )
344                {
345                final char c = s.charAt(i);
346                if((c < 32) || (c > 126)) { return(false); }
347                }
348    
349            return(true); // Benign string.
350            }
351    
352        /**Check if String is safe to use as HTML meta-header; return false if not.
353         * Null and empty string are regarded as OK.
354         */
355        private static final boolean isSafeHTMLMetaHeaderString(final String s)
356            {
357            if(s == null) { return(true); }
358            for(int i = s.length(); --i >= 0; )
359                {
360                if(!isSafeHTMLMetaHeaderChar(s.charAt(i)))
361                    { return(false); }
362                }
363            return(true); // All seems OK!
364            }
365    
366    //    /**Flags for User-Agent pattern matching checking for mobile phones. */
367    //    private static final int REGEX_FLAGS = Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE | Pattern.CANON_EQ;
368    //
369    //    /**Regex expression for one character safe for an HTML meta-header.
370    //     * Basically such a character must be printable ASCII, and
371    //     * one of:
372    //     * <ul>
373    //     * <li>a letter
374    //     * <li>a digit
375    //     * <li>a space
376    //     * <li>one of the punctuation characters -+?!()[]{}/_.;:,@
377    //     * </ul>
378    //     */
379    //    private static final String SAFE_HTML_META_HEADER_CHAR = "[a-zA-Z0-9+?!()\\[\\]{}/_.;:,@-]";
380    //
381    //    /**Single 'safe meta header character' match. */
382    //    private static final Pattern SAFE_HTML_META_HEADER_CHAR_PATTERN = Pattern.compile(SAFE_HTML_META_HEADER_CHAR, REGEX_FLAGS);
383    
384        /**Check that character is safe for an HTML meta-header; return false if not.
385         * Basically such a character must be printable ASCII, and
386         * one of:
387         * <ul>
388         * <li>a letter
389         * <li>a digit
390         * <li>a space
391         * <li>one of the punctuation characters -+?!()[]{}/_.;:,@
392         * </ul>
393         */
394        private static final boolean isSafeHTMLMetaHeaderChar(final char c)
395            {
396            switch(c)
397                {
398                // Letters...
399                case 'a': case 'A':
400                case 'b': case 'B':
401                case 'c': case 'C':
402                case 'd': case 'D':
403                case 'e': case 'E':
404                case 'f': case 'F':
405                case 'g': case 'G':
406                case 'h': case 'H':
407                case 'i': case 'I':
408                case 'j': case 'J':
409                case 'k': case 'K':
410                case 'l': case 'L':
411                case 'm': case 'M':
412                case 'n': case 'N':
413                case 'o': case 'O':
414                case 'p': case 'P':
415                case 'q': case 'Q':
416                case 'r': case 'R':
417                case 's': case 'S':
418                case 't': case 'T':
419                case 'u': case 'U':
420                case 'v': case 'V':
421                case 'w': case 'W':
422                case 'x': case 'X':
423                case 'y': case 'Y':
424                case 'z': case 'Z':
425                // Digits.
426                case '0': case '1': case '2': case '3': case '4':
427                case '5': case '6': case '7': case '8': case '9':
428                // Safe punctuation and space.
429                case ' ':
430                case ',': case '.': case ';': case ':':
431                case '-': case '+':
432                case '!': case '?':
433                case '(': case ')':
434                case '[': case ']':
435                case '{': case '}':
436                case '/': case '_': case '@':
437                    { return(true); } // OK!
438    
439                default:
440                    { return(false); } // Not safe.
441                }
442            }
443    
444        /**Construct a default, zero-timestamp set of generic properties.
445         */
446        public GenProps()
447            { this(new Properties(), 0, null); }
448    
449        /**Construct a new, immutable, properties set.
450         * The properties must be non-null
451         * and the timestamp must be non-negative.
452         * <p>
453         * Minor problems with the properties themselves will
454         * be silently ignored or logged, and defaults substituted
455         * for broken or missing values.
456         */
457        public GenProps(final Properties props,
458                        final long _timestamp)
459            { this(props, _timestamp, GenUtils.systemErrLogger); }
460    
461        /**Construct a new, immutable, properties set.
462         * The properties must be non-null
463         * and the timestamp must be non-negative.
464         * <p>
465         * Minor problems with the properties themselves will
466         * be silently ignored or logged, and defaults substituted
467         * for broken or missing values.
468         *
469         * @param logger if non-null than non-fatal problems will be logged here
470         */
471        public GenProps(final Properties props,
472                        final long _timestamp,
473                        final SimpleLoggerIF logger)
474            {
475            if((props == null) || (_timestamp < 0))
476                { throw new IllegalArgumentException(); }
477            timestamp = _timestamp;
478    
479            // Parse properties.
480            // Set up some working variables to help with parsing.
481            int iTmp;
482    //        long lTmp;
483            String sTmp;
484            Boolean bTmp;
485    
486            // Extract the generic key/value properties, if any.
487            final Map<String,String> g = new HashMap<String, String>();
488            for(final Object keyO : props.keySet())
489                {
490                if(!(keyO instanceof String))
491                    { throw new IllegalArgumentException("bad properties key: not String"); }
492                final String key = (String) keyO;
493    
494                if(!key.startsWith(GEN_PREFIX)) { continue; }
495    
496                final String gKey = key.substring(GEN_PREFIX.length());
497                final String gVal = props.getProperty(key);
498    
499                // Validate the values later.
500                g.put(gKey, gVal);
501                }
502            // If no generic keys then save space with shared empty Map.
503            if(g.size() == 0)
504                { gen = Collections.emptyMap(); }
505            else
506                { gen = Collections.unmodifiableMap(g); }
507    
508            iTmp = -1;
509            try { iTmp = Integer.parseInt(props.getProperty(PNAME_WEBSVR_MIN_EX_IMATTR_RECHECK_MS, "120000"), 10); }
510            catch(final Exception e) { } // Ignore errors.
511            WEBSVR_MIN_EX_IMATTR_RECHECK_MS = iTmp;
512    
513            iTmp = -1;
514            try { iTmp = Integer.parseInt(props.getProperty(PNAME_WEBSVR_SYSPROPS_RECHECK_MS, "10013"), 10); }
515            catch(final Exception e) { } // Ignore errors.
516            WEBSVR_SYSPROPS_RECHECK_MS = iTmp;
517    
518            iTmp = -1;
519            try { iTmp = Integer.parseInt(props.getProperty(PNAME_WEBSVR_MAX_CACHEABLE_EX_BYTES, "2123456"), 10); }
520            catch(final Exception e) { } // Ignore errors.
521            WEBSVR_MAX_CACHEABLE_EX_BYTES = iTmp;
522    
523            iTmp = -1;
524            try { iTmp = Integer.parseInt(props.getProperty(PNAME_WEBSVR_BW_LIMITER, "10"), 10); }
525            catch(final Exception e) { } // Ignore errors.
526            WEBSVR_BW_LIMITER = iTmp;
527    
528            iTmp = -1;
529            try { iTmp = Integer.parseInt(props.getProperty(PNAME_WEBSVR_TN_CACHEPC, "10"), 10); }
530            catch(final Exception e) { } // Ignore errors.
531            WEBSVR_TN_CACHEPC = iTmp;
532    
533            iTmp = -1;
534            try { iTmp = Integer.parseInt(props.getProperty(PNAME_WEBSVR_MAX_EX_BYTES, "1123456789"), 10); }
535            catch(final Exception e) { } // Ignore errors.
536            WEBSVR_MAX_EX_BYTES = iTmp;
537    
538            // We canonicalise this keyword list to reduce the
539            // space it consumes (especially after compression), by:
540            //   * Converting to lower-case (helps compression; helps drop dups).
541            //   * Tokenize with "," and trim whitespace around each token.
542            //   * We could eliminate duplicate whitespace within a single token too.
543            //   * Eliminate duplicates.
544            //   * Sort (to help compression).
545            //   * Glue back together again without whitespace.
546            //   * Eliminate any quote (" or ') characters.
547            // We object strongly to non-ASCII text (we remove it!).
548            sTmp = null;
549            try {
550                sTmp = props.getProperty(PNAME_WEBSVR_META_KEYWORDS, "").trim().toLowerCase();
551                final SortedSet<String> words = new TreeSet<String>();
552                final StringTokenizer st = new StringTokenizer(sTmp, ",");
553                while(st.hasMoreTokens())
554                    {
555                    // Take each token,
556                    // zap any non-ASCII printable (and unsafe) chars,
557                    // and trim of any obvious excess whitespace.
558                    String s = st.nextToken();
559                    for(int i = s.length(); --i >= 0; )
560                        {
561                        final char c = s.charAt(i);
562                        if(!isSafeHTMLMetaHeaderChar(c)) { s = s.replace(c, ' '); }
563                        }
564                    s = s.trim();
565                    words.add(s);
566                    }
567                final StringBuilder sb = new StringBuilder(sTmp.length());
568                for(final Iterator<String> it = words.iterator(); it.hasNext(); )
569                    {
570                    final String s = it.next();
571                    sb.append(s);
572                    if(it.hasNext()) { sb.append(','); }
573                    }
574                sTmp = sb.toString();
575                }
576            catch(final Exception e) { } // Ignore errors.
577            WEBSVR_META_KEYWORDS = sTmp;
578    
579            sTmp = null;
580            try {
581                sTmp = props.getProperty(PNAME_WEBSVR_META_DESCRIPTION, "");
582                // Discard any unsafe characters for a meta header.
583                for(int i = sTmp.length(); --i >= 0; )
584                    {
585                    final char c = sTmp.charAt(i);
586                    if(!isSafeHTMLMetaHeaderChar(c)) { sTmp = sTmp.replace(c, ' '); }
587                    }
588                sTmp = sTmp.trim();
589                }
590            catch(final Exception e) { } // Ignore errors.
591            WEBSVR_META_DESCRIPTION = sTmp;
592    
593            sTmp = null;
594            try {
595                sTmp = props.getProperty(PNAME_WEBSVR_BG_IMAGE);
596    
597                // Drop if not a valid-syntax JPEG exhibit name.
598                // We *do not* attempt to check that it exists here.
599                if(!ExhibitName.validNameSyntax(sTmp))
600                    { sTmp = null; }
601                else
602                    {
603                    final ExhibitMIME.ExhibitTypeParameters tp =
604                        ExhibitMIME.getInputFileType(sTmp);
605                    if((tp == null) || (tp.type != ExhibitMIME.ET_JPEG))
606                        { sTmp = null; }
607                    }
608    
609                }
610            catch(final Exception e) { } // Ignore errors.
611            WEBSVR_BG_IMAGE = (sTmp == null) ? null : Name.ExhibitFull.create(sTmp);
612    
613            bTmp = null;
614            try { bTmp = Boolean.valueOf(props.getProperty(PNAME_WEBSVR_SIMPLEAD_ALLOW)); }
615            catch(final Exception e) { } // Ignore errors.
616            WEBSVR_SIMPLEAD_ALLOW = (bTmp == null) ? true : bTmp.booleanValue();
617            SimpleAd[] simpleAdsTmp = null;
618            if(WEBSVR_SIMPLEAD_ALLOW)
619                {
620                final List<SimpleAd> v = new ArrayList<SimpleAd>(); // Collect ads here...
621                // Only even look for simple ads if they are allowed.
622                for(int N = 1; N < Integer.MAX_VALUE; ++N)
623                    {
624                    final String prefix = PNAME_WEBSVR_SIMPLEAD_PREFIX + N + ".";
625                    final String rawCode = props.getProperty(prefix + "HTML");
626                    if(rawCode == null) { break; } // End of ads.
627    
628                    final String code = rawCode.trim();
629                    int weight = 100; // Default;
630                    try { weight = Integer.parseInt(props.getProperty(prefix + "wt"), 10); }
631                    catch(final Exception e) { } // Ignore errors.
632                    if(weight <= 0) { continue; } // Temporarily disabled; skip.
633    
634                    // Attempt to construct new ad item and save it,
635                    // but quietly skip any difficulties.
636                    try { v.add(MemoryTools.intern(new SimpleAd(weight, code))); }
637                    catch(final Exception e) { } // Ignore errors.
638                    }
639                // If we found some ads, extract and sort them, ready to store.
640                if(v.size() > 0)
641                    {
642                    simpleAdsTmp = new SimpleAd[v.size()];
643                    v.toArray(simpleAdsTmp);
644                    Arrays.sort(simpleAdsTmp);
645                    }
646                }
647            simpleAds = simpleAdsTmp; // Store simple ads, if any.
648            totalSimpleAdWeight = _compute_totalSimpleAdWeight(simpleAds);
649    
650            // Parse classified ads...
651            ClassifiedAd[] classifiedAdsTmp = null;
652            // Disable classified ads when we disable simple ads.
653            if(WEBSVR_SIMPLEAD_ALLOW)
654                {
655                final Set<ClassifiedAd> v = new HashSet<ClassifiedAd>(); // Collect ads here...
656                // Trawl through all properties looking for the mandatory regex value(s).
657                for(final Object keyO: props.keySet())
658                    {
659                    // Skip keys nothing to do with classified ads.
660                    assert(keyO instanceof String);
661                    final String regexKey = (String) keyO;
662                    if(!regexKey.startsWith(PNAME_WEBSVR_CLASSIFIEDAD_PREFIX)) { continue; }
663                    // Skip everything but the URI regex key initially.
664                    if(!regexKey.endsWith(".URIregex")) { continue; }
665                    // Compute the stub up to and including the final '.'
666                    // to allow us to verify other keys.
667                    final int lastDot = regexKey.lastIndexOf('.');
668                    final String stub = regexKey.substring(0, lastDot+1);
669                    // Try to create an instance for the classified ad found,
670                    // logging any problems from invalid data
671                    // (and dropping/ignoring any such 'bad' ad).
672                   try {
673                       // We interpret/parse start/end UTC dates (if any) here.
674                       final long start = parseUTCyyyymmdd(props.getProperty(stub+"start"));
675                       final long end = parseUTCyyyymmdd(props.getProperty(stub+"end"));
676                       // We omit ads whose end date has already expired by the timestamp
677                       // as a minor system-wide optimisation.
678                       if((end != 0) && (end < _timestamp)) { continue; }
679                       v.add(new ClassifiedAd(props.getProperty(stub+"HTML"),
680                                               props.getProperty(regexKey),
681                                               start,
682                                               end,
683                                               props.getProperty(stub+"lang"),
684                                               props.getProperty(stub+"countries")));
685    if(IsDebug.isDebug) { System.out.println("INFO: GenProps: classified ad loaded: "+stub); }
686                        }
687                    catch(final Exception e)
688                        {
689                        logger.log("ERROR: GenProps: unable to load classified ad "+stub+"... "+e.getMessage());
690    if(IsDebug.isDebug) { System.err.println("ERROR: GenProps: classified ad BROKEN: "+stub); }
691                        }
692                    }
693    
694                // If we found some ads, extract and store.
695                if(v.size() > 0)
696                    {
697                    classifiedAdsTmp = new ClassifiedAd[v.size()];
698                    v.toArray(classifiedAdsTmp);
699    if(IsDebug.isDebug) { System.out.println("INFO: GenProps: classified ads loaded: "+v.size()); }
700                    }
701                }
702            classifiedAds = classifiedAdsTmp; // Store classified ads, if any.
703    
704    
705            // Extract author details.
706            // We iterate over all the properties, looking for suitable candidates.
707            final TreeMap<String,AuthData> putativeAuthDB = new TreeMap<String, AuthData>();
708            for(final Iterator it = props.keySet().iterator(); it.hasNext(); )
709                {
710                try
711                    {
712                    final String key = (String) it.next();
713                    if(!key.startsWith(PPREFIX_AUTH_DETAILS))
714                        { continue; /* Not the right prefix for authDB. */ }
715    
716                    // We have a key that could be a valid author entry...
717                    final String auth = key.substring(PPREFIX_AUTH_DETAILS.length());
718                    // TODO: add warning for invalid entries?
719                    if(!ExhibitName.validAuthorSyntax(auth))
720                        { continue; /* Not a valid author initials string. */ }
721    
722                    // TODO: add warning for duplicate entries?
723                    putativeAuthDB.put(MemoryTools.intern(auth),
724                        MemoryTools.intern(new AuthData(auth, props.getProperty(key))));
725                    }
726                catch(final Exception e)
727                    {
728                    // Absorb errors...
729                    e.printStackTrace();
730                    }
731                }
732            // Save immutable version of our authDB.
733            // If empty, save space by storing a null instead.
734            authDB = (putativeAuthDB.size() == 0) ? null :
735                Collections.unmodifiableSortedMap(putativeAuthDB);
736    
737    
738            // Extract supplied static "goodness"/popularity weightings, if any.
        // We iterate over all the properties, looking for suitable candidates.
740            final HashMap<String,Byte> putativePopWeights = new HashMap<String, Byte>();
741            for(final Iterator it = props.keySet().iterator(); it.hasNext(); )
742                {
743                try
744                    {
745                    final String key = (String) it.next();
746                    if(!key.startsWith(PPREFIX_POPWT_DETAILS))
747                        { continue; /* Not the right prefix for popWeight. */ }
748    
749                    final String aWPrefix = PPREFIX_POPWT_DETAILS + PCOMP_POPWR_BYAUTH;
750                    if(key.startsWith(aWPrefix))
751                        {
752                        // We have a key that could be a valid author weighting entry...
753                        final String auth = key.substring(aWPrefix.length());
754                        // TODO: add warning for invalid entries?
755                        if(!ExhibitName.validAuthorSyntax(auth))
756                            { continue; /* Not a valid author initials string. */ }
757                        final Byte v = Byte.decode(props.getProperty(key));
758                        if((v < MIN_POPWT_VAL) || (v > MAX_POPWT_VAL))
759                            { continue; /* Not a valid weight. */ }
760                        putativePopWeights.put(MemoryTools.intern(auth),
761                                               MemoryTools.intern(v));
762                        continue;
763                        }
764    
765                    final String tWPrefix = PPREFIX_POPWT_DETAILS + PCOMP_POPWR_BYTYPE;
766                    if(key.startsWith(tWPrefix))
767                        {
768                        // We have a key that could be a valid type/extension weighting entry...
769                        final String type = key.substring(tWPrefix.length());
770                        // TODO: add warning for invalid entries?
771                        if(ExhibitMIME.isValidInputExhibitNameExtension(type) == null)
772                            { continue; /* Not a valid type string. */ }
773                        final Byte v = Byte.decode(props.getProperty(key));
774                        if((v < MIN_POPWT_VAL) || (v > MAX_POPWT_VAL))
775                            { continue; /* Not a valid weight. */ }
776                        putativePopWeights.put(MemoryTools.intern(type),
777                                               MemoryTools.intern(v));
778                        continue;
779                        }
780    
781                    final String attrWPrefix = PPREFIX_POPWT_DETAILS + PCOMP_POPWR_BYATTR;
782                    if(key.startsWith(attrWPrefix))
783                        {
784                        // We have a key that could be a valid attribute-word weighting entry...
785                        final String attrWord = key.substring(attrWPrefix.length());
786                        // TODO: add warning for invalid entries?
787                        if(!ExhibitName.validAttributeWord(attrWord))
788                            { continue; /* Not a valid attribute word string. */ }
789                        final Byte v = Byte.decode(props.getProperty(key));
790                        if((v < MIN_POPWT_VAL) || (v > MAX_POPWT_VAL))
791                            { continue; /* Not a valid weight. */ }
792                        putativePopWeights.put(MemoryTools.intern(attrWord),
793                                               MemoryTools.intern(v));
794                        continue;
795                        }
796                    }
797                catch(final Exception e)
798                    {
799                    // Absorb errors...
800                    e.printStackTrace();
801                    }
802                }
        // Save immutable version of our popWeights map.
804            // If empty, save space by storing a null instead.
805            popWeights = (putativePopWeights.size() == 0) ? null :
806                Collections.unmodifiableMap(putativePopWeights);
807    
808    
809            iTmp = -1;
810            try { iTmp = Integer.parseInt(props.getProperty(PNAME_WEBSVR_EX_HOTLINK_LIMITER, "0"), 10); }
811            catch(final Exception e) { } // Ignore errors.
812            WEBSVR_EX_HOTLINK_LIMITER = (byte) Math.max(0, Math.min(255, iTmp));
813    
814            // Get alternative URL to divert unwanted exhibit hotlinks to, if any.
815            // Invalid URLs are ignored and valid ones may be canonicalised.
816            sTmp = null;
817            try
818                {
819                final String p = props.getProperty(PNAME_WEBSVR_EX_HOTLINK_DIVERT_URL);
820                if((p != null) && (p.startsWith("http:")))
821                    { sTmp = (new URL(p)).toExternalForm(); } // Parse and canonicalise.
822                }
823            catch(final MalformedURLException e) { } // Ignore malformed URLs.
824            WEBSVR_EX_HOTLINK_DIVERT_URL = sTmp;
825    
826            // Parse lists of hotlinker allow/deny hosts, DNSBLs...
827            final Set<String> rawAllowNames = _parseHostList(
828                            props.getProperty(PNAME_WEBSVR_EX_HOTLINK_LIMITER_ALLOW));
829            hotLinkAllowHosts = _normaliseHostList(rawAllowNames, MAX_ALLOW_DISALLOW_HOTLINK_HOSTS);
830            hotLinkAllowHostsRegex = _makeResidualRegexHostMatch(rawAllowNames, logger);
831            final Set<String> rawDisallowNames = _parseHostList(
832                            props.getProperty(PNAME_WEBSVR_EX_HOTLINK_LIMITER_DISALLOW));
833            hotLinkDisallowHosts = _normaliseHostList(rawDisallowNames, MAX_ALLOW_DISALLOW_HOTLINK_HOSTS);
834            hotLinkDisallowHostsRegex = _makeResidualRegexHostMatch(rawDisallowNames, logger);
835            DNSBLs = _normaliseHostList(_parseHostList(
836                    props.getProperty(PNAME_WEBSVR_DNSBLs)), MAX_DNSBLS);
837    
838    
839            // Verify complete object state.
840            try { validateObject(); }
841            catch(final InvalidObjectException e)
842                { throw new IllegalArgumentException(e.getMessage()); }
843            }
844    
845    
846        /**Parse UTC YYYYMMDD date, or zero if absent (null). */
847        private static long parseUTCyyyymmdd(final String property)
848            {
849            if(property == null) { return(0); }
850    //        property = property.trim();
851            if(property.length() != 8) { throw new IllegalArgumentException("format required: YYYYMMDD"); }
852    
853            // Get GMT/UTC timezone.
854            final TimeZone tz = TimeZone.getTimeZone("GMT");
855            final GregorianCalendar gc = new GregorianCalendar(tz);
856            // Set YMD from property value.
857            gc.set(Calendar.YEAR, Integer.parseInt(property.substring(0, 4), 10));
858            // Not that month is zero-based (January is zero).
859            gc.set(Calendar.MONTH, Integer.parseInt(property.substring(4, 6), 10) - 1);
860            gc.set(Calendar.DAY_OF_MONTH, Integer.parseInt(property.substring(6, 8), 10));
861    
862            // Convert to timestamp and return if sensible...
863            final long t = gc.getTimeInMillis();
864            if(t < CoreConsts.GALLERY_EPOC_START)
865                { throw new IllegalArgumentException("classified ad date too early to be sensible"); }
866            return(t);
867            }
868    
    /**Timestamp of this properties set; zero for default/empty set.
     * NOTE(review): units are presumably UTC milliseconds, as for the other
     * timestamps handled in this class; confirm against the constructor.
     */
    public final long timestamp;


    /**Maximum number of general properties. */
    public static final int MAX_GEN_PROPS = 1024;

    /**Maximum length of general property key or value (chars). */
    public static final int MAX_GEN_LEN = 1024;

    /**Prefix for general properties in file (not needed on lookup).
     * The keys of the map returned by getGen() have this prefix stripped.
     */
    public static final String GEN_PREFIX = "gen.";
881    
882        /**The (immutable) generic properties map; never null. */
883        private Map<String,String> gen;
884    
885        /**Get the immutable generic properties; never null.
886         * The keys in this Map here are stripped of the initial GEN_PREFIX
887         * that appears in the properties file.
888         */
889        public Map<String,String> getGen()
890            { return(gen); }
891    
892    
893        /**Name of Web server minimum exhibit-immutable-attributes check interval (ms) property. */
894        public static final String PNAME_WEBSVR_MIN_EX_IMATTR_RECHECK_MS = "pg2k.websvr.ex.imattr.minrecheckms";
895        /**Web server minimum exhibit-immutable-attributes check interval (ms). */
896        private final int WEBSVR_MIN_EX_IMATTR_RECHECK_MS;
897        /**Get the Web server minimum exhibit-immutable-attributes check interval (ms).
898         * Constrained to the range approx 1s to 6h, default approx 2m.
899         */
900        public final int getWEBSVR_MIN_EX_IMATTR_RECHECK_MS()
901            { return(Math.max(1001, Math.min(6 * 3600 * 1000, WEBSVR_MIN_EX_IMATTR_RECHECK_MS))); }
902    
903        /**Name of Web server sysprops check interval (ms) property. */
904        public static final String PNAME_WEBSVR_SYSPROPS_RECHECK_MS = "pg2k.websvr.props.sys.recheckms";
905        /**Web server sysprops (ie most property files including GenProps) (re)check interval (ms). */
906        private final int WEBSVR_SYSPROPS_RECHECK_MS;
907        /**Get the Web server sysprops check interval (ms).
908         * Constrained to the range approx 1s to 1h, default approx 10s (which applies until first props set is fetched).
909         */
910        public final int getWEBSVR_SYSPROPS_RECHECK_MS()
911            { return(Math.max(1003, Math.min(1 * 3600 * 1000, WEBSVR_SYSPROPS_RECHECK_MS))); }
912    
913        /**Name of Web server maximum-cached-exhibit-prefix-size (bytes) property. */
914        public static final String PNAME_WEBSVR_MAX_CACHEABLE_EX_BYTES = "pg2k.websvr.ex.maxcacheablesize";
915        /**Web server maximum-cached-exhibit-prefix-size (bytes). */
916        private final int WEBSVR_MAX_CACHEABLE_EX_BYTES;
917        /**Get the Web server maximum-cached-exhibit-prefix-size (bytes).
918         * Constrained to the range approx 64kB to 1GB, default approx 2MB.
919         */
920        public final int getWEBSVR_MAX_CACHEABLE_EX_BYTES()
921            { return(Math.max(1<<16, Math.min(1123456789, WEBSVR_MAX_CACHEABLE_EX_BYTES))); }
922    
923        /**Name of Web server persistent thumbnail cache size limit as percentage of exhibit cache limit value. */
924        public static final String PNAME_WEBSVR_TN_CACHEPC = "pg2k.websvr.ex.tncachepercent";
925        /**Web server persistent thumbnail cache size limit as percentage of exhibit cache limit value. */
926        private final int WEBSVR_TN_CACHEPC;
927        /**Web server persistent thumbnail cache size limit as percentage of exhibit cache limit value.
928         * Constrained to the range 0 to 100, default approx 10.
929         * <p>
930         * A value of zero disables the persistent cache
931         * (though in-memory cacheing may continue).
932         * <p>
933         * Any additional cache of generated thumbnails (exhibit samples)
934         * is limited to a size at most this many percent of the exhibit cache size.
935         * This cache is managed separately from this exhibit cache, but yoking the
936         * values together makes for simpler system management.
937         */
938        public final int getWEBSVR_TN_CACHEPC()
939            { return(Math.max(0, Math.min(100, WEBSVR_TN_CACHEPC))); }
940    
941        /**Name of Web server precacheing bandwidth-limiter value (max fraction of available bandwidth used on precacheing). */
942        public static final String PNAME_WEBSVR_BW_LIMITER = "pg2k.websvr.ex.bwlimiter";
943        /**Web server precacheing bandwidth-limiter value (max fraction of available bandwidth used on precacheing). */
944        private final int WEBSVR_BW_LIMITER;
945        /**Web server precacheing bandwidth-limiter value (max fraction of available bandwidth used on precacheing).
946         * Constrained to the range approx 1 to 1000, default approx 10.
947         * <p>
948         * If any precacheing operation takes N milliseconds the next one
949         * will not be started until at least N times this value later.
950         * <p>
951         * The effect is to limit resource consumption to at most about
952         * 1/N of the most constrained of slave and master resources
953         * (typically CPU and/or bandwidth).
954         */
955        public final int getWEBSVR_BW_LIMITER()
956            { return(Math.max(1, Math.min(1000, WEBSVR_BW_LIMITER))); }
957    
958        /**Name of Web server maximum exhibit size (32-bit value, bytes). */
959        public static final String PNAME_WEBSVR_MAX_EX_BYTES = "pg2k.websvr.ex.maxexhibitsize";
960        /**Web server maximum exhibit size (32-bit value, bytes). */
961        private final int WEBSVR_MAX_EX_BYTES;
962        /**Get the Web server maximum exhibit size (32-bit value, bytes).
963         * Constrained to the range approx 1B to 1GB, default approx 1GB.
964         */
965        public final int getWEBSVR_MAX_EX_BYTES()
966            { return(Math.max(1, Math.min(Integer.MAX_VALUE/2, WEBSVR_MAX_EX_BYTES))); }
967    
968        /**Name of keywords list for HTML meta header ("" if none; not null). */
969        public static final String PNAME_WEBSVR_META_KEYWORDS = "pg2k.websvr.meta.keywords";
970        /**Keywords list for HTML meta header ("" if none; not null). */
971        private final String WEBSVR_META_KEYWORDS;
972        /**Get the keywords list for HTML meta header ("" if none; never null).
973         * May be trimmed of excess whitespace and canonicalised; "" if none.
974         * <p>
975         * Guaranteed only to contain characters safe to put directly into
976         * an HTML description meta-header attribute value.
977         * <p>
978         * In particular this result contains no quote marks (" or ')
979         * nor ampersand nor angle-brackets, though may contain whitespace,
980         * so should be quoted.
981         */
982        public final String getWEBSVR_META_KEYWORDS()
983            {
984            final String result = WEBSVR_META_KEYWORDS;
985            if(result == null) { return(""); }
986            assert(result.indexOf('\'') == -1);
987            assert(result.indexOf('"') == -1);
988            assert(result.indexOf('&') == -1);
989            assert(result.indexOf('<') == -1);
990            assert(result.indexOf('>') == -1);
991            return(result);
992            }
993    
994        /**Name of description for HTML meta header ("" if none; not null). */
995        public static final String PNAME_WEBSVR_META_DESCRIPTION = "pg2k.websvr.meta.description";
996        /**Description for HTML meta header ("" if none; not null). */
997        private final String WEBSVR_META_DESCRIPTION;
998        /**Get the description list for HTML meta header ("" if none; not null).
999         * May be trimmed of excess whitespace; "" if none.
1000         */
1001        public final String getWEBSVR_META_DESCRIPTION()
1002            {
1003            final String result = WEBSVR_META_DESCRIPTION;
1004            if(result == null) { return(""); }
1005            return(result);
1006            }
1007    
1008        /**Name of JPEG exhibit to use as a background image (null if none). */
1009        public static final String PNAME_WEBSVR_BG_IMAGE = "pg2k.websvr.livery.default.bg";
1010        /**JPEG exhibit to use as a background image (null if none).
1011         * Should be Name.ExhibitFull but could be String from deserialising old format.
1012         */
1013        private final CharSequence WEBSVR_BG_IMAGE;
1014        /**Get the JPEG exhibit to use as a background image (null if none).
1015         * Is a syntactically-valid JPEG exhibit name (or null).
1016         */
1017        public final Name.ExhibitFull getWEBSVR_BG_IMAGE()
1018            {
1019            final CharSequence result = WEBSVR_BG_IMAGE;
1020            if(null == result) { return(null); }
1021            if(result.getClass() == Name.ExhibitFull.class) { return((Name.ExhibitFull) result); }
1022            return(Name.ExhibitFull.create(result));
1023            }
1024    
1025        /**Class holding details of one simple banner advertisement; immutable, serialisable.
1026         * Weighting must be strictly positive integer; text must be
1027         * non-zero-length, 7-bit printable (32--126) ASCII/HTML data,
1028         * though we also allow CRLF (\r\n) for tricky JavaScript/HTML issues...
1029         * <p>
1030         * We assume that the on-the-wire format for members is reasonably
1031         * efficient, especially if the transport layer includes compression.
1032         * <p>
1033         * This supports equals(), hashCode() and compareTo(); two objects
1034         * are equal if all their fields are, and the sort order is to
1035         * minimise ad-lookup and HTML-bandwidth (ie by highest weight
1036         * then smallest code, with ties being broken by code lexical ordering).
1037         */
1038        private static final class SimpleAd implements Serializable,
1039                                                       ObjectInputValidation,
1040                                                       MemoryTools.Internable,
1041                                                       Comparable<SimpleAd>
1042            {
1043            /**Strictly positive weighting. */
1044            final int weight;
1045            /**HTML code as 7-bit ASCII; non-null, non-zero length. */
1046            final String code;
1047    
1048            /**Maximum HTML code length in bytes/characters. */
1049            public static final int MAX_AD_CHARACTERS = 2048;
1050    
1051            /**Equality depends on all fields. */
1052            @Override
1053            public boolean equals(final Object o)
1054                {
1055                if(!(o instanceof SimpleAd)) { return(false); }
1056                final SimpleAd other = (SimpleAd) o;
1057                return((weight == other.weight) && (code.equals(other.code)));
1058                }
1059    
1060            /**Hash depends on text as good source of variability. */
1061            @Override
1062            public int hashCode() { return(code.hashCode()); }
1063    
1064            /**Provides total ordering, with primary sort by decreasing weight.
1065             * Full sort order is:
1066             * <ol>
1067             * <li>decreasing weight, then
1068             * <li>increasing code length, then
1069             * <li>code lexical order.
1070             * </ol>
1071             */
1072            public int compareTo(final SimpleAd o)
1073                {
1074                final SimpleAd other = (SimpleAd) o;
1075    
1076                if(weight > other.weight) { return(-1); } // Right order.
1077                if(weight < other.weight) { return( 1); } // Wrong order.
1078    
1079                if(code.length() < other.code.length()) { return(-1); } // Right order.
1080                if(code.length() > other.code.length()) { return( 1); } // Wrong order.
1081    
1082                // Break ties to make total ordering.
1083                return(code.compareTo(other.code));
1084                }
1085    
1086            /**Construct new simple ad details. */
1087            SimpleAd(final int adRelativeWeight, final String HTMLCode)
1088                {
1089                weight = adRelativeWeight;
1090                code = MemoryTools.intern(HTMLCode);
1091                // Verify object state and thus validate parameters...
1092                try { validateObject(); }
1093                catch(final InvalidObjectException e)
1094                    { throw new IllegalArgumentException(e.getMessage()); }
1095                }
1096    
1097            /**Our serialisation version. */
1098            private static final long serialVersionUID = 6649356978055850149L;
1099    
1100    //        /**Deserialise. */
1101    //        private void readObject(final ObjectInputStream in)
1102    //            throws IOException, ClassNotFoundException
1103    //            {
1104    //            in.defaultReadObject();
1105    //            validateObject(); // Validate state immediately.
1106    //            }
1107    
1108            /**Deserialise: validate and eliminate duplicates coming off the wire.
1109             * @return identical, de-duped, non-null instance
1110             */
1111            protected Object readResolve()
1112                // throws ObjectStreamException
1113                { return(MemoryTools.intern(new SimpleAd(weight, code))); }
1114    
1115            /**Validate fields/state.
1116             * Called in the constructor and possibly after de-serialising.
1117             */
1118            public void validateObject()
1119                throws InvalidObjectException
1120                {
1121                // Check that all components are sane and safe.
1122                // Weight must be strictly positive.
1123                if(weight <= 0)
1124                    { throw new InvalidObjectException("bad object: weight <= 0"); }
1125                // HTML code must be non-null, non-zero-length,
1126                // 7-bit printable ASCII (and CRLF).
1127                if((code == null) ||
1128                   (code.length() == 0) || (code.length() > MAX_AD_CHARACTERS))
1129                    { throw new InvalidObjectException("bad object: HTML code length silly"); }
1130                for(int i = code.length(); --i >= 0; )
1131                    {
1132                    final char c = code.charAt(i);
1133                    if(((c < 32) || (c > 126)) &&
1134                       ((c != '\r') && (c != '\n')))
1135                        { throw new InvalidObjectException("bad object: HTML code contains bad char " + ((int) c)); }
1136                    }
1137                }
1138            }
1139    
1140        /**Class holding details of one classified advertisement; immutable, serialisable.
1141         * We assume that the on-the-wire format for members is reasonably efficient,
1142         * especially if the transport layer includes compression such as ZIP.
1143         */
1144        public static final class ClassifiedAd implements Serializable,
1145                                                          ObjectInputValidation,
1146                                                          MemoryTools.Internable
1147            {
1148            /**Construct new classified ad details.
1149             *
1150             * @param HTMLCode  HTML code as 7-bit ASCII; non-null, non-zero length
1151             * @param URIregex  valid regular expression to match a substring of a URI path;
1152             *     never null nor empty
1153             */
1154            public ClassifiedAd(final String HTMLCode,
1155                                final String URIregex,
1156                                final long start, final long end,
1157                                final String language,
1158                                final String ccTLDs)
1159                {
1160                regex = MemoryTools.intern(URIregex);
1161                code = MemoryTools.intern(HTMLCode);
1162                lang = MemoryTools.intern(language);
1163                countries = MemoryTools.intern(ccTLDs);
1164                this.start = start;
1165                this.end = end;
1166                // Verify object state and thus validate parameters...
1167                try { validateObject(); }
1168                catch(final InvalidObjectException e)
1169                    { throw new IllegalArgumentException(e.getMessage()); }
1170                }
1171    
1172            /**Valid case-sensitive regular expression to match a substring of a URI path, never null nor empty. */
1173            public final String regex;
1174    
1175            /**Private cache of compiled regex; null until first use.
1176             * Created on first use; not serialised.
1177             * <p>
1178             * Never set null once non-null.
1179             * <p>
1180             * Marked volatile for thread-safe lock-free access,
1181             */
1182            private transient volatile Pattern pattern;
1183            /**Get compiled regex; never null. */
1184            public Pattern getPattern()
1185                {
1186                Pattern p = pattern;
1187                if(p == null) { pattern = p = Pattern.compile(regex); }
1188                return(p);
1189                }
1190    
        /**HTML code as 7-bit ASCII; non-null, non-zero length. */
        public final String code;

        /**Maximum HTML code length in bytes/characters, as per SimpleAd; strictly positive. */
        public static final int MAX_AD_CHARACTERS = SimpleAd.MAX_AD_CHARACTERS;

        /**Start time (+ve UTC ms) or zero if no start time. */
        public final long start;
        /**End time (+ve UTC ms greater than 'start') or zero if no end time. */
        public final long end;
1201    
1202            /**Language/locale filter; two-letter lower-case language code or null if none. */
1203            public final String lang;
1204    
1205            /**Returns true if the supplied (visitor's) locale is inappropriate for this ad.
1206             * If the lang value is null then this returns false.
1207             * Else if the visitor's locale/language is null (unknown) then this returns true.
1208             * Else this returns true iff the visitor's locale language matches lang.
1209             */
1210            public boolean wrongLanguage(final Locale visitorLocale)
1211                {
1212                if(lang == null) { return(false); }
1213                if(visitorLocale == null) { return(true); }
1214                return(!lang.equals(visitorLocale.getLanguage()));
1215                }
1216    
1217            /**Country code; comma-separated list of lower-case ccTLD codes or null if none. */
1218            public final String countries;
1219    
1220            /**Returns true if the supplied (visitor's) country is inappropriate for this ad.
1221             * If the countries list is null then this returns false.
1222             * Else if the visitor's ccTLD (country code) is null (unknown) then this returns true.
1223             * Else this returns true iff the visitor's ccTLD is found on our countries list.
1224             */
1225            public boolean wrongCountry(final CCTLD visitorCcTLD)
1226                {
1227                if(countries == null) { return(false); }
1228                if(visitorCcTLD == null) { return(true); }
1229                // Check for presense in the 'allowed' list.
1230                // If the list is correctly normalised then a simple substring search suffices
1231                // (though might be inefficient for VERY long lists).
1232                // We search backwards since common values may be late (us, uk)
1233                // and we may have normalised to natural sorted (lexical/ASCII) order,
1234                // but the search direction is not necessary for correctness.
1235                return(countries.lastIndexOf(visitorCcTLD.code) == -1);
1236                }
1237    
1238            /**Equality depends on all fields. */
1239            @Override
1240            public boolean equals(final Object o)
1241                {
1242                if(!(o instanceof ClassifiedAd)) { return(false); }
1243                final ClassifiedAd other = (ClassifiedAd) o;
1244                if(start != other.start) { return(false); }
1245                if(end != other.end) { return(false); }
1246                if(!code.equals(other.code)) { return(false); }
1247                if(lang == null) { if(other.lang != null) { return(false); } }
1248                else if(!lang.equals(other.lang)) { return(false); }
1249                if(countries == null) { if(other.countries != null) { return(false); } }
1250                else if(!countries.equals(other.countries)) { return(false); }
1251                return(true); // Identical.
1252                }
1253    
1254            /**Hash depends on the regex and the end date as good/quick sources of variability. */
1255            @Override
1256            public int hashCode() { return(regex.hashCode() ^ (int)(end >> 26)); }
1257    
1258            /**Validate fields/state.
1259             * Called in the constructor and possibly after de-serialising.
1260             */
1261            public void validateObject()
1262                throws InvalidObjectException
1263                {
1264                // HTML code must be non-null, non-zero-length,
1265                // 7-bit printable ASCII (and CRLF).
1266                if((code == null) ||
1267                   (code.length() == 0) || (code.length() > MAX_AD_CHARACTERS))
1268                    { throw new InvalidObjectException("bad object: HTML code length silly"); }
1269                for(int i = code.length(); --i >= 0; )
1270                    {
1271                    final char c = code.charAt(i);
1272                    if(((c < 32) || (c > 126)) &&
1273                       ((c != '\r') && (c != '\n')))
1274                        { throw new InvalidObjectException("bad object: HTML code contains bad char " + ((int) c)); }
1275                    }
1276                if((regex == null) || (regex.length() == 0) || (regex.length() > 2*ExhibitName.MAX_NAME_LENGTH))
1277                    { throw new InvalidObjectException("bad object: regex null or length invalid"); }
1278                // Validate the regex pattern (and cache it too; naughty but efficient).
1279                try { pattern = Pattern.compile(regex); }
1280                catch(final PatternSyntaxException e)
1281                    { throw new InvalidObjectException("bad object: regex invalid"); }
1282                if((start < 0) || (end < 0))
1283                    { throw new InvalidObjectException("bad object: negative start/end"); }
1284                if((start != 0) && (start < CoreConsts.GALLERY_EPOC_START))
1285                    { throw new InvalidObjectException("bad object: start date invalid: " + new Date(start)); }
1286                if((end != 0) && (end <= start))
1287                    { throw new InvalidObjectException("bad object: end < start"); }
1288                if((lang != null) && !lang.matches("^[a-z][a-z]$"))
1289                    { throw new InvalidObjectException("bad object: bad language not [a-z][a-z]"); }
1290                if((countries != null) && !countries.matches("^[a-z][a-z](,[a-z][a-z])*$"))
1291                    { throw new InvalidObjectException("bad object: bad countries list not [a-z][a-z](,[a-z][a-z])*"); }
1292                // Avoid huge countries list with arbitrary limit...
1293                if((countries != null) && (countries.length() > 128))
1294                    { throw new InvalidObjectException("bad object: bad countries list too long"); }
1295                }
1296    
1297            /**Deserialise: validate and eliminate duplicates coming off the wire.
1298             * @return identical, de-duped, non-null instance
1299             */
1300            protected Object readResolve()
1301                // throws ObjectStreamException
1302                { return(MemoryTools.intern(new ClassifiedAd(code, regex, start, end, lang, countries))); }
1303    
        /**Unique serialisation version UID for this class. */
        private static final long serialVersionUID = 8848447664623542985L;
1306            }
1307    
1308    
1309        /**Immutable Set of String tokens to replace with random values in banner HTML.
1310         * These tokens, when encountered in HTML banner ad text,
1311         * are replaced with random (positive, decimal) integers.
1312         * These are to help with "cache-busting".
1313         * <p>
1314         * These tokens must be chosen so as to be very unlikely to appear
1315         * where they are <em>not</em> intended for replacement.
1316         * <p>
1317         * This set may become replaceable at run-time in future.
1318         */
1319        public static final Set<String> AD_BANNER_RND_TOKENS = Collections.unmodifiableSet(
1320            new HashSet<String>(Arrays.asList(new String[]{
1321                "$RND$", // TribalFusion cache-buster token.
1322            })));
1323    
    /**Name of prefix for simple banner ad code; includes trailing dot.
     * There is one sub-property ({prefix}allow, either "true" or "false")
     * that allows or disallows all simple ads at once.
     * <p>
     * Then for N starting at 1, and stopping at the first gap,
     * there must be a non-empty {prefix}{N}.HTML containing
     * non-empty, pure-printable 7-bit ASCII (32--126, plus CR and LF) HTML ad code.
     * There may be an optional {prefix}{N}.wt strictly-positive
     * integer relative weighting (default 100) of this ad to others;
     * the higher the weighting the more of the available space is taken.
     */
    public static final String PNAME_WEBSVR_SIMPLEAD_PREFIX = "pg2k.websvr.ad.";
    /**Name of on/off switch for simple and classified ad code, enabled by default. */
    public static final String PNAME_WEBSVR_SIMPLEAD_ALLOW = PNAME_WEBSVR_SIMPLEAD_PREFIX + "allow";
    /**On/off switch for simple banner ad code, enabled by default. */
    private final boolean WEBSVR_SIMPLEAD_ALLOW;
1340        /**Get on/off switch for simple banner ad code, enabled by default.
1341         */
1342        public final boolean getWEBSVR_SIMPLEAD_ALLOW() { return(WEBSVR_SIMPLEAD_ALLOW); }
1343        /**Get HTML code for a simple ad at random; null if none available.
1344         * This returns ads with frequencies in proportion to their weightings.
1345         * <p>
1346         * This will always return null if getWEBSVR_SIMPLEAD_ALLOW()
1347         * returns false.
1348         * <p>
1349         * This algorithm will take O(n) time to return one of n available ads.
1350         * <p>
1351         * Will be faster when ads are unevenly weigted.
1352         * <p>
1353         * This will automatically do substitution of random "cache-buster" tokens
1354         * if required.
1355         * (Any of the recognised tokens will be substituted with a
1356         * random (positive decimal) integer value, different each time,
1357         * with all occurrences of any one token the same in any one call.)
1358         */
1359        public final String getSimpleAd()
1360            {
1361            if(/* !WEBSVR_SIMPLEAD_ALLOW || */ /* Test is redundant. */
1362               /* (simpleAds == null) || */ /* Test is redundant. */
1363               (totalSimpleAdWeight == 0)) { return(null); }
1364    
1365            // Pick uniformly-distributed selector value.
1366            // We primarily require speed from this generator,
1367            // not perfection nor security.
1368            final int r = Rnd.fastRnd.nextInt(totalSimpleAdWeight);
1369    
1370            // Total weight so far.
1371            int tw = 0;
1372    
1373            // Look until cumulative sum of weights passes selector,
1374            // then take the ad that passes that threshold.
1375            // If performance proves to be a problem we could
1376            // replace this with a binary chop by storing an auxillary
1377            // array containing the sum of the weights.
1378            // In the interim, if we discover that this array is not
1379            // sorted biggest weight first (which makes for fastest
1380            // linear search on average) then we can sort it when
1381            // we have finished selecting our ad this time.
1382            for(int i = 0; i < simpleAds.length; ++i)
1383                {
1384                tw += simpleAds[i].weight;
1385                if(r < tw)
1386                    {
1387                    String code = simpleAds[i].code;
1388                    // Substitute cache-buster tokens as needed.
1389                    if((code != null) && (AD_BANNER_RND_TOKENS.size() > 0))
1390                        {
1391                        for(final String token : AD_BANNER_RND_TOKENS)
1392                            {
1393                            // If the cache-buster token exists then replace all occurrences.
1394                            if(code.indexOf(token) != -1)
1395                                {
1396                                final int rnd = (Rnd.fastRnd.nextInt() >>> 1);
1397                                code = code.replace(token, String.valueOf(rnd));
1398                                }
1399                            }
1400                        }
1401                    return(code);
1402                    }
1403                }
1404    
1405            return(null); // No ad available.  (Probably should not be able to happen.)
1406            }
1407    
    /**Total weight of all simple ads; non-negative.
     * Strictly positive if there are some simple ads.
     * Used by getSimpleAd() as the exclusive upper bound of its random selector.
     */
    private final int totalSimpleAdWeight;
1412    
1413        /**Recompute totalSimpleAdWeight; zero if arg is null.
1414         * @throws IllegalArgumentException  if sum of weights would
1415         *     exceed Integer.MAX_VALUE.
1416         */
1417        private static int _compute_totalSimpleAdWeight(final SimpleAd _simpleAds[])
1418            {
1419            if(_simpleAds == null) { return(0); }
1420            int result = 0;
1421            for(int i = _simpleAds.length; --i >= 0; )
1422                {
1423                final long r = result + (long) (_simpleAds[i].weight);
1424                if(r > Integer.MAX_VALUE)
1425                    { throw new IllegalArgumentException("weights too large"); }
1426                result = (int) r;
1427                }
1428            return(result);
1429            }
1430    
    /**Sorted array of simple ads, no empty slots, non-zero length; or null if no simple ads. */
    private /* final */ SimpleAd simpleAds[];

    /**The maximum number of simple ads that we will entertain at once. */
    public static final int MAX_SIMPLE_ADS = 64;


    /**Name of prefix for classified ad code; includes trailing dot. */
    public static final String PNAME_WEBSVR_CLASSIFIEDAD_PREFIX = "pg2k.websvr.clad.";

    /**Array of classified ads, no empty slots or duplicates, non-zero length; or null if no classified ads. */
    private /* final */ ClassifiedAd classifiedAds[];

    /**The maximum number of classified ads that we will entertain at once. */
    public static final int MAX_CLASSIFIED_ADS = 64;

    /**Overall URI regex to check for a possible match against any classified ad; may be null.
     * Not serialised; created lazily by getClassifieds() the first time
     * that a lookup finds no ads while there are more than two classified ads,
     * so it is null before then and always null if there are no classified ads.
     * Never set null once non-null.
     * <p>
     * Marked volatile for thread-safe lock-free access.
     */
    private transient volatile Pattern _masterClassifiedMatchPattern;
1454        /**Returns List of possible classifieds that match the filters; null if no such ads.
1455         * The URI passed should usually be absolute, ie starting with '/'.
1456         * <p>
1457         * This inspects and filters by:
1458         * <ul>
1459         * <li>the URI regex,
1460         * <li>the current date vs any start/end dates,
1461         * <li>the user's ccTLD if supplied vs the countries list if supplied,
1462         * <li>the user's locale language if supplied vs the ad language if supplied.
1463         * </ul>
1464         * <p>
1465         * It may be desirable to shuffle the result
1466         * to avoid any unwanted bias/ordering
1467         * and to minimise ad-blindness.
1468         *
1469         * @param  uri  the display URI; must be non-null
1470         * @param  locale  if non-null then ads are filtered by the ad language,
1471         *     if null then we do not filter by locale/language
1472         * @param  addr  if non-null then ads are filtered by the country indicated
1473         *     by this client IP address,
1474         *     if null then we do not filter by country
1475         *
1476         * @return  undefined-order non-empty List of possible matching classifieds, else null.
1477         */
1478        public List<ClassifiedAd> getClassifieds(final URI uri,
1479                                                 final Locale locale,
1480                                                 final InetAddress addr)
1481            {
1482            // If no classifieds then always return null immediately.
1483            if(classifiedAds == null) { return(null); }
1484    
1485            // Match against URI path component only.
1486            final String path = uri.getPath();
1487            // No match is possible if there is no path component.
1488            if(path == null) { return(null); }
1489    
1490            // If there is a matcher filter/matcher
1491            // to check against all the URI patterns in one go
1492            // then test against it now.
1493            final Pattern master = _masterClassifiedMatchPattern;
1494            if((master != null) && !master.matcher(path).find()) { return(null); }
1495    
1496            // Prepare to filter by current time/date.
1497            final long now = System.currentTimeMillis();
1498    
1499            final List<ClassifiedAd> result = new ArrayList<ClassifiedAd>(classifiedAds.length);
1500            for(final ClassifiedAd clad : classifiedAds)
1501                {
1502                // Filter by (optional) start/end dates.  Should be very fast.
1503                if((clad.start != 0) && (now < clad.start)) { continue; }
1504                if((clad.end != 0) && (now >= clad.end)) { continue; }
1505                // Filter by (optional) language, if provided.  Should be quick.
1506                if((locale != null) && clad.wrongLanguage(locale)) { continue; }
1507                // Filter by URI regex.  May be moderately slow.
1508                if(!clad.getPattern().matcher(path).find()) { continue; }
1509                // Filter by (optional) allowed countries.  May be (very) slow, so do last.
1510                if((addr != null) && clad.wrongCountry(GeoUtils.getCCTLDByAddress(addr, false))) { continue; }
1511                // Add the successfully-matched ad to the result.
1512                result.add(clad);
1513                }
1514            if(result.isEmpty())
1515                {
1516                // If there are many classifieds but none matched this time
1517                // (and we don't already have a master matcher)
1518                // then it may be worthwhile for subsequent calls
1519                // to have available an overall master matcher
1520                // to quickly reject any URI that will never match any current ad.
1521                if((master == null) && (classifiedAds.length > 2))
1522                    {
1523                    // Build a compound expression accepting the union of the matches.
1524                    final StringBuilder sb = new StringBuilder();
1525                    for(final ClassifiedAd clad : classifiedAds)
1526                        {
1527                        if(sb.length() == 0) { sb.append('('); }
1528                        else { sb.append("|("); }
1529                        sb.append(clad.regex);
1530                        sb.append(')');
1531                        }
1532                    _masterClassifiedMatchPattern = Pattern.compile(sb.toString());
1533                    }
1534    
1535                return(null);
1536                }
1537    
1538            return(result);
1539            }
1540    
1541    
1542        /**Get AuthData for specified author, or null if none.
1543         * @param auth  author initials of desired data.
1544         */
1545        public synchronized AuthData getAuthData(final CharSequence auth)
1546            {
1547            if(authDB == null) { return(null); }
1548            return(authDB.get(auth.toString()));
1549            }
1550    
    /**Property name prefix for author details; includes trailing dot. */
    public static final String PPREFIX_AUTH_DETAILS = "pg2k.authdb.";

    /**Immutable SortedMap from author initials to author details,
     * no empty or duplicate slots; non-zero length, or null if none.
     */
    private /* final */ SortedMap<String,AuthData> authDB;

    /**The maximum number of author database entries. */
    public static final int MAX_AUTH_ENTRIES = 1024;
1559    
1560        /**Class holding details of one author; immutable, serialisable.
1561         * Author initial must be valid syntactically;
1562         * text must be 7-bit printable (32--126) ASCII/HTML data.
1563         * <p>
1564         * We assume that the on-the-wire format for members is reasonably
1565         * efficient, especially if the transport layer includes compression.
1566         * <p>
1567         * This supports equals(), hashCode() and compareTo(); two objects
1568         * are equal iff all the fields are, and the sort order is by author.
1569         */
1570        public static final class AuthData implements Serializable,
1571                                                      ObjectInputValidation,
1572                                                      Comparable<AuthData>,
1573                                                      MemoryTools.Internable
1574            {
1575            /**Author initials: must be syntactically valid; non-null. */
1576            public final String auth;
1577    
1578            /**Author name: must be pure 7-bit printable ASCII HTML with possible entities; not empty, non-null. */
1579            public final String name;
1580    
1581            /**Author home-page URL: should be syntactically valid URL or null. */
1582            public final String www;
1583    
1584            /**Author email: should be syntactically valid email address or null. */
1585            public final String email;
1586    
1587            /**Description HTML code as 7-bit ASCII; never empty but can be null. */
1588            public final String desc;
1589    
1590            /**Maximum HTML description length in bytes/characters. */
1591            public static final int MAX_DESC_CHARACTERS = 2048;
1592    
1593            /**Equality depends on all fields. */
1594            @Override
1595            public boolean equals(final Object o)
1596                {
1597                if(!(o instanceof AuthData)) { return(false); }
1598                final AuthData other = (AuthData) o;
1599                if(!auth.equals(other.auth)) { return(false); }
1600                if(!name.equals(other.name)) { return(false); }
1601                if((www == null) ? (other.www != null) : !www.equals(other.www)) { return(false); }
1602                if((email == null) ? (other.email != null) : !email.equals(other.email)) { return(false); }
1603                if((desc == null) ? (other.desc != null) : !desc.equals(other.desc)) { return(false); }
1604                return(true);
1605                }
1606    
1607            /**Hash depends on author. */
1608            @Override
1609            public int hashCode() { return(auth.hashCode()); }
1610    
1611            /**Provides total ordering, by author. */
1612            public int compareTo(final AuthData o)
1613                { return(auth.compareTo(((AuthData) o).auth)); }
1614    
1615            /**Construct new author details from single string.
1616             * The String should be of the form:
1617             * <samp>Real Name|HomePageURL|email|description</samp>
1618             * where any item can be blank except the name.
1619             */
1620            AuthData(final String authInitials,
1621                     final String pipeDelimitedString)
1622                {
1623                this(authInitials,
1624                     _getSection(pipeDelimitedString, 0),
1625                     _getSection(pipeDelimitedString, 1),
1626                     _getSection(pipeDelimitedString, 2),
1627                     _getSection(pipeDelimitedString, 3));
1628                }
1629    
1630            /**Extract given section from pipe-delimited String.
1631             * Skip given number of pipe symbols ("|")
1632             * then return next up to following one if present.
1633             * Returns null if section not present.
1634             */
1635            private static String _getSection(final String s, final int section)
1636                {
1637                int prevPipePos = -1;
1638                for(int i = section; --i >= 0; )
1639                    {
1640                    final int nextPipe = s.indexOf('|', prevPipePos+1);
1641                    // Return null if requested section not present.
1642                    if(nextPipe == -1) { return(null); }
1643                    // Else adjust start marker.
1644                    prevPipePos = nextPipe;
1645                    }
1646    
1647                // Find the start of the following section, if any.
1648                final int nextPipePos = s.indexOf('|', prevPipePos+1);
1649    
1650                // No following section; return the rest of the string.
1651                if(nextPipePos == -1)
1652                    { return(s.substring(prevPipePos+1)); }
1653    
1654                // Return the requested section.
1655                return(s.substring(prevPipePos+1, nextPipePos));
1656                }
1657    
1658            /**Construct new author details from individial components.
1659             * @param authInitials  the syntactally-valid author initials; non-null
1660             * @param authName  real name of author; not empty nor null
1661             * @param homePageURL  valid http URL, will be stripped,
1662             *     empty text is converted to null
1663             * @param emailAddress  valid email address, will be stripped,
1664             *     empty text is converted to null
1665             * @param descriptionText  the descriptive text, will be stripped,
1666             *     empty text is converted to null
1667             *
1668             * Not given public access as only GenProps should need to construct.
1669             */
1670            AuthData(final String authInitials,
1671                     final String authName,
1672                     String homePageURL,
1673                     String emailAddress,
1674                     String descriptionText)
1675                {
1676                // Canonicalise URL.
1677                if(homePageURL != null)
1678                    {
1679                    homePageURL = homePageURL.trim();
1680                    if(homePageURL.length() == 0)
1681                        { homePageURL = null; }
1682                    }
1683    
1684                // Canonicalise email address.
1685                if(emailAddress != null)
1686                    {
1687                    emailAddress = emailAddress.trim();
1688                    if(emailAddress.length() == 0)
1689                        { emailAddress = null; }
1690                    }
1691    
1692                // Canonicalise description text.
1693                if(descriptionText != null)
1694                    {
1695                    descriptionText = descriptionText.trim();
1696                    if(descriptionText.length() == 0)
1697                        { descriptionText = null; }
1698                    }
1699    
1700                // Use intern() mainly to reduce old-heap memory churn.
1701                auth = MemoryTools.intern(authInitials);
1702                name = MemoryTools.intern(authName.trim());
1703                www = MemoryTools.intern(homePageURL);
1704                email = MemoryTools.intern(emailAddress);
1705                desc = MemoryTools.intern(descriptionText);
1706    
1707                // Verify object state and thus validate parameters...
1708                try { validateObject(); }
1709                catch(final InvalidObjectException e)
1710                    { throw new IllegalArgumentException(e.getMessage()); }
1711                }
1712    
1713            /**Our serialisation version. */
1714            private static final long serialVersionUID = 2647433856891011944L;
1715    
1716            /**Deserialise. */
1717            private void readObject(final ObjectInputStream in)
1718                throws IOException, ClassNotFoundException
1719                {
1720                in.defaultReadObject();
1721                validateObject(); // Validate state immediately.
1722                }
1723    
1724            /**Validate fields/state.
1725             * Called in the constructor and possibly after de-serialising.
1726             * <p>
1727             * Barf if something bad is found.
1728             * (Maybe allow some extra info in debug version.)
1729             */
1730            public void validateObject()
1731                throws InvalidObjectException
1732                {
1733                // Validate author initials...
1734                if(!ExhibitName.validAuthorSyntax(auth))
1735                    { throw new InvalidObjectException("bad object: author initials invalid"); }
1736    
1737                if((name == null) || (name.length() == 0) ||
1738                    !name.equals(name.trim()))
1739                    { throw new InvalidObjectException("bad object: author name missing or too short or non-canonical"); }
1740                for(int i = name.length(); --i >= 0; )
1741                    {
1742                    final char c = name.charAt(i);
1743                    if((c < 32) || (c > 126))
1744                        { throw new InvalidObjectException("bad object: author name contains bad char " + ((int) c)); }
1745                    }
1746    
1747                // Validate URL if not null.
1748                //   * Start with "http://".
1749                //   * Parseable...
1750                if(www != null)
1751                    {
1752                    if(!www.equals(www.trim()))
1753                        { throw new InvalidObjectException("bad object: WWW address non-canonical"); }
1754                    if(!www.startsWith("http://"))
1755                        { throw new InvalidObjectException("bad object: WWW address seems invalid"); }
1756                    try { new URL(www); }
1757                    catch(final MalformedURLException e)
1758                        { throw new InvalidObjectException("bad object: WWW address unparseable"); }
1759                    }
1760    
1761                // Validate email address if not null.
1762                //   * At least 5 characters (eg "a@b.c").
1763                //   * Contains exactly one "@".
1764                if(email != null)
1765                    {
1766                    if(!email.equals(email.trim()))
1767                        { throw new InvalidObjectException("bad object: email address non-canonical"); }
1768                    if(email.length() < 5)
1769                        { throw new InvalidObjectException("bad object: email address too short"); }
1770                    final int firstAt = email.indexOf('@');
1771                    if(firstAt < 1)
1772                        { throw new InvalidObjectException("bad object: email address user portion missing"); }
1773                    if(firstAt != email.lastIndexOf('@'))
1774                        { throw new InvalidObjectException("bad object: email address contains more than one @"); }
1775                    }
1776    
1777                // HTML code must be non-empty (but can be null),
1778                // 7-bit printable ASCII (and CRLF).
1779                if(desc != null)
1780                    {
1781                    if(!desc.equals(desc.trim()))
1782                        { throw new InvalidObjectException("bad object: HTML desc text non-canonical"); }
1783                    if((desc.length() == 0) || (desc.length() > MAX_DESC_CHARACTERS))
1784                        { throw new InvalidObjectException("bad object: HTML desc text length silly"); }
1785                    for(int i = desc.length(); --i >= 0; )
1786                        {
1787                        final char c = desc.charAt(i);
1788                        if(((c < 32) || (c > 126)) &&
1789                           ((c != '\r') && (c != '\n')))
1790                            { throw new InvalidObjectException("bad object: HTML desc text contains bad char " + ((int) c)); }
1791                        }
1792                    }
1793                }
1794            }
1795    
    /**Prefix for static "goodness"/popularity weighting values; includes trailing dot. */
    public static final String PPREFIX_POPWT_DETAILS = "pg2k.popweight.";

    /**Goodness subcomponent by author; includes trailing dot. */
    public static final String PCOMP_POPWR_BYAUTH = "byAuth.";

    /**Goodness subcomponent by type/extension; includes trailing dot. */
    public static final String PCOMP_POPWR_BYTYPE = "byType.";

    /**Goodness subcomponent by attribute; includes trailing dot. */
    public static final String PCOMP_POPWR_BYATTR = "byAttr.";

    /**The maximum number of static "goodness"/popularity database entries. */
    public static final int MAX_POPWT_ENTRIES = 256;

    /**The maximum allowed "goodness"/popularity weight (maximally good); strictly positive. */
    public static final byte MAX_POPWT_VAL = 100;

    /**The minimum allowed "goodness"/popularity weight (maximally bad); strictly negative. */
    public static final byte MIN_POPWT_VAL = -MAX_POPWT_VAL;

    /**Map from String token to Byte ([-100,+100]) weighting; may be null.
     * We cheat and assume that the sets of author, attribute words and extensions
     * are disjoint, and so we keep all the mappings in a single table!
     */
    private /* final */ Map<String,Byte> popWeights;
1822    
1823        /**Get weighting by author in range ([-100,+100]); null if no weighting for specified author.
1824         * Author initials specified must be syntactically valid.
1825         * <p>
1826         * Note that this should be treated as if a value of +1.0
1827         * with a correlation of the returned value divided by MAX_POPWT_VAL.
1828         */
1829        public synchronized Byte getPopWeightForAuth(final CharSequence auth)
1830            {
1831            if(!ExhibitName.validAuthorSyntax(auth))
1832                { throw new IllegalArgumentException(); }
1833            final Map<String,Byte> p = popWeights;
1834            if(p == null) { return(null); }
1835            return(p.get(auth.toString()));
1836            }
1837    
1838        /**Get weighting by type/extension in range ([-100,+100]); null if no weighting for specified type.
1839         * Extension/type specified must be syntactically valid and known to the system.
1840         * <p>
1841         * Note that this should be treated as if a value of +1.0
1842         * with a correlation of the returned value divided by MAX_POPWT_VAL.
1843         */
1844        public synchronized Byte getPopWeightForType(final CharSequence type)
1845            {
1846            if(ExhibitMIME.isValidInputExhibitNameExtension(type) == null)
1847                { throw new IllegalArgumentException(); }
1848            final Map<String,Byte> p = popWeights;
1849            if(p == null) { return(null); }
1850            return(p.get(type.toString()));
1851            }
1852    
1853        /**Get weighting by attribute word in range ([-100,+100]); null if no weighting for specified attribute.
1854         * Attribute word specified must be syntactically valid and known to the system.
1855         * <p>
1856         * Note that this should be treated as if a value of +1.0
1857         * with a correlation of the returned value divided by MAX_POPWT_VAL.
1858         */
1859        public synchronized Byte getPopWeightForAttr(final String attrWord)
1860            {
1861            if(!ExhibitName.validAttributeWord(attrWord))
1862                { throw new IllegalArgumentException(); }
1863            final Map<String,Byte> p = popWeights;
1864            if(p == null) { return(null); }
1865            return(p.get(attrWord));
1866            }
1867    
1868    
    /**Name of Web server hotlinked-download-limiter value (as percentage of all exhibit downloads). */
    public static final String PNAME_WEBSVR_EX_HOTLINK_LIMITER = "pg2k.websvr.ex.hotlinklimiter";
    /**Web server hotlinked-download-limiter value (as percentage of all exhibit downloads).
     * Stored in a byte; getWEBSVR_EX_HOTLINK_LIMITER() masks it so that it reads back as unsigned.
     */
    private final byte WEBSVR_EX_HOTLINK_LIMITER;
    /**Web server hotlinked-download-limiter value (as percentage of all exhibit downloads).
     * Constrained to the range approx 0 to 255, default 0.
     * <p>
     * This limit is to prevent too much (lazy or bandwidth-theft) direct hot-linking
1877         * to exhibits (and/or thumbnails) by external Web sites.
1878         * <p>
1879         * There <em>are</em> bona fide reasons for some direct hotlinks, eg:
1880         * <ul>
1881         * <li>From some image search engines.
1882         * <li>From small Web sites and/or from contributors' sites.
1883         * </ul>
1884         */
1885        public final int getWEBSVR_EX_HOTLINK_LIMITER()
1886            { return(WEBSVR_EX_HOTLINK_LIMITER & 0xff); }
1887    
    /**Name of Web server hotlinked-download-limiter divert graphic URL value. */
    public static final String PNAME_WEBSVR_EX_HOTLINK_DIVERT_URL = "pg2k.websvr.ex.hotlinklimiter.altURL";
    /**Web server hotlinked-download-limiter divert graphic URL value; returned as-is by getWEBSVR_EX_HOTLINK_DIVERT_URL(). */
    private final String WEBSVR_EX_HOTLINK_DIVERT_URL;
1892        /**Web server hotlinked-download-limiter value divert graphic URL value, or null if none.
     * Absolute URL of an alternate graphic to divert a user's browser to
1894         * when they have been hotlinked to one of our exhibits,
1895         * or null if the user is simply to get a 5XX error.
1896         * <p>
1897         * Preferably a small image with a long cache time.
1898         */
1899        public final String getWEBSVR_EX_HOTLINK_DIVERT_URL()
1900            { return(WEBSVR_EX_HOTLINK_DIVERT_URL); }
1901    
    /**Maximum number of allow/disallow hotlink hosts that may be specified; strictly positive. */
    public static final int MAX_ALLOW_DISALLOW_HOTLINK_HOSTS = 64;

    /**Name of Web server hotlinked-download-host-allow set. */
    public static final String PNAME_WEBSVR_EX_HOTLINK_LIMITER_ALLOW = "pg2k.websvr.ex.hotlinklimiter.allowhosts";

    /**Name of Web server hotlinked-download-host-disallow set. */
    public static final String PNAME_WEBSVR_EX_HOTLINK_LIMITER_DISALLOW = "pg2k.websvr.ex.hotlinklimiter.disallowhosts";

    /**Immutable Set of "allow" hotlink hosts (normalised host names); can be null for no such hosts.
     * External callers see an empty set rather than null via getHotLinkAllowHosts().
     */
    private /* final */ Set<String> hotLinkAllowHosts;

    /**Immutable compiled case-insensitive regex pattern for hotlink allow hosts not in literal list; can be null if no such "regex-match" hosts. */
    private final java.util.regex.Pattern hotLinkAllowHostsRegex;

    /**Immutable Set of "disallow" hotlink hosts (normalised/stripped/lower-cased host names); can be null for no such hosts.
     * External callers see an empty set rather than null via getHotLinkDisallowHosts().
     */
    private /* final */ Set<String> hotLinkDisallowHosts;

    /**Immutable compiled case-insensitive regex pattern for hotlink disallow hosts not in literal list; can be null if no such "regex-match" hosts. */
    private final java.util.regex.Pattern hotLinkDisallowHostsRegex;
1922    
1923        /**Get immutable Set of "allow" hotlink hosts (normalised host names); never null. */
1924        public final Set<String> getHotLinkAllowHosts()
1925            {
1926            final Set<String> r = hotLinkAllowHosts;
1927            if(r == null)
1928                {
1929                final Set<String> noHosts = Collections.emptySet();
1930                return(noHosts);
1931                }
1932            return(r);
1933            }
1934    
1935        /**Get immutable compiled case-insensitive regex pattern for hotlink allow hosts not in literal list; can be null if no such "regex-match" hosts. */
1936        public final Pattern getHotLinkAllowHostsRegex()
1937            {
1938            return(hotLinkAllowHostsRegex);
1939            }
1940    
1941        /**Get immutable Set of "disallow" hotlink hosts (normalised host names); never null. */
1942        public final Set<String> getHotLinkDisallowHosts()
1943            {
1944            final Set<String> r = hotLinkDisallowHosts;
1945            if(r == null)
1946                {
1947                final Set<String> noHosts = Collections.emptySet();
1948                return(noHosts);
1949                }
1950            return(r);
1951            }
1952    
1953        /**Get immutable compiled case-insensitive regex pattern for hotlink disallow hosts not in literal list; can be null if no such "regex-match" hosts. */
1954        public final Pattern getHotLinkDisallowHostsRegex()
1955            {
1956            return(hotLinkDisallowHostsRegex);
1957            }
1958    
1959        /**Returns true if this may be a regex (ie contains non-DNS-safe chars).
1960         * If a user-specified hostname contains characters
1961         * other than [-a-zA-Z0-9.]
1962         * then we assume that it may be intended as a regex.
1963         *
1964         * @param n  non-null non-empty user-specified non-normalised hostname
1965         */
1966        private static boolean _mayBeRegexHostname(final String n)
1967            {
1968            assert((n != null) && (n.length() != 0));
1969            for(int i = n.length(); --i >= 0; )
1970                {
1971                final char c = n.charAt(i);
1972                if((c >= 'a') && (c <= 'z')) { continue; }
1973                if((c >= 'A') && (c <= 'Z')) { continue; }
1974                if((c >= '0') && (c <= '9')) { continue; }
1975                if((c == '.') || (c == '-')) { continue; }
1976                return(true); // Not a plain DNS-safe name.
1977                }
1978            return(false); // Seems to be a plain name.
1979            }
1980    
1981        /**Make single compiled pattern from all regex-match hostname expressions; null if no such expressions.
1982         * The pattern will match if any of the sub-patterns match.
1983         * <p>
     * This will whinge about and drop any individual non-compilable patterns.
1985         * No whingeing is done, however, if the passed log is null!
1986         */
1987        private static Pattern _makeResidualRegexHostMatch(final Set<String> rawNames,
1988                                                           final SimpleLoggerIF logger)
1989            {
1990            if(rawNames == null) { return(null); }
1991    
1992            final StringBuilder compoundExpr = new StringBuilder();
1993    
1994            for(final String rawName : rawNames)
1995                {
1996                try
1997                    {
1998                    // Ignore plain host names that are not regexes...
1999                    if(!_mayBeRegexHostname(rawName)) { continue; }
2000    
2001                    // See if this expression can be compiled, ie is valid...
2002                    try { Pattern.compile(rawName, Pattern.CASE_INSENSITIVE); }
2003                    catch(final Exception e)
2004                        {
2005                        if(logger != null)
2006                            { logger.log("WARNING: GenProps: ignoring unusable/bad hostname regex: `"+rawName+"'"); }
2007                        continue; // Skip this unusable pattern.
2008                        }
2009    
2010                    // Append this pattern to the final set...
2011                    if(compoundExpr.length() != 0) { compoundExpr.append('|'); }
2012                    compoundExpr.append('(').append(rawName).append(')');
2013                    }
2014                catch(final IllegalArgumentException e) { } // Drop this name if it seems to be bad.
2015                }
2016    
2017            // If no regexes then return null.
2018            if(compoundExpr.length() == 0) { return(null); }
2019    
2020            // Return compiled expression...
2021            return(Pattern.compile(compoundExpr.toString(), Pattern.CASE_INSENSITIVE));
2022            }
2023    
    /**Normalise Set of raw host names; result is immutable and not empty, or is null.
2025         * Normalises the set of names provided,
2026         * discarding any that seem to be invalid,
2027         * and stopping when/if we reach the size limit specified.
2028         * <p>
2029         * This skips any that seem to be regexes.
2030         *
2031         * @param rawNames  set of raw host names; if null then null is returned
2032         * @param maxSize  maximum size of result set; strictly positive
2033         */
2034        private static Set<String> _normaliseHostList(final Set<String> rawNames,
2035                                                      final int maxSize)
2036            {
2037            if((maxSize < 1))
2038                { throw new IllegalArgumentException(); }
2039    
2040            if(rawNames == null) { return(null); }
2041    
2042            final Set<String> rawResult = new HashSet<String>(1 + 2*Math.min(maxSize, rawNames.size()));
2043    
2044            for(final String rawName : rawNames)
2045                {
2046                try
2047                    {
2048                    if(_mayBeRegexHostname(rawName)) { continue; }
2049                    final String normName = MemoryTools.intern(HostUtils.normaliseVirtualHostName(rawName));
2050                    rawResult.add(normName); // Add to the result...
2051                    if(rawResult.size() >= maxSize) { break; /* Stop when we have enough. */ }
2052                    }
2053                catch(final IllegalArgumentException e) { } // Drop this name if it seems to be bad.
2054                }
2055    
2056            // Return null rather than an empty set.
2057            if(rawResult.size() == 0) { return(null); }
2058    
2059            // Make sure that the result is immutable.
2060            return(Collections.unmodifiableSet(rawResult));
2061            }
2062    
2063        /**Parse set of (space-separated) hostnames; returns null for an empty list.
2064         * This does not validate or normalise the names supplied.
2065         *
     * @param hostList  space-separated list of hostnames (or null for none)
2067         */
2068        private Set<String> _parseHostList(final String hostList)
2069            {
2070            if((hostList == null) || (hostList.length() == 0)) { return(null); }
2071    
2072            final StringTokenizer st = new StringTokenizer(hostList);
2073            if(st.countTokens() == 0) { return(null); }
2074    
2075            final Set<String> result = new HashSet<String>(1 + 2*st.countTokens());
2076            while(st.hasMoreTokens())
2077                { result.add(st.nextToken()); }
2078    
2079            return(result);
2080            }
2081    
2082    
    /**Maximum number of DNSBLs that may be specified; strictly positive. */
    public static final int MAX_DNSBLS = 8;

    /**Name of Web server DNSBLs set. */
    public static final String PNAME_WEBSVR_DNSBLs = "org.hd.d.pg2k.DNSBLs";

    /**Immutable Set of DNSBLs (normalised DNS zone names); can be null for no such hosts.
     * External callers see an empty set rather than null via getDNSBLs().
     */
    private /* final */ Set<String> DNSBLs;
2091    
2092        /**Get immutable Set of DNSBLs (normalised DNS zone names); never null. */
2093        public final Set<String> getDNSBLs()
2094            {
2095            final Set<String> r = DNSBLs;
2096            if(r == null)
2097                {
2098                // Return empty set if none.
2099                final Set<String> noHosts = Collections.emptySet();
2100                return(noHosts);
2101                }
2102            return(r);
2103            }
2104        }