001    /*
002    Copyright (c) 1996-2012, Damon Hart-Davis
003    All rights reserved.
004    
005    Redistribution and use in source and binary forms, with or without
006    modification, are permitted provided that the following conditions are
007    met:
008    
009      * Redistributions of source code must retain the above copyright
010        notice, this list of conditions and the following disclaimer.
011    
012      * Redistributions in binary form must reproduce the above copyright
013        notice, this list of conditions and the following disclaimer in the
014        documentation and/or other materials provided with the
015        distribution.
016    
017    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
018    IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
019    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
020    PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
021    OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
022    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
023    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
024    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
025    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
026    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
027    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
028    */
029    package org.hd.d.pg2k.webSvr.util;
030    
031    import java.net.InetAddress;
032    import java.util.Locale;
033    
034    import javax.servlet.ServletContext;
035    import javax.servlet.http.HttpServletRequest;
036    
037    import org.hd.d.pg2k.svrCore.HostUtils;
038    import org.hd.d.pg2k.svrCore.I18NTools;
039    import org.hd.d.pg2k.svrCore.LocaleBeanBase;
040    import org.hd.d.pg2k.svrCore.location.GeoUtils;
041    import org.hd.d.pg2k.svrCore.props.LocalProps;
042    import org.hd.d.pg2k.svrCore.stats.StatsLogger;
043    
044    /**JavaBean encapsulating notions of locale for JSP/servlet pages.
045     * This should be created with request scope.
046     * <p>
047     * This puts off loading the properties bundle(s) until necessary,
048     * since it can suck CPU cycles.
049     * <p>
050     * If the user has a session in progress an explicit locale is looked for there,
051     * else the user's locale is obtained from the HTTP request object
052     * if the Accept-Languages header is present
053     * (which resorts to the server's locale if the user has not specified one),
054     * else we attempt to guess a locale given the user's IP address.
055     * <p>
056     * Access to this object is thread-safe, though since this object
057     * should appear at request (or page) level we do not actually expect
058     * threading to be an issue.
059     * <p>
060     * This is Serializable so as to be able to be stored in
061     * a servlet session; nothing especially long-lived or sensitive.
062     */
063    public final class LocaleBean extends LocaleBeanBase
064        {
065        /**If true, we may default the locale to that of the server location rather than the client, if no explicit locale is specified.
066         * This may help SEs distinguish between different mirrors
067         * which may have some different i18n-ed content to offer.
068         * <p>
069         * This is only in cases where no locale is specified, usually by SEs,
070         * whose spiders are predominantly in the US.
071         */
072        private static final boolean SHOW_SERVER_ACCENT = true;
073    
074        /**If true then note when we seem to be visited by a client with multiple locale preferences set.
075         * Probably very unusual, but if it becomes common we should handle it well.
076         */
077        private static final boolean LOG_MULTI_LOCALE_CLIENTS = false;
078    
079    
080        /**Our logger which falls back to System.out if servlet log not available; never null. */
081        private final static WebUtils.ServletLoggerWithFallback logger = new WebUtils.ServletLoggerWithFallback();
082    
083        /**The stats set to which we log HTTP-client preferred language code counts.
084         * This is a set of two-letter ISO 639 lower-case codes,
085         * with the default being the server's locale.
086         * <p>
087         * We attempt to ignore requests from spiders,
088         * ie we try to log requests only from live humans.
089         * <p>
090         * We may enable or disable this logging (etc) from system parameters,
091         * but this is nonetheless our unique identifier.
092         */
093        private final static StatsLogger.StatsConfig statsIDLang =
094            new StatsLogger.StatsConfig("WEB-USER-LOCALE",
095                                        logger, // Use servlet log if poss.
096                                        false, // Only dump summaries...
097                                        12 * 3600, // About every 12 hours.
098                                        true); // Adaptive.
099    
100        /**The stats set to which we log HTTP-client explicitly-set language code counts.
101         * This is a set of two-letter ISO 639 lower-case codes.
102         * <p>
103         * We attempt to ignore requests from spiders,
104         * ie we try to log requests only from live humans.
105         * <p>
106         * We may enable or disable this logging (etc) from system parameters,
107         * but this is nonetheless our unique identifier.
108         */
109        private final static StatsLogger.StatsConfig statsIDLangExplicit =
110            new StatsLogger.StatsConfig("WEB-USER-LOCALE-EXPLICIT",
111                                        logger, // Use servlet log if poss.
112                                        false, // Only dump summaries...
113                                        12 * 3600, // About every 12 hours.
114                                        true); // Adaptive.
115    
116        /**If true then try to guess locale from user's IP address if no explicit locale indicated in HTTP request. */
117        private static final boolean ALLOW_LOCALE_GUESS_FROM_IP_ADDR = true;
118    
119        /**Cached request object.
120         * Marked volatile so as to be safe to access without a lock.
121         */
122        private transient volatile HttpServletRequest cachedRequest;
123    
124        /**Set the current HTTP request to initialise the bean.
125         * This allows us to retrieve any locale information that we need.
126         * <p>
127         * This first looks to see if there is a session
128         * containing a locale-override value;
129         * if so then it is used.
130         * No session is created if none exists.
131         * <p>
132         * Else, this looks for a locale set by the user's browser,
133         * and uses that if present.
134         * <p>
135         * Else, this tried to guess a suitable locale from the user's IP address,
136         * using the IP-to-location tools.
137         * <p>
138         * Else a `safe' locale is used, usually the server's.
139         * <p>
140         * If the request is null or some other invalid condition is
141         * encountered then this resets the local to a safe value
142         * (usually the server's locale).
143         * <p>
144         * If the new cached request is identical to the old one,
145         * then nothing is done.
146         * <p>
147         * @param request  if non-null is used to try to determine the user's locale
148         * @param ctxt  if non-null selects the servlet log to write to
149         *     (if null, output will go to System.out)
150         */
151        public void setRequest(final HttpServletRequest request,
152                               final ServletContext ctxt)
153            {
154            // If we've already done the work for this request then return now!
155            if(cachedRequest == request) { return; }
156    
157            try
158                {
159                // Whoops, no request at all, so use a safe locale.
160                if(request == null)
161                    {
162                    setLocale(SAFE_LOCALE);
163                    return;
164                    }
165    
166                // Divert any output to the correct servlet log...
167                logger.setContext(ctxt);
168    
169                // Try first for session-based override.
170                final SessionVarBean svb = SessionVarBean.getExtantSessionVars(request, false);
171                if(svb != null)
172                    {
173                    final Locale l = svb.getSessionVarLocale();
174                    if(l != null)
175                        {
176                        if(!l.equals(getLocale()))
177                            {
178                            // Only explicitly set/report first override per request.
179                            setLocale(l);
180    
181                            // Log locale's language part from session request,
182                            // but only for apparent real humans...
183                            if(!WebUtils.requestProbablyFromSpider(request))
184                                { StatsLogger.captureDataPoint(statsIDLangExplicit, toString()); }
185                            }
186    
187                        // Now correctly set with session-driven override locale.
188                        return;
189                        }
190                    }
191    
192                // With no explicit client-specified locale(s),
193                // possibly try giving the server a local accent
194                // (especially for spiders when an explicit mirror is requested),
195                // or else guess the client's probable locale given their location.
196                final String headerAcceptLang = request.getHeader("Accept-Language");
197                if(headerAcceptLang == null)
198                    {
199                    final String serverName = request.getServerName();
200                    final String mirrorTag;
201                    final Locale mirrorLocale;
202                    if(SHOW_SERVER_ACCENT &&
203                       (serverName != null) && HostUtils.isMirrorName(serverName) &&
204                       WebUtils.requestProbablyFromSpider(request) &&
205                       ((mirrorTag = LocalProps.getMirrorTag()) != null) &&
206                       ((mirrorLocale = I18NTools.LOCALE_BY_CCTLD.get(new GeoUtils.CCTLD(mirrorTag.substring(0, 2)))) != null))
207                        {
208                        // Will speak with my local "accent"...
209                        setLocale(mirrorLocale);
210    
211                        // Note in stats.
212                        StatsLogger.captureDataPoint(statsIDLang, "LOCAL-" + mirrorLocale);
213                        return;
214                        }
215                    else if(ALLOW_LOCALE_GUESS_FROM_IP_ADDR)
216                        {
217                        // Try to guess user's locale
218                        // from client IP and thus user's country/location.
219                        try
220                            {
221                            final InetAddress clientAddr = InetAddress.getByName(request.getRemoteAddr());
222                            // Try quick CC lookup only
223                            // since all this delays the user...
224                            final GeoUtils.CCTLD cctld = GeoUtils.getCCTLDByAddress(clientAddr, true);
225    
226                            // If we got a ccTLD then look for a locale for it.
227                            if(cctld != null)
228                                {
229                                // See if we know a default locale for this country.
230                                final Locale l = I18NTools.LOCALE_BY_CCTLD.get(cctld);
231                                if(l != null)
232                                    {
233                                    // OK, guessed locale from IP address.
234                                    setLocale(l);
235    
236                                    // Note in stats.
237                                    StatsLogger.captureDataPoint(statsIDLang, "NONE-" + l);
238                                    return;
239                                    }
240                                }
241                            }
242                        catch(final Exception e)
243                            {
244                            e.printStackTrace();  // Whinge but drop through to continue.
245                            }
246    
247                        // No browser-set nor geo-guessable-from-IP-address locale.
248                        StatsLogger.captureDataPoint(statsIDLang, "NONE");
249    
250                        // Fall through to try other methods anyway.
251                        }
252                    }
253    
254                else if(LOG_MULTI_LOCALE_CLIENTS)
255                    {
256                    if(headerAcceptLang.indexOf(',') != -1)
257                        {
258                        // Log potential multi-locale client.
259                        StatsLogger.captureDataPoint(statsIDLang, "MULTIPLE");
260                        }
261                    }
262    
263                // Finally try for the browser-set locale-preference value(s)
264                // (which falls back to our Web-server hard-coded default).
265                // We validate this a little before accepting it,
266                // and normalise too (eg by dropping any "variant" portion).
267                // The language component must be present (and of length 2)
268                // and the country must be absent or (in lower case) a valid ccTLD.
269                // TODO: be prepared to try user 2nd-and-later-choice locales.
270                final Locale l = request.getLocale();
271                if((l != null) && (l.getLanguage().length() == 2) &&
272                   (I18NTools.LOCALES.contains(l) ||
273                    "".equals(l.getCountry()) ||
274                    GeoUtils.CCTLD.isSyntaticallyValidCcTLD(l.getCountry().toLowerCase())))
275                    {
276                    setLocale(new Locale(l.getLanguage().toLowerCase(), l.getCountry().toUpperCase(), ""));
277    
278                    // Log locale from HTTP request
279                    // iff this does not look like a spider.
280                    if(!WebUtils.requestProbablyFromSpider(request))
281                        {
282                        if(headerAcceptLang != null)
283                            { StatsLogger.captureDataPoint(statsIDLang, l.toString()); }
284                        }
285    
286                    return;
287                    }
288    
289                // Give up and use a safe value.
290                setLocale(SAFE_LOCALE);
291                }
292            finally
293                {
294                // Note that we've finished the work for this request.
295                cachedRequest = request;
296                }
297            }
298    
299    
300        /**Public no-arg constructor for ease of use as a JavaBean.
301         * Sets to a safe locale.
302         */
303        public LocaleBean()
304            { }
305    
306        /**Public no-arg constructor for ease of use as a JavaBean.
307         * This defers as much work as it reasonably can.
308         *
309         * @param l  initial locale; never null
310         */
311        public LocaleBean(final Locale l)
312            {
313            super(l);
314            }
315    
316        /**Unique Serialisation class ID generated by http://random.hd.org/. */
317        private static final long serialVersionUID = -6180248508840300659L;
318        }