001 /*
002 Copyright (c) 1996-2012, Damon Hart-Davis
003 All rights reserved.
004
005 Redistribution and use in source and binary forms, with or without
006 modification, are permitted provided that the following conditions are
007 met:
008
009 * Redistributions of source code must retain the above copyright
010 notice, this list of conditions and the following disclaimer.
011
012 * Redistributions in binary form must reproduce the above copyright
013 notice, this list of conditions and the following disclaimer in the
014 documentation and/or other materials provided with the
015 distribution.
016
017 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
018 IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
019 TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
020 PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
021 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
022 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
023 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
024 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
025 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
026 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
027 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
028 */
029 package org.hd.d.pg2k.webSvr.util;
030
031 import java.net.InetAddress;
032 import java.util.Locale;
033
034 import javax.servlet.ServletContext;
035 import javax.servlet.http.HttpServletRequest;
036
037 import org.hd.d.pg2k.svrCore.HostUtils;
038 import org.hd.d.pg2k.svrCore.I18NTools;
039 import org.hd.d.pg2k.svrCore.LocaleBeanBase;
040 import org.hd.d.pg2k.svrCore.location.GeoUtils;
041 import org.hd.d.pg2k.svrCore.props.LocalProps;
042 import org.hd.d.pg2k.svrCore.stats.StatsLogger;
043
044 /**JavaBean encapsulating notions of locale for JSP/servlet pages.
045 * This should be created with request scope.
046 * <p>
047 * This puts off loading the properties bundle(s) until necessary,
048 * since it can suck CPU cycles.
049 * <p>
050 * If the user has a session in progress an explicit locale is looked for there,
051 * else the user's locale is obtained from the HTTP request object
052 * if the Accept-Languages header is present
053 * (which resorts to the server's locale if the user has not specified one),
054 * else we attempt to guess a locale given the user's IP address.
055 * <p>
056 * Access to this object is thread-safe, though since this object
057 * should appear at request (or page) level we do not actually expect
058 * threading to be an issue.
059 * <p>
060 * This is Serializable so as to be able to be stored in
061 * a servlet session; nothing especially long-lived or sensitive.
062 */
063 public final class LocaleBean extends LocaleBeanBase
064 {
065 /**If true, we may default the locale to that of the server location rather than the client, if no explicit locale is specified.
066 * This may help SEs distinguish between different mirrors
067 * which may have some different i18n-ed content to offer.
068 * <p>
069 * This is only in cases where no locale is specified, usually by SEs,
070 * whose spiders are predominantly in the US.
071 */
072 private static final boolean SHOW_SERVER_ACCENT = true;
073
074 /**If true then note when we seem to be visited by a client with multiple locale preferences set.
075 * Probably very unusual, but if it becomes common we should handle it well.
076 */
077 private static final boolean LOG_MULTI_LOCALE_CLIENTS = false;
078
079
080 /**Our logger which falls back to System.out if servlet log not available; never null. */
081 private final static WebUtils.ServletLoggerWithFallback logger = new WebUtils.ServletLoggerWithFallback();
082
083 /**The stats set to which we log HTTP-client preferred language code counts.
084 * This is a set of two-letter ISO 639 lower-case codes,
085 * with the default being the server's locale.
086 * <p>
087 * We attempt to ignore requests from spiders,
088 * ie we try to log requests only from live humans.
089 * <p>
090 * We may enable or disable this logging (etc) from system parameters,
091 * but this is nonetheless our unique identifier.
092 */
093 private final static StatsLogger.StatsConfig statsIDLang =
094 new StatsLogger.StatsConfig("WEB-USER-LOCALE",
095 logger, // Use servlet log if poss.
096 false, // Only dump summaries...
097 12 * 3600, // About every 12 hours.
098 true); // Adaptive.
099
100 /**The stats set to which we log HTTP-client explicitly-set language code counts.
101 * This is a set of two-letter ISO 639 lower-case codes.
102 * <p>
103 * We attempt to ignore requests from spiders,
104 * ie we try to log requests only from live humans.
105 * <p>
106 * We may enable or disable this logging (etc) from system parameters,
107 * but this is nonetheless our unique identifier.
108 */
109 private final static StatsLogger.StatsConfig statsIDLangExplicit =
110 new StatsLogger.StatsConfig("WEB-USER-LOCALE-EXPLICIT",
111 logger, // Use servlet log if poss.
112 false, // Only dump summaries...
113 12 * 3600, // About every 12 hours.
114 true); // Adaptive.
115
116 /**If true then try to guess locale from user's IP address if no explicit locale indicated in HTTP request. */
117 private static final boolean ALLOW_LOCALE_GUESS_FROM_IP_ADDR = true;
118
119 /**Cached request object.
120 * Marked volatile so as to be safe to access without a lock.
121 */
122 private transient volatile HttpServletRequest cachedRequest;
123
124 /**Set the current HTTP request to initialise the bean.
125 * This allows us to retrieve any locale information that we need.
126 * <p>
127 * This first looks to see if there is a session
128 * containing a locale-override value;
129 * if so then it is used.
130 * No session is created if none exists.
131 * <p>
132 * Else, this looks for a locale set by the user's browser,
133 * and uses that if present.
134 * <p>
135 * Else, this tried to guess a suitable locale from the user's IP address,
136 * using the IP-to-location tools.
137 * <p>
138 * Else a `safe' locale is used, usually the server's.
139 * <p>
140 * If the request is null or some other invalid condition is
141 * encountered then this resets the local to a safe value
142 * (usually the server's locale).
143 * <p>
144 * If the new cached request is identical to the old one,
145 * then nothing is done.
146 * <p>
147 * @param request if non-null is used to try to determine the user's locale
148 * @param ctxt if non-null selects the servlet log to write to
149 * (if null, output will go to System.out)
150 */
151 public void setRequest(final HttpServletRequest request,
152 final ServletContext ctxt)
153 {
154 // If we've already done the work for this request then return now!
155 if(cachedRequest == request) { return; }
156
157 try
158 {
159 // Whoops, no request at all, so use a safe locale.
160 if(request == null)
161 {
162 setLocale(SAFE_LOCALE);
163 return;
164 }
165
166 // Divert any output to the correct servlet log...
167 logger.setContext(ctxt);
168
169 // Try first for session-based override.
170 final SessionVarBean svb = SessionVarBean.getExtantSessionVars(request, false);
171 if(svb != null)
172 {
173 final Locale l = svb.getSessionVarLocale();
174 if(l != null)
175 {
176 if(!l.equals(getLocale()))
177 {
178 // Only explicitly set/report first override per request.
179 setLocale(l);
180
181 // Log locale's language part from session request,
182 // but only for apparent real humans...
183 if(!WebUtils.requestProbablyFromSpider(request))
184 { StatsLogger.captureDataPoint(statsIDLangExplicit, toString()); }
185 }
186
187 // Now correctly set with session-driven override locale.
188 return;
189 }
190 }
191
192 // With no explicit client-specified locale(s),
193 // possibly try giving the server a local accent
194 // (especially for spiders when an explicit mirror is requested),
195 // or else guess the client's probable locale given their location.
196 final String headerAcceptLang = request.getHeader("Accept-Language");
197 if(headerAcceptLang == null)
198 {
199 final String serverName = request.getServerName();
200 final String mirrorTag;
201 final Locale mirrorLocale;
202 if(SHOW_SERVER_ACCENT &&
203 (serverName != null) && HostUtils.isMirrorName(serverName) &&
204 WebUtils.requestProbablyFromSpider(request) &&
205 ((mirrorTag = LocalProps.getMirrorTag()) != null) &&
206 ((mirrorLocale = I18NTools.LOCALE_BY_CCTLD.get(new GeoUtils.CCTLD(mirrorTag.substring(0, 2)))) != null))
207 {
208 // Will speak with my local "accent"...
209 setLocale(mirrorLocale);
210
211 // Note in stats.
212 StatsLogger.captureDataPoint(statsIDLang, "LOCAL-" + mirrorLocale);
213 return;
214 }
215 else if(ALLOW_LOCALE_GUESS_FROM_IP_ADDR)
216 {
217 // Try to guess user's locale
218 // from client IP and thus user's country/location.
219 try
220 {
221 final InetAddress clientAddr = InetAddress.getByName(request.getRemoteAddr());
222 // Try quick CC lookup only
223 // since all this delays the user...
224 final GeoUtils.CCTLD cctld = GeoUtils.getCCTLDByAddress(clientAddr, true);
225
226 // If we got a ccTLD then look for a locale for it.
227 if(cctld != null)
228 {
229 // See if we know a default locale for this country.
230 final Locale l = I18NTools.LOCALE_BY_CCTLD.get(cctld);
231 if(l != null)
232 {
233 // OK, guessed locale from IP address.
234 setLocale(l);
235
236 // Note in stats.
237 StatsLogger.captureDataPoint(statsIDLang, "NONE-" + l);
238 return;
239 }
240 }
241 }
242 catch(final Exception e)
243 {
244 e.printStackTrace(); // Whinge but drop through to continue.
245 }
246
247 // No browser-set nor geo-guessable-from-IP-address locale.
248 StatsLogger.captureDataPoint(statsIDLang, "NONE");
249
250 // Fall through to try other methods anyway.
251 }
252 }
253
254 else if(LOG_MULTI_LOCALE_CLIENTS)
255 {
256 if(headerAcceptLang.indexOf(',') != -1)
257 {
258 // Log potential multi-locale client.
259 StatsLogger.captureDataPoint(statsIDLang, "MULTIPLE");
260 }
261 }
262
263 // Finally try for the browser-set locale-preference value(s)
264 // (which falls back to our Web-server hard-coded default).
265 // We validate this a little before accepting it,
266 // and normalise too (eg by dropping any "variant" portion).
267 // The language component must be present (and of length 2)
268 // and the country must be absent or (in lower case) a valid ccTLD.
269 // TODO: be prepared to try user 2nd-and-later-choice locales.
270 final Locale l = request.getLocale();
271 if((l != null) && (l.getLanguage().length() == 2) &&
272 (I18NTools.LOCALES.contains(l) ||
273 "".equals(l.getCountry()) ||
274 GeoUtils.CCTLD.isSyntaticallyValidCcTLD(l.getCountry().toLowerCase())))
275 {
276 setLocale(new Locale(l.getLanguage().toLowerCase(), l.getCountry().toUpperCase(), ""));
277
278 // Log locale from HTTP request
279 // iff this does not look like a spider.
280 if(!WebUtils.requestProbablyFromSpider(request))
281 {
282 if(headerAcceptLang != null)
283 { StatsLogger.captureDataPoint(statsIDLang, l.toString()); }
284 }
285
286 return;
287 }
288
289 // Give up and use a safe value.
290 setLocale(SAFE_LOCALE);
291 }
292 finally
293 {
294 // Note that we've finished the work for this request.
295 cachedRequest = request;
296 }
297 }
298
299
300 /**Public no-arg constructor for ease of use as a JavaBean.
301 * Sets to a safe locale.
302 */
303 public LocaleBean()
304 { }
305
306 /**Public no-arg constructor for ease of use as a JavaBean.
307 * This defers as much work as it reasonably can.
308 *
309 * @param l initial locale; never null
310 */
311 public LocaleBean(final Locale l)
312 {
313 super(l);
314 }
315
316 /**Unique Serialisation class ID generated by http://random.hd.org/. */
317 private static final long serialVersionUID = -6180248508840300659L;
318 }