001 /*
002 Copyright (c) 1996-2011, Damon Hart-Davis
003 All rights reserved.
004
005 Redistribution and use in source and binary forms, with or without
006 modification, are permitted provided that the following conditions are
007 met:
008
009 * Redistributions of source code must retain the above copyright
010 notice, this list of conditions and the following disclaimer.
011
012 * Redistributions in binary form must reproduce the above copyright
013 notice, this list of conditions and the following disclaimer in the
014 documentation and/or other materials provided with the
015 distribution.
016
017 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
018 IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
019 TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
020 PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
021 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
022 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
023 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
024 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
025 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
026 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
027 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
028 */
029
030 package org.hd.d.pg2k.webSvr.ads;
031
032 import java.io.IOException;
033 import java.lang.ref.WeakReference;
034 import java.net.InetAddress;
035 import java.net.UnknownHostException;
036 import java.util.ArrayList;
037 import java.util.Arrays;
038 import java.util.BitSet;
039 import java.util.Collections;
040 import java.util.Date;
041 import java.util.EnumMap;
042 import java.util.List;
043 import java.util.Map;
044 import java.util.Random;
045 import java.util.SortedMap;
046 import java.util.TreeMap;
047 import java.util.concurrent.Callable;
048 import java.util.concurrent.Future;
049 import java.util.concurrent.TimeUnit;
050 import java.util.concurrent.atomic.AtomicInteger;
051
052 import javax.servlet.ServletContext;
053 import javax.servlet.http.HttpServletRequest;
054
055 import org.hd.d.pg2k.svrCore.CoreConsts;
056 import org.hd.d.pg2k.svrCore.ExhibitName;
057 import org.hd.d.pg2k.svrCore.GenUtils;
058 import org.hd.d.pg2k.svrCore.Name;
059 import org.hd.d.pg2k.svrCore.Rnd;
060 import org.hd.d.pg2k.svrCore.SimpleLoggerIF;
061 import org.hd.d.pg2k.svrCore.ThreadUtils;
062 import org.hd.d.pg2k.svrCore.Tuple;
063 import org.hd.d.pg2k.svrCore.Tuple.Pair;
064 import org.hd.d.pg2k.svrCore.location.GeoUtils;
065 import org.hd.d.pg2k.svrCore.props.GenProps;
066 import org.hd.d.pg2k.svrCore.vars.EventPeriod;
067 import org.hd.d.pg2k.svrCore.vars.EventVariableValue;
068 import org.hd.d.pg2k.svrCore.vars.SimpleVariableDefinition;
069 import org.hd.d.pg2k.svrCore.vars.SimpleVariablePipelineIF;
070 import org.hd.d.pg2k.svrCore.vars.SimpleVariableValue;
071 import org.hd.d.pg2k.svrCore.vars.SystemVariables;
072 import org.hd.d.pg2k.webSvr.exhibit.DataSourceBean;
073 import org.hd.d.pg2k.webSvr.util.HTMLThumbnailInsertGenerators;
074 import org.hd.d.pg2k.webSvr.util.LocaleBean;
075 import org.hd.d.pg2k.webSvr.util.PageSkinUtils;
076 import org.hd.d.pg2k.webSvr.util.SessionVarBean;
077 import org.hd.d.pg2k.webSvr.util.StatsSink;
078 import org.hd.d.pg2k.webSvr.util.ThroughputMonitorFilterPG2K;
079 import org.hd.d.pg2k.webSvr.util.WebConsts;
080 import org.hd.d.pg2k.webSvr.util.WebUtils;
081
082 import ORG.hd.d.IsDebug;
083
084 /**Advertising-related utility functions.
085 * One advantage of having code here rather than in-line in a JSP
086 * is that it is pre-compiled off-line for speed and robustness;
087 * code here is also potentially easier to test.
088 */
089 public final class AdUtils
090 {
/**
 * Not instantiable: this class only hosts static helpers and nested types.
 */
private AdUtils() {
}
093
094
095 /**Threshold (in thousands) when we switch from logging individual hits/clicks to thousands; strictly positive.
096 * Raising this value improves accuracy when stats come in slowly
097 * and/or servers are frequently restarted,
098 * but increases stats traffic and log size.
099 * <p>
100 * Ideally this should be large enough that loss of a nearly full "k-click"
101 * should not be hugely significant, which implies a value of maybe 10 or more.
102 * <p>
103 * We should set this as low as we can bear in terms of
104 * potential statistical inaccuracy to keep log traffic reasonable.
105 */
106 private static final int KHIT_THRESHOLD = 4;
107
108 /**Extra weighting in ad layout/format decisions given to local hits/stats; non-negative.
109 * Although all ad layout and click-through stats are recorded globally,
110 * some of them are also recorded locally, in a somewhat volatile form.
111 * (Such local stats might be lost at an upgrade for example,
112 * or be less robust in the face of local disc errors.)
113 * <p>
114 * A weighting of zero means to not treat local stats specially,
115 * and just use the global stats (which includes the local stats).
116 * <p>
117 * A weighting of 1 gives local stats equal weight to global stats,
118 * though clicks (etc) are recorded once in the global stats already,
119 * so that means that local stats are effectively given somewhat higher weight.
120 * <p>
121 * Note that the weighting is usually in terms of the entire eCPM/eCTR/etc
122 * (providing that there are enough local pageviews to keep noise acceptable,
123 * else the local stats may be ignored or used in a more-noise-tolerant way),
124 * and thus even low-traffic mirrors can show significant favour to local mores.
125 */
126 public static final int LOCAL_AD_STATS_WEIGHTING = 4;
127
128 /**Minimum number of pageviews to accept one of the local stats values; strictly positive.
129 * Because the stats system switches from recording individual page views and clicks
130 * to recording thousands of page views and clicks at a round 1000 mark,
131 * this value should be either 1000 or less (though that may be too small)
132 * or a multiple of 1000 (but not so high as to exclude local stats entirely),
133 * or enough below the KHIT_THRESHOLD to be immune to the large-count lumpiness.
134 * <p>
135 * NB: Google AdWords seems to regard 1000 as a minimum sensible sample size.
136 * <p>
137 * A value of 1000 up to some fraction of our KHIT_THRESHOLD may be good.
138 */
139 private static final int MIN_LOCAL_STATS_PAGEVIEWS = Math.max(1000, 334*KHIT_THRESHOLD);
140
141
142 /**Ranking threshold assumed to indicate "well" ranked amongst many peers; strictly positive. */
143 private static final int TOP_N_RANK_AD_LIMIT = 31 + Rnd.fastRnd.nextInt(19);
144
145 /**What fraction of ad slots are shown in "lite" mode; strictly positive. */
146 private static final int LITE_AD_SHOW_FRACTION = 5 + Rnd.fastRnd.nextInt(3);
147
148 /**Time constant to use for ageing any memory of ad slot run times; strictly positive. */
149 private static final int ADSLOTMONIOR_TIME_CONST = 8;
150
151 /**Handler for ad click-throughs; designed not to retain important strong refs. */
152 private static final class ClickThruHandler extends StatsSink.AbstractStatsListener
153 {
154 private final String[] otherTagsCopy;
155 private final String canonReqURI;
156 private final InetAddress clickerIPAddr;
157 private final WeakReference<SimpleVariablePipelineIF> varsWR;
158 private final String dpID;
159 private final String exhibitName;
160
161 private ClickThruHandler(
162 final String uniqueDataPointID,
163 final long expireBy,
164 final String[] otherTagsCopy,
165 final String canonReqURI,
166 final InetAddress clickerIPAddr,
167 final WeakReference<SimpleVariablePipelineIF> varsWR,
168 final String exhibitName)
169 {
170 super(uniqueDataPointID, expireBy);
171 this.otherTagsCopy = otherTagsCopy;
172 this.canonReqURI = canonReqURI;
173 this.clickerIPAddr = clickerIPAddr;
174 this.varsWR = varsWR;
175 dpID = uniqueDataPointID;
176 this.exhibitName = exhibitName;
177 }
178
179 @Override public final String handle(final Map<String, String[]> parameters)
180 {
181 // Give up immediately if our pipeline has gone away...
182 final SimpleVariablePipelineIF vars = varsWR.get();
183 if(null == vars) { return(null); }
184
185 if(parameters != null)
186 {
187 // We must see a non-empty url parameter (for AdSense-type click-throughs).
188 final String[] values = parameters.get("url");
189 if((values != null) && (values.length == 1) &&
190 (values[0] != null) && (values[0].length() > 0))
191 {
192 try
193 {
194 // If this was for a bona-fide catalogue page,
195 // then log the click-through for that exhibit,
196 // else just log as a null (valueless) event.
197
198 // Post vote event value to correct event stream...
199 if(IsDebug.isDebug) { System.out.println("Click-through registered for: " + exhibitName + " by " + dpID + " to " + values[0]); }
200 vars.setVariable(new SimpleVariableValue(
201 SystemVariables.ACCESSPATTERN_CLICKTHROUGH,
202 (exhibitName == null) ? null : ExhibitName.getFileComponent(exhibitName)));
203
204 if(clickerIPAddr != null)
205 {
206 // Note location of clicker (as ccTLD or region).
207 // We are prepared to spend some time on this as clicking-through is valuable
208 // and we've possibly already collected and cached any needed data.
209 final String location = GeoUtils.getRegionByAddress(clickerIPAddr, false);
210 vars.setVariable(new SimpleVariableValue(
211 SystemVariables.GENSTATS_STRING_GLOBAL_EVENT,
212 "clickerLocation=" + location));
213 vars.setVariable(new SimpleVariableValue(
214 SystemVariables.GENSTATS_STRING_LOCAL_EVENT,
215 "clickerLocation=" + location));
216 }
217
218 // Note page URI that generated the click.
219 if(canonReqURI != null)
220 {
221 if(IsDebug.isDebug) { System.out.println("Click-through URI: " + canonReqURI + " by " + dpID + " to " + values[0]); }
222 vars.setVariable(new SimpleVariableValue(
223 SystemVariables.ACCESSPATTERN_CLICKTHROUGH_BY_URI,
224 canonReqURI));
225 }
226
227 // Note any other (non-null, non-empty)
228 // tags for click-through.
229 if(otherTagsCopy != null)
230 {
231 for(final String tag : otherTagsCopy)
232 {
233 if((tag == null) || (tag.length() == 0)) { continue; }
234 // Log it globally and locally.
235 vars.setVariable(new SimpleVariableValue(
236 SystemVariables.GENSTATS_STRING_GLOBAL_EVENT,
237 tag));
238 vars.setVariable(new SimpleVariableValue(
239 SystemVariables.GENSTATS_STRING_LOCAL_EVENT,
240 tag));
241 }
242 }
243 }
244 catch(final IOException e)
245 { e.printStackTrace(); /* Just absorb errors. */ }
246 }
247 }
248
249 // No redirection; no user should be visiting this page...
250 return(null);
251 }
252 }
253
254 /**Unique thread-safe marker tag and stats object for a particular ad slot.
255 * Typically a private static final in the JSP/servlet,
256 * call 'mon.done()' in the same thread as showAds(..., mon)
257 * when an ad gated/enabled by showAds() with this as arg is completed.
258 * <p>
259 * Used to avoid showing ads when server-side insertion is taking too long.
260 * <p>
261 * This is lock-free and thread-safe for multiple JSP/servlet threads
262 * to use the same slots at the same time.
263 */
264 public static final class AdSlotMonitor
265 {
266 /**Name of this ad slot; never null nor empty. */
267 public final String name;
268
269 /**Construct an instance with a non-empty, non=null name. */
270 public AdSlotMonitor(final String name)
271 {
272 if((name == null) || "".equals(name)) { throw new IllegalArgumentException(); }
273 this.name = name;
274 }
275
276 /**Used by showAds() to start timing an ad that it has allowed.
277 * Default visibility so not accessible to JSPs, but visible to showAds().
278 * <p>
279 * Calling start() again with no intervening done()
280 * may have no effect and/or may provoke a warning.
281 */
282 void start(final SimpleLoggerIF log)
283 {
284 if(IsDebug.isDebug && (lastStartTime.get() != null)) { System.err.println("WARNING: AdSlotMonitor: "+name+": time not null at start"); }
285 final int count = startCount.incrementAndGet();
286 if(IsDebug.isDebug) { System.out.println("INFO: AdSlotMonitor: "+name+" start#"+count); }
287 // Occasionally log stats for each slot,
288 // logging the first result after a few ad fetches.
289 // Use the hash code to distinguish multiple instances with the same name.
290 if(3 == (count & 0xfff))
291 { log.log("AdSlotMonitor: "+name+"("+this.hashCode()+") count="+count+", ms smoothed="+genTimeSmoothedMs.get()); }
292 // Record the start time of this showing...
293 lastStartTime.set(new Long(System.currentTimeMillis()));
294 // Note that at least one ad is now being generated.
295 pending = true;
296 }
297
298 /**Used by JSP code after generating text for an ad that showAds() allowed.
299 * Public visibility so as to be accessible to JSPs.
300 * <p>
301 * Calling this if start() was not called (since the last done())
302 * may have no effect and/or may provoke a warning.
303 */
304 public void done()
305 {
306 final Long tStart = lastStartTime.get();
307 if(tStart == null)
308 {
309 System.err.println("ERROR: AdSlotMonitor: "+name+": time not set when done()");
310 return;
311 }
312
313 // Note that at least one ad seems to have completed successfully.
314 pending = false;
315
316 // Compute time taken to fetch/create the ad text.
317 final long adTime = System.currentTimeMillis() - tStart.longValue();
318 // Update running smoothed value.
319 // But move towards new value quickly if unacceptably large and larger than extant
320 // so as to back off quickly in case of poor/slow ad-server connectivity.
321 for( ; ; )
322 {
323 final int oldVal = genTimeSmoothedMs.get();
324 final int smoothed = ((adTime > AdConsts.MAX_SS_AD_WAIT_MS) && (adTime > oldVal))
325 ? (int) Math.min((adTime+oldVal)/2, Integer.MAX_VALUE)
326 : Math.max(0, (int) Math.min(Integer.MAX_VALUE,
327 ((oldVal * (ADSLOTMONIOR_TIME_CONST-1)) + adTime) / ADSLOTMONIOR_TIME_CONST));
328 if(IsDebug.isDebug) { System.out.println("INFO: AdSlotMonitor: "+name+" time="+adTime+", smoothed="+smoothed); }
329 // Repeat if we don't win the race against other updates...
330 if(genTimeSmoothedMs.compareAndSet(oldVal, smoothed)) { break; }
331 }
332
333 // Clear out this start time.
334 lastStartTime.set(null);
335 }
336
337 /**Set when any ad is start()ed, cleared when any ad is done().
338 * If this is true then there's probably an ad being filled in now or failed,
339 * if it is false then there is no ad pending or one completed successfully.
340 * <p>
341 * If there is currently no ad showing and all recent ads have completed fine
342 * then this will be false.
343 * <p>
344 * This is kept true for as short a time as possible.
345 * <p>
346 * We may choose not to show ads in some cases where one may be pending,
347 * though this is not any sort of guarantee because of concurrency/exceptions.
348 * <p>
349 * Marked volatile for timely sharing between threads.
350 */
351 private volatile boolean pending;
352
353 /**True if another showing of this slot may be pending. */
354 public boolean mayBePending() { return(pending); }
355
356 /**Per-thread record of last start() time for this ad slot; never null. */
357 private final ThreadLocal<Long> lastStartTime = new ThreadLocal<Long>();
358
359 /**Count of ads shown in this slot (or at least for which start() was called). */
360 private final AtomicInteger startCount = new AtomicInteger();
361
362 /**Smoothed time of recent ad slot generation time; never null, value non-negative.
363 * Starts with a just-unacceptable time, forcing a cautious 'slow start'.
364 */
365 private final AtomicInteger genTimeSmoothedMs = new AtomicInteger(AdConsts.MAX_SS_AD_WAIT_MS+1);
366
367 /**Get current 'smoothed' recent ad generation time; non-negative. */
368 public int getGenTimeSmoothedMs() { return(genTimeSmoothedMs.get()); }
369 }
370
371 /**Show ads or not on the current page and/or in the current slot?
372 * Returns false if ads are generally disabled, etc,
373 * though even in the case that the user session has been set to "lite" mode,
374 * will return true a small fraction of the time so as to show occasional ads.
375 * <p>
376 * We don't show ads if they've been explicitly disabled system-wide,
377 * or if the URI seems to indicate possibly-sensitive content...
378 * <p>
379 * Doesn't monitor a particular ad slot, ie is yes/no for the whole page,
380 * and thus nominally the result is the same
381 * whenever and however often called on a page,
382 * though we might start saying no if it has been a long time
383 * since this page's generation started.
384 *
385 * @return true if it is OK to show an ad
386 */
387 public static final boolean showAds(final DataSourceBean dsb,
388 final HttpServletRequest request)
389 { return showAds(dsb, request, null); }
390
391 /**Show ads or not on the current page and/or in the current slot?
392 * Returns false if ads are generally disabled, etc,
393 * though even in the case that the user session has been set to "lite" mode,
394 * will return true a small fraction of the time so as to show occasional ads.
395 * <p>
396 * We don't show ads if they've been explicitly disabled system-wide,
397 * or if the URI seems to indicate possibly-sensitive content...
398 *
399 * @param monitor if non-null should be a unique monitor token
400 * associated with one ad slot (or group of slots)
401 * to time how long it takes to generate content for that slot
402 * and decide whether to avoid showing the ad at all;
403 * only appropriate for server-side inserted ads
404 *
405 * @return true if it is OK to show an ad
406 */
407 public static final boolean showAds(final DataSourceBean dsb,
408 final HttpServletRequest request,
409 final AdSlotMonitor monitor)
410 {
411 // If not showing/supporting any ads then always return false.
412 if(!AdConsts.AD_SUPPORT)
413 { return(false); }
414
415 // If this page's start time is available
416 // then we disallow ads here if the page is already taking too long to build
417 // to reduce annoyance to users.
418 final Object sto = request.getAttribute(org.hd.d.pg2k.webSvr.util.WebConsts.REQUEST_START_DATETIME_ATTR_NAME);
419 int timeSoFar = 0;
420 if(sto instanceof Date)
421 {
422 final Date d = (Date) sto;
423 final long timeSoFarL = System.currentTimeMillis() - d.getTime();
424 if(timeSoFar >= WebConsts.MAX_PG_DOWNLOAD_MS)
425 { return(false); }
426 if((int)timeSoFarL > 0)
427 { timeSoFar = (int)timeSoFarL; }
428 }
429
430 try
431 {
432 final GenProps gp = dsb.getGenProps(-1);
433 // If ads have been disabled in the GenProps then return false.
434 if(!gp.getWEBSVR_SIMPLEAD_ALLOW())
435 { return(false); }
436
437 // Don't show (all) ads on first page view (a referral) for a new user, if so configured.
438 // This doesn't necessarily hide ads from spiders or direct/type-in navigation,
439 // but it should remove ads (for example) from a landing page for a PPC referral.
440 if(AdConsts.DROP_ADS_ON_FIRST_PAGE_VIEW)
441 {
442 final String referringHost = WebUtils.requestProbablyReferredFromExternalSite(request);
443 if((referringHost != null) && !referringHost.isEmpty())
444 { return(false); }
445 }
446
447 // Don't show ads on top-level pages to keep them as light/uncluttered/fast as possible.
448 final String requestURI = request.getRequestURI();
449 if((null != requestURI) && (requestURI.lastIndexOf('/') < 1))
450 { return(false); }
451
452 // Don't show ads against potentially-sensitive content...
453 final Object fro = request.getAttribute(ThroughputMonitorFilterPG2K.PNAME_SAVED_REQ_URI);
454 final String canonReqURI = (fro instanceof String) ? canonicaliseURI((String) fro) : request.getRequestURI();
455 if(GenUtils.isSensitive(canonReqURI, gp))
456 {
457 if(IsDebug.isDebug) { System.err.println("Page may be sensitive: "+request.getPathInfo()); }
458 return(false);
459 }
460
461 // Show ads anyway about 10% of the time in "lite" mode.
462 final SessionVarBean svb = SessionVarBean.getExtantSessionVars(request, false);
463 if((svb != null) && svb.isSessionVarLiteUI() && (0 != Rnd.fastRnd.nextInt(LITE_AD_SHOW_FRACTION)))
464 { return(false); }
465 }
466 catch(final Exception e)
467 {
468 e.printStackTrace(); // Note unexpected error.
469 return(false); // And avoid showing any ad...
470 }
471
472 // If this is a monitored slot then do some extra slot-specific work before saying yes...
473 if(monitor != null)
474 {
475 // Get estimated time to generate/fill this slot.
476 final int genTime = monitor.getGenTimeSmoothedMs();
477 // Skip this slot (most of the time) if likely to make page generation overrun.
478 // TODO: consider using a lower threshold if site is already overloaded/slow.
479 if((genTime >= AdConsts.MAX_SS_AD_WAIT_MS) ||
480 ((genTime + timeSoFar) >= (3*WebConsts.MAX_PG_DOWNLOAD_MS)/4))
481 {
482 // This slot seems to be too slow to run,
483 // but re-sample once in a while to recover from transient problems.
484 // Sample less often the slower the ad slot is to generate/fetch/fill.
485 // Never resample more than ~1 in 4 at most to avoid annoying users
486 // if ad-server connectivity still is poor.
487 if(0 != Rnd.fastRnd.nextInt(3 + (genTime/AdConsts.MAX_SS_AD_WAIT_MS)))
488 { return(false); }
489 }
490
491 // Since we will be allowing this slot to run,
492 // start its timer running now.
493 monitor.start(dsb.getLogger());
494 }
495
496 return(true); // OK to show an ad...
497 }
498
499 /**Decides if an underperforming ad slot should be skipped (ie not filled).
500 * This uses the system variable info indicated to decide how well
501 * the ad slot is performing, and then chooses randomly with an appropriate
502 * probability whether it should be filled or skipped.
503 * <p>
504 * Reducing the number of (non-performing) ads shown to users
505 * probably improves the user experience in various ways
506 * and probably also increases the effective revenue from other slots.
507 * <p>
508 * The system variable may represent something such as page views or CTR.
509 * <p>
510 * This must not throw any exceptions so as to avoid killing any JSP
511 * that it happens to be called from.
512 * <p>
513 * TODO: May want to factor in local stats specially at some point.
514 *
515 * @param vars handle on the system variables; never null
516 * @param history event history (definition) to be examined; never null
517 * @param eventPeriod period we are interested in from event history;
518 * must be valid for the given event and non-null
519 * @param slotIdentifier the unique ID for advertising "events" for this slot;
520 * must be valid value and type for specified history and non-null
521 *
522 * @return true if the slot should be skipped this time,
523 * false if it should be filled
524 */
525 public static boolean skipUnderperformingAdSlot(final SimpleVariablePipelineIF vars,
526 final SimpleVariableDefinition history,
527 final EventPeriod eventPeriod,
528 final Object slotIdentifier)
529 {
530 // First-pass sanity check on arguments.
531 if((vars == null) || (eventPeriod == null) ||
532 (history == null) || !history.isEvent())
533 { throw new IllegalArgumentException(); }
534
535 // For a null or inappropriate ID we play safe and skip the ad.
536 if((slotIdentifier == null) || !history.checkType(slotIdentifier))
537 { return(true); }
538
539 // If not dynamically choosing layouts then don't skip much...
540 if(!AdConsts.DYNAMICALLY_CHOOSE_AD_NETWORK)
541 { return(false); }
542
543 // If we seem to have sufficient/meaningful click-through data
544 // then we can use it to help us decide about showing an ad here.
545 // Show the ad if the exhibit is any of:
546 // * well-ranked recently by click-through
547 // * in the all-time top-N-ranked clicked-thru slots
548 // * reasonably-highly ranked from all distinct values recorded
549 // * randomly about 1 time in N (to continue testing the slot slowly).
550 // The end result is that only a small number of relatively-high CTR
551 // URIs should get ads shown in this slot,
552 // but all exhibits will get an ad shown in this slot occasionally.
553
554 try
555 {
556 // If the slot has ranked well "recently"
557 // then fill the slot again now.
558 // (We check the "prev" first for efficiency,
559 // since it should always be able to come from local cache.)
560 // TODO: consider normalising to CTR.
561 final EventVariableValue eventsYesterday = vars.getEventValue(history, eventPeriod, false);
562 if(eventsYesterday.getRank(slotIdentifier) < (eventsYesterday.getTotalDistinctValues()/3))
563 { return(false); /* Show the ad! */ }
564 final EventVariableValue eventsToday = vars.getEventValue(history, eventPeriod, true);
565 if(eventsToday.getRank(slotIdentifier) < (eventsToday.getTotalDistinctValues()/2))
566 { return(false); /* Show the ad! */ }
567
568 // Now we look into the full collected history for this value.
569 final EventVariableValue[] all = vars.getEventValues(history, eventPeriod, 0, null);
570
571 // If no "all" history at all or not enough to be significant
572 // then fill this ad slot until there is more data available.
573 if(all.length < 1) { return(false); /* Show the ad! */ }
574 final EventVariableValue allEVV = all[0];
575 if(allEVV == null) { return(false); /* Show the ad! */ }
576 final int totalDistinctValues = allEVV.getTotalDistinctValues();
577 if(totalDistinctValues < TOP_N_RANK_AD_LIMIT*2) { return(false); /* Show the ad! */ }
578
579 // The better that this slot has performed over time,
580 // then the lower the chance of us "skipping" this slot.
581 // We leave a roughly 1-in-N chance of showing the ad anyway
582 // so that we can continue to gather stats, for example.
583 // We ensure that the top-ranked slot is always filled.
584 return((Rnd.fastRnd.nextInt(1 + Math.min(allEVV.getRank(slotIdentifier), totalDistinctValues)) >
585 Math.max(TOP_N_RANK_AD_LIMIT, totalDistinctValues/TOP_N_RANK_AD_LIMIT)));
586 }
587 // catch(final IOException e)
588 // {
589 // // Something went wrong: show an ad to pay for repairs!
590 // System.err.println("WARNING: skipUnderperformingAdSlot() caught an IOException.");
591 // return(false);
592 // }
593 catch(final Throwable e)
594 {
595 // Something went wrong: show an ad to pay for repairs!
596 System.err.println("WARNING: skipUnderperformingAdSlot() caught an Exception.");
597 return(false);
598 }
599 }
600
601 /**Maximum leading portion of request URI that we will consider significant.
602 * This is based on the maximum legitimate exhibit name length
603 * (for when all or part of an exhibit name is embedded in a URI)
604 * plus an allowance for overhead.
605 */
606 private static final int MAX_SIGNIFICANT_URI_PREFIX = ExhibitName.MAX_NAME_LENGTH + 32;
607
608 /**Time we wait for a user to click through on ads (of the order of minutes), in ms. */
609 private static final int CLICKTHRU_MIN_WAIT_TIME_MS = 10 * 60 * 1000 +
610 Rnd.fastRnd.nextInt(9 * 60 * 1000);
611
612 /**Create and post the handler for a click-through if possible and returns the unique listenerID.
613 * This replaces any existing listener for this users,
614 * eg if the user goes back and clicks then we won't capture it.
615 * <p>
616 * This returns null if we could not create a listener.
617 *
618 * @param request client's HTTP request; never null
619 * @param vars where stats updates are posted; never null
620 * @param otherTags any non-null (and non-empty) values
621 * (which should be distinctive but short)
622 * are recorded to GENSTATS_STRING_GLOBAL_EVENT upon a click-through
623 *
624 * @return listenerID, or null if one could not be generated
625 */
626 public static String createAndFileClickThruListener(final HttpServletRequest request,
627 final SimpleVariablePipelineIF vars,
628 final String ... otherTags)
629 {
630 if(vars == null) { return(null); }
631
632 // Take a copy of any tags passed in to fire upon click-through.
633 final String otherTagsCopy[] = (otherTags == null) ? null : otherTags.clone();
634
635 // Treat the exhibitName as null unless valid at least syntactically.
636 final Object eno = request.getAttribute("exhibitName");
637 final String exhibitName =((eno instanceof String) &&
638 ExhibitName.validNameSyntax((String) eno)) ?
639 ((String) eno) : null;
640
641 final String dpID = WebUtils.generateUserDataPointID("ClickThru-", request);
642 if(dpID == null) { return(null); }
643
644
645 // Compute the unique/canonical URI for the page displaying the ad,
646 // or null if not available.
647 final Object fro = request.getAttribute(ThroughputMonitorFilterPG2K.PNAME_SAVED_REQ_URI);
648 final String canonReqURI = (fro instanceof String) ? canonicaliseURI((String) fro) : null;
649
650
651 // Compute expiry time (with a random element).
652 final long expireBy = System.currentTimeMillis() + CLICKTHRU_MIN_WAIT_TIME_MS +
653 Rnd.fastRnd.nextInt(3*CLICKTHRU_MIN_WAIT_TIME_MS);
654
655 // Capture the visitor's IP address for later (in case they vote).
656 InetAddress ca = null;
657 try { ca = InetAddress.getByName(request.getRemoteAddr()); }
658 catch(final UnknownHostException e) { } // Ignore errors...
659 final InetAddress clickerIPAddr = ca;
660
661 // Create new listener.
662 final StatsSink.AbstractStatsListener newAsl =
663 new ClickThruHandler(dpID, expireBy, otherTagsCopy, canonReqURI,
664 clickerIPAddr, new WeakReference<SimpleVariablePipelineIF>(vars), exhibitName);
665
666 // Get the listener ID...
667 final String listnerID = newAsl.uniqueListenerID;
668
669 // File the request!
670 StatsSink.addListenerForDataPoint(newAsl);
671
672 return(listnerID);
673 }
674
675 /**Canonicalise URI (lower-case and truncate if need be; null is returned as null. */
676 public static String canonicaliseURI(final String uri)
677 {
678 if(uri == null) { return(null); }
679
680 // Compute the unique/canonical URI for the page displaying the ad.
681 // Note that because some parts of our URIs are case-insensitive
682 // we lower-case the recorded URI to collapse the state-space a little.
683 // We truncate the URI length again to trim the state space.
684 final String lcReqURI = uri.toLowerCase();
685 return((lcReqURI.length() <= MAX_SIGNIFICANT_URI_PREFIX) ?
686 lcReqURI : lcReqURI.substring(0, MAX_SIGNIFICANT_URI_PREFIX));
687 }
688
689
690 /**Compute value (in arbitrary units) of named thousand CPM-ad-display impressions; non-negative.
691 * This will apply a default value if no specific one applies.
692 */
693 public static int computeECPM(final GenProps gp, final String statName)
694 {
695 assert(gp != null);
696
697 // Look for generic key value for this stat and parse any value as int.
698 final String s = gp.getGen().get("eCPM." + statName);
699 if(s != null)
700 {
701 try { return(Integer.parseInt(s, 10)); }
702 catch(final NumberFormatException e)
703 { e.printStackTrace(); /* Whinge but fall through to default. */ }
704 }
705
706 // Return default value of 0.
707 return(0);
708 }
709
710 /**Compute value (in arbitrary units) of named click-though; non-negative.
711 * This will apply a default value if no specific one applies.
712 */
713 public static int computeECPC(final GenProps gp, final String statName)
714 {
715 assert(gp != null);
716
717 // Look for generic key value for this stat and parse any value as int.
718 final String s = gp.getGen().get("eCPC." + statName);
719 if(s != null)
720 {
721 try { return(Integer.parseInt(s, 10)); }
722 catch(final NumberFormatException e)
723 { e.printStackTrace(); /* Whinge but fall through to default. */ }
724 }
725
726 // Return default value of 1.
727 return(1);
728 }
729
730 /**Minimum sample period to decide which is best-performing layout, ms; strictly positive.
731 * This should be long enough to cover:
732 * <ul>
733 * <li>At least one rotation of all possible major ad-layout styles.
734 * <li>At least one rotation of all major/minor ad-layout styles together.
735 * <li>At least one day to allow for daily traffic cycles/patterns.
736 * <li>At least one week/month to allow for traffic cycles/patterns.
737 * <li>Long enough to survive glitches such as frequent restarts that
738 * "lose" low-frequency events.
739 * </ul>
740 * but not so long as to incur a huge calculation penalty
741 * nor make it very slow to respond to changes.
742 * <p>
743 * (It may be wise to factor in a small component of the all-time numbers.)
744 * <p>
745 * A length of just over a week-multiple (to give some extra emphasis to
746 * patterns pertient to the day just coming up) may be good.
747 */
748 private static final long BEST_AD_LAYOUT_SAMPLE_PERIOD_MIN_MS = 35 * 24 * 3600 * 1000L;
749
750 /**Ad-layout selection period expressed in VLONG units; strictly positive. */
751 private static final int BEST_AD_LAYOUT_SAMPLE_PERIOD_MIN_VLONG_UNITS = 1 +
752 (int) (BEST_AD_LAYOUT_SAMPLE_PERIOD_MIN_MS / SystemVariables.EVENT_INTERVAL_VLONG_TERM_MS);
753
754 /**Extra weighting given to "within last week" clickthrough stats (normal==1); strictly positive.
755 * This is based on the assumption that recent behaviour is the most telling of the neat future.
756 */
757 private static final int WT_LAST_WEEK = 2;
758
759 /**Extra weighting given to "this day last week" clickthrough stats (normal==1); strictly positive.
760 * This is based on the assumption that surfers' behaviour shows a weekly cycle.
761 */
762 private static final int WT_THIS_DAY_LAST_WEEK = 3;
763
764 /**Recent-stats weighting period (1 week) expressed in VLONG units; strictly positive. */
765 private static final int WT__VLONG_UNITS = Math.max(1,
766 (int) ((7 * 24 * 3600 * 1000L) / SystemVariables.EVENT_INTERVAL_VLONG_TERM_MS));
767
768 /**Return optimal value(s) best-first of parameter based on past data, recomputing/cacheing as needed; never null.
769 * Any recomputation is done asynchronously.
770 * We may wait a very short time for the computation to finish
771 * but if the result isn't very quickly available then we return a default
772 * (and cache it for a short while to help avoid concurrent recomputations).
773 *
774 * @param defaultValue the default value to use
775 * when we cannot (immediately) compute the optimum; never null
776 *
777 * @return immutable computed optimum values best first,
778 * or the (non-null) default as a singleton
779 * if there is no (valid) cached optimum value
780 * and we are still (re)computing it asynchronously;
781 * never null, never empty, never containing nulls nor duplicates
782 */
783 @SuppressWarnings("unchecked")
784 private static <E extends Enum<E>> List<E> _computeOptimalLayout(
785 final String callerName,
786 final DataSourceBean dsb,
787 final String layoutKHitsEventPrefix,
788 final String ctEventPrefix,
789 final DataSourceBean.UnlinkedKey cacheKey,
790 final E defaultValue)
791 throws InterruptedException
792 {
793 assert(callerName != null);
794 assert(dsb != null);
795 assert(layoutKHitsEventPrefix != null);
796 assert(ctEventPrefix != null);
797 assert(cacheKey != null);
798 assert(defaultValue != null);
799
800 // Retrieve any currently-cached value.
801 // or atomically create a temporary default one.
802 Tuple.Pair<List<E>, Long> cachedResult;
803 while(((null == (cachedResult = (Tuple.Pair<List<E>, Long>) dsb.getUnlinkedValue(cacheKey)))) ||
804 ((cachedResult.second.longValue()) < System.currentTimeMillis()))
805 {
806 // Store "cheat" guessed-best value immediately
807 // (providing that we can avoid overwriting a new non-stale value):
808 // 1) To prevent/minimise re-computation races.
809 // 2) In case something stops us doing the computation.
810 // We store the defaultValue supplied whilst calculating,
811 // and we allow plenty of time to do the recomputation,
812 // but not so long as to prevent retry reasonably quickly
813 // after a temporary problem (eg connectivity).
814 // We throw in the random time component to help avoid collisions/races
815 // with the activities of other parts of the system.
816 // We carefully avoid overwriting any non-null/non-stale value
817 // that materialises just after we get into the loop
818 // by use of the atomic primitives available for the DSB store.
819 final Tuple.Pair<List<E>, Long> holdingValue = new Tuple.Pair<List<E>, Long>(Collections.singletonList(defaultValue),
820 System.currentTimeMillis() + 300123 + Rnd.fastRnd.nextInt(600123));
821 final boolean anotherThreadWorking;
822 if(null == cachedResult)
823 {
824 // If a value has been stored against this key
825 // then another thread has started computing this value.
826 anotherThreadWorking = (null != dsb.putIfAbsentUnlinkedValue(cacheKey, holdingValue));
827 }
828 else
829 {
830 // If the stored value has changed under our feet
831 // then another thread has started computing this value.
832 anotherThreadWorking = !dsb.replaceUnlinkedValue(cacheKey, cachedResult, holdingValue);
833 }
834
835 // If we detect that another thread has started working on this value
836 // then abort now (returning the default) to avoid duplicate computations.
837 if(anotherThreadWorking) { return(holdingValue.first); }
838
839 final long recalcStart = System.currentTimeMillis();
840 // Treat this task as I/O-bound, which early ones will probably be.
841 final Future<List<E>> f = ThreadUtils.nonCPUThreadPool.submit(new Callable<List<E>>(){
842 /**Do the recomputation. */
843 public final List<E> call()
844 {
845 if(IsDebug.isDebug) { System.out.println("["+callerName+"(): recalculating optimal format/layout...]"); }
846
847 // Recompute...
848 try
849 {
850 final org.hd.d.pg2k.svrCore.props.GenProps gp = dsb.getGenProps(-1);
851
852 final long prevInterval = EventPeriod.VLONG.getIntervalNumber(System.currentTimeMillis()) - 1;
853 final BitSet whichIntervals = new BitSet(BEST_AD_LAYOUT_SAMPLE_PERIOD_MIN_VLONG_UNITS);
854 whichIntervals.set(0, BEST_AD_LAYOUT_SAMPLE_PERIOD_MIN_VLONG_UNITS);
855 final SimpleVariableDefinition def = SystemVariables.GENSTATS_STRING_GLOBAL_EVENT;
856 assert(def.isEvent() && (def.getType() == SimpleVariableDefinition.TYPE_STRING));
857
858 // Get global stats for enough historical periods, plus current.
859 // We put the "current" value at the end for efficiency
860 // and to conserve any recent-values weighting for the initial whole days/intervals.
861 final List<EventVariableValue> evvs = new ArrayList<EventVariableValue>(Arrays.asList(dsb.getEventValues(def,
862 EventPeriod.VLONG, prevInterval, whichIntervals)));
863 evvs.add(dsb.getEventValue(def, EventPeriod.VLONG, true));
864
865 // Optionally get local stats for the period matching the global stats.
866 final List<EventVariableValue> evvsLocal;
867 if(LOCAL_AD_STATS_WEIGHTING > 0)
868 {
869 final SimpleVariableDefinition defLocal = SystemVariables.GENSTATS_STRING_LOCAL_EVENT;
870 assert(defLocal.isEvent() && (defLocal.getType() == SimpleVariableDefinition.TYPE_STRING));
871 // We put the "current" value at the end for efficiency
872 // and to conserve any recent-values weighting for the initial whole days/intervals.
873 evvsLocal = new ArrayList<EventVariableValue>(Arrays.asList(dsb.getEventValues(defLocal,
874 EventPeriod.VLONG, prevInterval, whichIntervals)));
875 evvsLocal.add(dsb.getEventValue(defLocal, EventPeriod.VLONG, true));
876 }
877 else { evvsLocal = Collections.emptyList(); }
878
879 // Estimate value of a thousand impressions (VPM)
880 // of a given format (in arbitrary units)
881 // to try to chose the relatively-optimal format.
882 //
883 // Total up click-throughs for and pages in each layout
884 // (ie generate properly normalised measure of CTR)
885 // and add in any "CPM" external (non-clickthrough) factors
886 // and record the "best" layout found so far.
887 // Complain (gently) if no useful data is found.
888
889 // In-order (highest/best first) VPM value for each format.
890 final SortedMap<Double, E> best = new TreeMap<Double, E>(Collections.reverseOrder());
891 for(final E pos : (E[]) defaultValue.getClass().getEnumConstants())
892 {
893 // Global stats.
894 int clicks = 0;
895 int pageViews = 0;
896 int realPageViews = 0;
897
898 // Name of the event used to record (each thousand)
899 // page hits in the given format/layout.
900 final String nameKHits = layoutKHitsEventPrefix + pos;
901 final String oneHitName = make1HitName(nameKHits);
902 // Get eCPM value.
903 final int eCPM = computeECPM(gp, nameKHits);
904
905 // Name of event to record ad click-through
906 // in the given format/layout.
907 final String nameCT = ctEventPrefix + pos;
908 // Get eCPC value.
909 final int eCPC = computeECPC(gp, nameCT);
910
911 // Tot up global stats.
912 for(int i = evvs.size(); --i >= 0; )
913 {
914 final EventVariableValue evv = evvs.get(i);
915 if(evv == null) { continue; }
916 final int multiplier = (i > WT__VLONG_UNITS) ? 1 :
917 ((i == WT__VLONG_UNITS) ? WT_THIS_DAY_LAST_WEEK : WT_LAST_WEEK);
918 final int count = evv.getCount(nameCT);
919 clicks += (multiplier > 1) ? (multiplier * count) : count;
920 int pv = 1000 * evv.getCount(nameKHits);
921 if(oneHitName != null)
922 { pv += evv.getCount(oneHitName); }
923 pageViews += (multiplier > 1) ? (multiplier * pv) : pv;
924 realPageViews += pv; // Always unweighted.
925 }
926
927 // Tot up local stats, if used.
928 int clicksLocal = 0;
929 int pageViewsLocal = 0;
930 int realPageViewsLocal = 0;
931 for(int i = evvsLocal.size(); --i >= 0; )
932 {
933 final EventVariableValue evvLocal = evvsLocal.get(i);
934 if(evvLocal == null) { continue; }
935 final int multiplier = (i > WT__VLONG_UNITS) ? 1 :
936 ((i == WT__VLONG_UNITS) ? WT_THIS_DAY_LAST_WEEK : WT_LAST_WEEK);
937 final int count = evvLocal.getCount(nameCT);
938 clicksLocal += (multiplier > 1) ? (multiplier * count) : count;
939 int pv = 1000 * evvLocal.getCount(nameKHits);
940 if(oneHitName != null)
941 { pv += evvLocal.getCount(oneHitName); }
942 pageViewsLocal += (multiplier > 1) ? (multiplier * pv) : pv;
943 realPageViewsLocal += pv; // Always unweighted.
944 }
945
946 if((pageViews > 0) || (pageViewsLocal > 0))
947 {
948 final double ctr = clicks / (double) Math.max(1, pageViews);
949 final double ctrLocal = clicksLocal / (double) Math.max(1, pageViewsLocal);
950
951 // We avoid fully factoring in the local CTR if not enough data collected yet,
952 // but we'll still fold in the local data in a less noisy/dramatic way.
953 // We generally insist on a minimum sample size to use local stats,
954 // unless the global stats aren't that much better (by sample count).
955 final boolean notEnoughLocalStats =
956 (realPageViewsLocal < MIN_LOCAL_STATS_PAGEVIEWS) &&
957 (realPageViewsLocal < realPageViews/2);
958 final double ctrWeighted = notEnoughLocalStats ?
959 ((clicks + LOCAL_AD_STATS_WEIGHTING*clicksLocal) / (double) Math.max(1, pageViews + LOCAL_AD_STATS_WEIGHTING*pageViewsLocal)) :
960 (ctr + (LOCAL_AD_STATS_WEIGHTING*ctrLocal)) / (1+LOCAL_AD_STATS_WEIGHTING);
961 final double vpmWeighted = eCPM + (eCPC * ctrWeighted * 1000);
962
963 dsb.log("["+callerName+"(): calc VPM for "+pos+" is "+((float)vpmWeighted)+" at a weighted CTR of "+(100*(float)ctrWeighted)+"% (local="+(100*(float)ctrLocal)+"%"+(notEnoughLocalStats?" (noisy)":"")+", global="+(100*(float)ctr)+"%).]");
964
965 // Record this VPM (if positive).
966 if(vpmWeighted > 0)
967 { best.put(new Double(vpmWeighted), pos); }
968 }
969 }
970
971 if(!best.isEmpty())
972 {
973 dsb.log("["+callerName+"(): optimal ad format/layout calculated to be: "+best.get(best.firstKey())+" with a VPM of "+(best.firstKey().floatValue())+": took "+(System.currentTimeMillis()-recalcStart)+"ms.]");
974
975 // Put off next computation until shortly after
976 // the start of the next interval
977 // (when we will have new data available)
978 // but not immediately after so as to avoid
979 // colliding with other recomputation work,
980 // keeping different sites/mirrors in reasonable sync.
981 final long nextInterval = prevInterval + 2;
982 final long recompTime = EventPeriod.VLONG.getIntervalStartTime(nextInterval) +
983 30123 + Rnd.fastRnd.nextInt(300123); // Keep site sync within ~5min.
984 // Extract the best-first formats as an immutable List.
985 final List<E> bestFirst = Collections.unmodifiableList(new ArrayList<E>(best.values()));
986 // Cache the result against this DataSourceBean unconditionally.
987 final Tuple.Pair<List<E>, Long> result = new Tuple.Pair<List<E>, Long>(
988 bestFirst, recompTime);
989 dsb.putUnlinkedValue(cacheKey, result);
990 if(IsDebug.isDebug) { dsb.log("["+callerName+"(): next recalc not before "+(new Date(recompTime))+".]"); }
991
992 return(bestFirst);
993 }
994 else // Complain if we could not find the data needed...
995 {
996 System.err.println("WARNING: "+callerName+"(): recalc did not find enough data to compute optimal format/layout, using: " + defaultValue);
997 }
998 }
999 catch(final IOException e)
1000 {
1001 // Note that we were blown out of the water this time...
1002 System.err.println("WARNING: "+callerName+"(): recalc stopped by IOException: " + e.getMessage());
1003 }
1004 catch(final Throwable t)
1005 {
1006 // Note that we were blown out of the water this time...
1007 System.err.println("WARNING: "+callerName+"(): recalc stopped by unexpected Throwable: " + t.getMessage());
1008 t.printStackTrace();
1009 }
1010
1011 return(Collections.singletonList(defaultValue)); // No result could be computed.
1012 }
1013 });
1014
1015 // NOTE: This Future.get(wait) call was a problem under JDK 1.5.0_03...
1016 // Wait a very short while in case computation finishes quickly.
1017 // Otherwise the caller always gets to use the default instead.
1018 try { return(f.get(1 + CoreConsts.MAX_INTERACTIVE_DELAY_MS/3, TimeUnit.MILLISECONDS)); }
1019 catch(final Exception e) { /* e.printStackTrace(); */ }
1020
1021 // Extract the result.
1022 // (Guaranteed never to be null since defaultValue was not.)
1023 cachedResult = (Pair<List<E>, Long>) dsb.getUnlinkedValue(cacheKey); // This *may* be the new result.
1024 }
1025
1026 // Check some quick invariants of the returned value.
1027 assert(cachedResult != null);
1028 assert(cachedResult.first != null);
1029 assert(cachedResult.first.size() > 0);
1030
1031 return(cachedResult.first);
1032 }
1033
/**Page-view counters for each tower internal format, keyed by AdTowerInternalLayout; never null.
 * Every key is given a zero-valued counter by the static initialiser below,
 * so lookups for any AdTowerInternalLayout value do not return null;
 * the map is intended to be used read-only thereafter
 * (only the AtomicInteger values change).
 * <p>
 * FIXME: should be keyed from DataSourceBean to allow for multiple sites in one VM.
 */
private static final EnumMap<AdTowerInternalLayout, AtomicInteger> pvTowerInternalFormatHits =
    new EnumMap<AdTowerInternalLayout, AtomicInteger>(AdTowerInternalLayout.class);

/**Give every AdTowerInternalLayout value a zero counter in pvTowerInternalFormatHits. */
static
{
    final AdTowerInternalLayout[] layouts = AdTowerInternalLayout.values();
    for(int i = 0; i < layouts.length; ++i)
        { pvTowerInternalFormatHits.put(layouts[i], new AtomicInteger(0)); }
}
1051
1052 /**Key for thread-safe cache from Web site (DataSourceBean) to computed results for selectAdMidPageInternalLayout(); never null. */
1053 private static final DataSourceBean.UnlinkedKey _cache_sAMPIL_key = new DataSourceBean.UnlinkedKey("selectAdMidPageInternalLayout");
1054
1055 /**Preferred mid-page ad format. */
1056 public static final AdMidPageInternalLayout PREF_MP_FORMAT = AdMidPageInternalLayout.MRTF;
1057
1058 /**Number of distinct mid-page ad formats; strictly positive. */
1059 private static final int AMPI_NFORMATS = AdMidPageInternalLayout.values().length;
1060
1061 /**Chose internal layout of ad mid-age slot to be shown on page; never null.
1062 * Uses algorithm to ensure that all servers should generally choose
1063 * the same layout at any one time so that, for example,
1064 * switching between servers should not be disorienting.
1065 * <p>
1066 * May collect stats on layout to help monitor/select the most effective.
1067 *
1068 * @param request the RQPNAME_MIDPAGEINTLAYOUT attribute is set
1069 * to indicate which format has been selected
1070 */
1071 public static final AdMidPageInternalLayout selectAdMidPageInternalLayout(
1072 final DataSourceBean dsb,
1073 final HttpServletRequest request)
1074 throws InterruptedException
1075 {
1076 final AdMidPageInternalLayout result;
1077
1078 // Pick using prime-ish value much larger than number of available formats
1079 // so that a large chunk of ads will be in the computed "best" format.
1080 final Random r = (new Random(System.currentTimeMillis() >>> 19)); /* ~ 8mins */ r.nextLong();
1081 final int format = r.nextInt(3 + 2*AMPI_NFORMATS);
1082 // Show all formats by rotation at least a little...
1083 if(format < AMPI_NFORMATS) { result = AdMidPageInternalLayout.values()[format]; }
1084 // Extra showing for the preferred format and/or if not dynamically choosing layout/network.
1085 else if((!AdConsts.DYNAMICALLY_CHOOSE_AD_NETWORK) || (format == AMPI_NFORMATS)) { result = PREF_MP_FORMAT; }
1086 // Most slots are allocated to the 'best' formats.
1087 else
1088 {
1089 final List<AdMidPageInternalLayout> bestLayouts =
1090 _computeOptimalLayout("selectAdMidPageInternalLayout",
1091 dsb,
1092 AdConsts.MPINTLAYOUT_KHITS_EVENT_PREFIX,
1093 AdConsts.MPINTLAYOUT_EVENT_PREFIX,
1094 _cache_sAMPIL_key,
1095 PREF_MP_FORMAT);
1096 final int s = bestLayouts.size();
1097 assert(s > 0);
1098 // Have smoothish distribution between best formats,
1099 // but the top format always gets at least half the slots.
1100 result = r.nextBoolean() ? bestLayouts.get(0) : bestLayouts.get(r.nextInt(1+r.nextInt(s)));
1101 }
1102
1103 assert(result != null);
1104
1105
1106 // Note which mid-page internal format is being used,
1107 // and on each 1000 uses, log to the central stats.
1108 // We do this to reduce the load on the stats system.
1109 // Ignore hits from probable spiders.
1110 if(!WebUtils.requestProbablyFromSpider(request))
1111 {
1112 final AtomicInteger count = pvMPInternalFormatHits.get(result);
1113 final String kHitName = AdConsts.MPINTLAYOUT_KHITS_EVENT_PREFIX + result;
1114 countPageImpressions(count, dsb, kHitName);
1115 }
1116
1117
1118 // Automatically note with the request which format is being used.
1119 // This enables click-tracking and CTR measurement later.
1120 request.setAttribute(AdConsts.RQPNAME_MPINTLAYOUT, AdConsts.MPINTLAYOUT_EVENT_PREFIX + result);
1121
1122 return(result);
1123 }
1124
1125 /**Count page impressions in this layout/ad-style.
1126 * Logs thousands of impressions (kHits) to reduce event traffic,
1127 * but can log single hits initially for good resolution
1128 * and robustness for short-running servers and/or sparse events.
1129 */
1130 private static void countPageImpressions(final AtomicInteger count,
1131 final DataSourceBean dsb,
1132 final String kHitName)
1133 {
1134 final String oneHitName = make1HitName(kHitName);
1135 final int countNow = count.getAndIncrement();
1136 try
1137 {
1138 // Record a k-hit (one thousand clicks or hits).
1139 // If recording individual clicks initially
1140 // then we click down from 1000 + the higher threshold
1141 // rather than down from 1000.
1142 if(countNow >= 1000 * (1 + ((oneHitName == null) ? 0 : KHIT_THRESHOLD)))
1143 {
1144 count.addAndGet(-1000);
1145
1146 // Log (another) 1000 uses of this page/ad layout style...
1147 dsb.setVariable(new SimpleVariableValue(
1148 SystemVariables.GENSTATS_STRING_GLOBAL_EVENT,
1149 kHitName));
1150 dsb.setVariable(new SimpleVariableValue(
1151 SystemVariables.GENSTATS_STRING_LOCAL_EVENT,
1152 kHitName));
1153 }
1154 else if((oneHitName != null) && (countNow < 1000*KHIT_THRESHOLD))
1155 {
1156 // We record the first few hits individually.
1157 dsb.setVariable(new SimpleVariableValue(
1158 SystemVariables.GENSTATS_STRING_GLOBAL_EVENT,
1159 oneHitName));
1160 dsb.setVariable(new SimpleVariableValue(
1161 SystemVariables.GENSTATS_STRING_LOCAL_EVENT,
1162 oneHitName));
1163 }
1164 }
1165 catch(final IOException e)
1166 { e.printStackTrace(); /* Log but absorb any error. */ }
1167 }
1168
1169 /**Make single-hit event name from k-hit name, or return null if not possible.
1170 * This is done by replacing the initial "k" with a "1".
1171 */
1172 private static String make1HitName(final String kHitName)
1173 {
1174 if(!kHitName.startsWith("k")) { return(null); }
1175 return("1" + kHitName.substring(1));
1176 }
1177
/**Page-view counters for each mid-page internal format, keyed by AdMidPageInternalLayout; never null.
 * Every key is given a zero-valued counter by the static initialiser below,
 * so lookups for any AdMidPageInternalLayout value do not return null;
 * the map is intended to be used read-only thereafter
 * (only the AtomicInteger values change).
 * <p>
 * FIXME: should be keyed from DataSourceBean to allow for multiple sites in one VM.
 */
private static final EnumMap<AdMidPageInternalLayout, AtomicInteger> pvMPInternalFormatHits =
    new EnumMap<AdMidPageInternalLayout, AtomicInteger>(AdMidPageInternalLayout.class);

/**Give every AdMidPageInternalLayout value a zero counter in pvMPInternalFormatHits. */
static
{
    final AdMidPageInternalLayout[] layouts = AdMidPageInternalLayout.values();
    for(int i = 0; i < layouts.length; ++i)
        { pvMPInternalFormatHits.put(layouts[i], new AtomicInteger(0)); }
}
1195
1196
1197 /**Key for thread-safe cache from Web site (DataSourceBean) to computed results for selectAdTowerInternalLayout(); never null. */
1198 private static final DataSourceBean.UnlinkedKey _cache_sATIL_key = new DataSourceBean.UnlinkedKey("selectAdTowerInternalLayout");
1199
1200 /**Preferred tower ad format. */
1201 public static final AdTowerInternalLayout PREF_TOWER_FORMAT = AdTowerInternalLayout.WideMixed;
1202
1203 /**Chose internal layout of ad tower(s) to be shown on page; never null.
1204 * Uses algorithm to ensure that all servers should generally choose
1205 * the same layout at any one time so that, for example,
1206 * switching between servers should not be disorienting.
1207 * <p>
1208 * May collect stats on layout to help monitor/select the most effective.
1209 *
1210 * @param request the RQPNAME_TOWERINTLAYOUT attribute is set
1211 * to indicate which format has been selected
1212 */
1213 public static final AdTowerInternalLayout selectAdTowerInternalLayout(
1214 final DataSourceBean dsb,
1215 final HttpServletRequest request)
1216 throws InterruptedException
1217 {
1218 final AdTowerInternalLayout result;
1219
1220 // Pick using prime-ish value larger than number of available formats
1221 // so that most ads will be in the computed "best" format.
1222 final Random r = (new Random(System.currentTimeMillis() / (37 * 60 * 1101))); r.nextLong();
1223 final int format = r.nextInt(AdConsts.DYNAMICALLY_CHOOSE_AD_NETWORK ? 11 : 4);
1224 switch(format)
1225 {
1226 case 0:
1227 result = AdTowerInternalLayout.WideMixed;
1228 break;
1229
1230 case 1:
1231 result = AdTowerInternalLayout.WideAS;
1232 break;
1233
1234 case 2:
1235 result = AdTowerInternalLayout.NarrowMixed;
1236 break;
1237
1238 case 3:
1239 result = AdTowerInternalLayout.NarrowShort;
1240 break;
1241
1242 default: // Use computed "best" style most of the time.
1243 {
1244 final List<AdTowerInternalLayout> bestLayouts =
1245 _computeOptimalLayout("selectAdTowerInternalLayout",
1246 dsb,
1247 AdConsts.TOWERINTLAYOUT_KHITS_EVENT_PREFIX,
1248 AdConsts.TOWERINTLAYOUT_EVENT_PREFIX,
1249 _cache_sATIL_key,
1250 PREF_TOWER_FORMAT);
1251 final int s = bestLayouts.size();
1252 assert(s > 0);
1253 // Have smoothish distribution between best formats,
1254 // but the top format always gets at least half the slots.
1255 result = r.nextBoolean() ? bestLayouts.get(0) : bestLayouts.get(r.nextInt(1+r.nextInt(s)));
1256 break;
1257 }
1258 }
1259
1260 assert(result != null);
1261
1262
1263 // Note which tower internal format is being used,
1264 // and on each 1000 uses, log to the central stats.
1265 // We do this to reduce the load on the stats system.
1266 // Ignore hits from probable spiders.
1267 if(!WebUtils.requestProbablyFromSpider(request))
1268 {
1269 final AtomicInteger count = pvTowerInternalFormatHits.get(result);
1270 final String kHitName = AdConsts.TOWERINTLAYOUT_KHITS_EVENT_PREFIX + result;
1271 countPageImpressions(count, dsb, kHitName);
1272 }
1273
1274
1275 // Automatically note with the request which tower format is being used.
1276 // This enables click-tracking and CTR measurement later.
1277 request.setAttribute(AdConsts.RQPNAME_TOWERINTLAYOUT, AdConsts.TOWERINTLAYOUT_EVENT_PREFIX + result);
1278
1279 assert(result != null);
1280 return(result);
1281 }
1282
1283 /**Preferred page ad layout. */
1284 public static final AdTowerPos PREF_PAGE_FORMAT = AdTowerPos.NOWHERE;
1285
1286 /**Key for thread-safe cache from Web site (DataSourceBean) to computed results for selectPageAdLayout(); never null. */
1287 private static final DataSourceBean.UnlinkedKey _cache_sPAL_key = new DataSourceBean.UnlinkedKey("selectPageAdLayout");
1288
1289 /**Choose general ad/page layout; never null.
1290 * Uses algorithm to ensure that all servers should generally choose
1291 * the same layout at any one time so that, for example,
1292 * switching between servers should not be disorienting.
1293 * <p>
1294 * May collect stats on layout to help monitor/select the most effective.
1295 * <p>
1296 * Note that most effective combo seems to be:
1297 * <ul>
1298 * <li>Wide tower on left (or no tower at all).
1299 * <li>Horizontal banner at top.
1300 * </ul>
1301 * <p>
1302 * Computed values may be cached wrt the DataSourceBean variable,
1303 * so should consistently be called with the same instance for one site.
1304 *
1305 * @param probeOnly this call is to find out what
1306 * the layout <em>should be</em>,
1307 * and should not count as a page view
1308 */
1309 public static final AdTowerPos selectPageAdLayout(final DataSourceBean dsb,
1310 final HttpServletRequest request,
1311 final boolean siteIsBusy,
1312 final boolean probeOnly)
1313 throws InterruptedException
1314 {
1315 // Short-cut when probing and no dynamic selection allowed.
1316 if((!AdConsts.DYNAMICALLY_CHOOSE_AD_NETWORK) && probeOnly)
1317 { return(PREF_PAGE_FORMAT); }
1318
1319 // Placement of tower to use.
1320 AdTowerPos towerPos;
1321
1322 // Chose tower-ad style using "hash" such that
1323 // all servers/mirrors will choose same style at once.
1324 // We choose a new layout several times per day
1325 // with a period that rotates relative to a 24-hour cycle
1326 // so that we don't always make the same choice (or switch)
1327 // at the same hour of the day.
1328 final long seed = System.currentTimeMillis() / (3 * 3600 * 1001);
1329 // Force every style to be used at least once in each rotation.
1330 // The best-performing style should get a clear majority of slots.
1331 final Random r = new Random(seed); r.nextLong();
1332 switch(probeOnly ? -1 : r.nextInt((!AdConsts.DYNAMICALLY_CHOOSE_AD_NETWORK) ? 3 : 19)) // Prime much larger than number of layouts.
1333 {
1334 case 0:
1335 towerPos = AdTowerPos.RIGHT;
1336 break;
1337
1338 case 1:
1339 towerPos = AdTowerPos.RIGHT_DROPPED;
1340 break;
1341
1342 case 2:
1343 towerPos = AdTowerPos.LEFT;
1344 break;
1345
1346 case 3:
1347 towerPos = AdTowerPos.NOWHERE;
1348 break;
1349
1350 default:
1351 // Use computed "best" style most of the time.
1352 // Forced in "probe-only" mode.
1353 {
1354 final List<AdTowerPos> bestLayouts =
1355 _computeOptimalLayout("selectPageAdLayout",
1356 dsb,
1357 AdConsts.PAGE_LAYOUT_KHITS_EVENT_PREFIX,
1358 AdConsts.LAYOUT_STYLE_EVENT_PREFIX,
1359 _cache_sPAL_key,
1360 PREF_PAGE_FORMAT);
1361 final int s = bestLayouts.size();
1362 assert(s > 0);
1363 // When probing we only want to know what the "best" format is.
1364 if(probeOnly || (s == 1)) { return(bestLayouts.get(0)); }
1365 // Have smoothish distribution between best formats,
1366 // but the top format always gets at least 67% of the slots.
1367 towerPos = (r.nextInt(3) != 0) ? bestLayouts.get(0) : bestLayouts.get(r.nextInt(1+r.nextInt(1+r.nextInt(s))));
1368 break;
1369 }
1370 }
1371
1372 // Should definitely be set by here...
1373 assert(towerPos != null);
1374
1375 // If the ads are not on the left
1376 // then we avoid ads on top-level pages (such as the home page)
1377 // or if the site is busy.
1378 // But if the ads are on the left then we don't do this
1379 // as it could be very disturbing to move the page content around.
1380 if(towerPos != AdTowerPos.LEFT)
1381 {
1382 // Drop right-hand towers when site is busy to reduce load a little.
1383 if(siteIsBusy)
1384 { towerPos = AdTowerPos.NOWHERE; }
1385 // Drop non-left (LHS) towers for top-level pages.
1386 else if(request != null)
1387 {
1388 final String reqURI = request.getRequestURI();
1389 final boolean topLevelURI = (reqURI == null) || (reqURI.lastIndexOf('/') <= 0);
1390 if(topLevelURI)
1391 { towerPos = AdTowerPos.NOWHERE; }
1392 }
1393 }
1394
1395
1396 // Note which layout style is being used,
1397 // and on each 1000 uses, log to the central stats.
1398 // We do this to reduce the load on the stats system.
1399 // Ignore hits from probable spiders.
1400 if(!probeOnly && !WebUtils.requestProbablyFromSpider(request))
1401 {
1402 final AtomicInteger count = pvLayoutHits.get(towerPos);
1403 final String kHitName = AdConsts.PAGE_LAYOUT_KHITS_EVENT_PREFIX + towerPos;
1404 countPageImpressions(count, dsb, kHitName);
1405 }
1406
1407
1408 return(towerPos);
1409 }
1410
1411 /**Compute the full event name for recording a clickthrough with a particular layout style. */
1412 public static String computeCTEventName(final AdTowerPos towerPos)
1413 {
1414 return(AdConsts.LAYOUT_STYLE_EVENT_PREFIX + towerPos);
1415 }
1416
/**Page-view counters for each basic page/ad layout-style, keyed by AdTowerPos; never null.
 * Every key is given a zero-valued counter by the static initialiser below,
 * so lookups for any AdTowerPos value do not return null;
 * the map is intended to be used read-only thereafter
 * (only the AtomicInteger values change).
 * <p>
 * FIXME: should be keyed from DataSourceBean to allow for multiple sites in one VM.
 */
private static final EnumMap<AdTowerPos, AtomicInteger> pvLayoutHits =
    new EnumMap<AdTowerPos, AtomicInteger>(AdTowerPos.class);

/**Give every AdTowerPos value a zero counter in pvLayoutHits. */
static
{
    final AdTowerPos[] positions = AdTowerPos.values();
    for(int i = 0; i < positions.length; ++i)
        { pvLayoutHits.put(positions[i], new AtomicInteger(0)); }
}
1434
1435 /**Maximum exhibits to show in drop-in ad-tower replacement text; strictly positive. */
1436 private static final int DA_MAX_EXHIBITS_TO_SHOW = 4;
1437
1438 /**Get the HTML content for a fall-back/fill-in ad-tower filler; never null but may be "".
1439 * Returns "" if nothing appropriate to show,
1440 * or if nothing available quickly (since we don't want to delay page loading pointlessly),
1441 * or in case of error.
1442 */
1443 public static String generateAdTowerFallbackHTML(final ServletContext application,
1444 final LocaleBean localeBean,
1445 final int pxWidth,
1446 final int pxHeight)
1447 {
1448 if((application == null) ||
1449 (localeBean == null) ||
1450 (pxWidth <= 0) || (pxHeight <= 0))
1451 { throw new IllegalArgumentException(); }
1452
1453 try
1454 {
1455 final DataSourceBean dataSource = DataSourceBean.getApplicationInstance(application);
1456
1457 // Title to display (short and sweet).
1458 // Filled in in case statement.
1459 String DA_title;
1460 Name.ExhibitFull DA_exhibits[];
1461
1462 switch(Rnd.fastRnd.nextInt(3))
1463 {
1464 // Show the most recent additions...
1465 case 1:
1466 {
1467 DA_title = "<a href=\"" + WebConsts.VIRTUAL_COLLECTIONS_NEW_PAGE + "\" target=\"_top\">" +
1468 localeBean.getLocalisedMessage("common.selection.newExhibits.title") +
1469 "</a>";
1470 DA_exhibits = HTMLThumbnailInsertGenerators.getNewExhibitSelection(application,
1471 DA_MAX_EXHIBITS_TO_SHOW,
1472 (Rnd.fastRnd.nextBoolean() ? null : Rnd.fastRnd),
1473 true);
1474 // Fall through to next option if this did not generate anything...
1475 if(DA_exhibits.length > 0) { break; }
1476 }
1477
1478 // Show the "best" DA_exhibits
1479 // but only if we are relatively idle
1480 // since this can be expensive.
1481 // (Else fall through to cheaper default.)
1482 case 2: if(WebUtils.isLightlyLoaded(application))
1483 {
1484 DA_title = "<a href=\"" + WebConsts.VIRTUAL_COLLECTIONS_BEST_PAGE + "\" target=\"_top\">" +
1485 localeBean.getLocalisedMessage("common.selection.bestExhibits.title") +
1486 "</a>";
1487 DA_exhibits = HTMLThumbnailInsertGenerators.getBestExhibitSelection(application,
1488 DA_MAX_EXHIBITS_TO_SHOW,
1489 (Rnd.fastRnd.nextBoolean() ? null : Rnd.fastRnd),
1490 true);
1491 // Fall through to next option if this did not generate anything...
1492 if(DA_exhibits.length > 0) { break; }
1493 }
1494
1495 // Default to showing a random JPEG selection...
1496 default:
1497 {
1498 DA_title = localeBean.getLocalisedMessage("common.selection.randomJPEGs.title");
1499 DA_exhibits = HTMLThumbnailInsertGenerators.getRandomJPEGSelection(application,
1500 DA_MAX_EXHIBITS_TO_SHOW,
1501 true);
1502 break;
1503 }
1504 }
1505
1506 if(DA_exhibits.length < 1) { return(""); /* Nothing to show. */ }
1507
1508 final StringBuilder result = new StringBuilder(128 + (64*DA_exhibits.length));
1509
1510 result.append("<p align=center>");
1511 result.append("<font size=\"+1\" color=").
1512 append(PageSkinUtils.GENERIC_AH_COLOUR_QUOTED).
1513 append('>').append(DA_title).
1514 append("</font>");
1515 result.append("<br />");
1516 result.append(HTMLThumbnailInsertGenerators.makeHTMLExhibitColumnDisplay(DA_exhibits,
1517 pxWidth,
1518 pxHeight - 20, // Allow for a header...
1519 dataSource,
1520 localeBean,
1521 WebUtils.isOverloaded(application)));
1522 result.append("</p>");
1523 return(result.toString());
1524 }
1525 catch(final IOException e)
1526 {
1527 return(""); // Absorb the error quietly as this is non-essential.
1528 }
1529 }
1530 }