001 /*
002 Copyright (c) 1996-2012, Damon Hart-Davis
003 All rights reserved.
004
005 Redistribution and use in source and binary forms, with or without
006 modification, are permitted provided that the following conditions are
007 met:
008
009 * Redistributions of source code must retain the above copyright
010 notice, this list of conditions and the following disclaimer.
011
012 * Redistributions in binary form must reproduce the above copyright
013 notice, this list of conditions and the following disclaimer in the
014 documentation and/or other materials provided with the
015 distribution.
016
017 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
018 IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
019 TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
020 PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
021 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
022 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
023 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
024 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
025 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
026 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
027 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
028 */
029 package org.hd.d.pg2k.webSvr.exhibit;
030
031 import java.io.IOException;
032 import java.net.InetAddress;
033 import java.nio.ByteBuffer;
034 import java.util.Enumeration;
035 import java.util.concurrent.ArrayBlockingQueue;
036 import java.util.concurrent.atomic.AtomicReference;
037
038 import javax.servlet.ServletConfig;
039 import javax.servlet.ServletContext;
040 import javax.servlet.ServletOutputStream;
041 import javax.servlet.http.HttpServlet;
042 import javax.servlet.http.HttpServletRequest;
043 import javax.servlet.http.HttpServletResponse;
044
045 import org.hd.d.pg2k.svrCore.AccessionData;
046 import org.hd.d.pg2k.svrCore.AllExhibitProperties;
047 import org.hd.d.pg2k.svrCore.CoreConsts;
048 import org.hd.d.pg2k.svrCore.ExhibitName;
049 import org.hd.d.pg2k.svrCore.ExhibitPropsLoadable;
050 import org.hd.d.pg2k.svrCore.ExhibitStaticAttr;
051 import org.hd.d.pg2k.svrCore.GenUtils;
052 import org.hd.d.pg2k.svrCore.ImageUtils;
053 import org.hd.d.pg2k.svrCore.MemoryTools;
054 import org.hd.d.pg2k.svrCore.Name;
055 import org.hd.d.pg2k.svrCore.Rnd;
056 import org.hd.d.pg2k.svrCore.TextUtils;
057 import org.hd.d.pg2k.svrCore.MIME.ExhibitMIME;
058 import org.hd.d.pg2k.svrCore.location.GeoProximity;
059 import org.hd.d.pg2k.svrCore.location.GeoUtils;
060 import org.hd.d.pg2k.svrCore.props.LocalProps;
061 import org.hd.d.pg2k.svrCore.vars.SimpleVariableValue;
062 import org.hd.d.pg2k.svrCore.vars.SystemVariables;
063 import org.hd.d.pg2k.webSvr.util.WebConsts;
064 import org.hd.d.pg2k.webSvr.util.WebUtils;
065
066 import ORG.hd.d.IsDebug;
067
068 /**This is the servlet that serves exhibit data.
069 * This is optimised to serve large quantities of binary data
070 * efficiently, and to get headers right to optimise performance
071 * of caches, spiders, etc, while not losing sight of usage logging.
072 * <p>
073 * This expects to be loaded when the whole Web application
074 * starts and remain there until the Web application shuts down,
075 * and so creates and destroys the data pipeline as a side-effect.
076 */
077 public final class ExhibitServlet extends HttpServlet
078 {
079 /**Default transfer buffer size; chosen to be efficient but not huge. */
080 private static final int DEFAULT_BUF_SIZE = CoreConsts.BULK_DATA_TRANSFER_SIZE;
081
082 /**True if we know that this servlet is only unloaded when the site is brought down.
083 * We can use this to quickly destroy an old cache, for example.
084 */
085 public static final boolean ONLY_DESTROYED_ON_SHUTDOWN = true;
086
087 /**Shut down gracefully.
088 */
089 @Override
090 public void destroy()
091 {
092 System.out.println("ExhibitServlet shutting down...");
093
094 // We can safely shut down the cache if we know
095 // we are the last one out...
096 if(ONLY_DESTROYED_ON_SHUTDOWN)
097 {
098 getDataSource(getServletConfig(), null).destroy();
099 }
100 }
101
102 /**Get singleton (per-servlet-context) data pipeline/cache instance.
103 * The config param must not be null, but for some operations
104 * (such as calling destroy()) request can be null.
105 */
106 private static DataSourceBean getDataSource(
107 final ServletConfig config,
108 final HttpServletRequest request)
109 {
110 // Fetches/creates the data source...
111 final DataSourceBean dataSource =
112 DataSourceBean.getApplicationInstance(config.getServletContext());
113
114 // Ensure that the essential details are set up.
115 dataSource.setServletContext(config.getServletContext());
116 if(request != null)
117 { dataSource.setContextPath(request.getContextPath()); }
118
119 return(dataSource);
120 }
121
122 /**Private to init(); ensure exactly-once semantics for init(). */
123 private boolean initialised;
124
125 /**Retrieve some important configuration data and cache it away.
126 * A bug in Tomcat V3.1--V3.2.1 can cause this init() method to be
127 * called twice (concurrently) on the same instance of this servlet,
128 * so this routine must be safe in that scenario.
129 */
130 @Override
131 public void init(final ServletConfig config)
132 throws javax.servlet.ServletException
133 {
134 super.init(config);
135
136 // Work round Tomcat V3.x bug that produces repeated calls to init().
137 synchronized(this)
138 {
139 if(initialised) { return; }
140 initialised = true; // Don't try to reinitialise once started.
141
142 // I create the DataSourceBean here if possible,
143 // to have everything up and running before the first
144 // user visits, since else they may see a huge start-up delay
145 // and/or an empty site!
146 //
147 // These quick-start features will probably only be
148 // enabled if the cache is in aggressive mode.
149 getDataSource(config, null);
150 }
151 }
152
153
154 /**Respond to a GET request for the content served by this servlet.
155 *
156 * @param request The servlet request we are processing
157 * @param response The servlet response we are producing
158 *
159 * @exception IOException if an input/output error occurs
160 */
161 @Override
162 public void doGet(final HttpServletRequest request,
163 final HttpServletResponse response)
164 throws IOException //, ServletException
165 {
166 doAction(request, response, false);
167 }
168
169 /**Respond to a HEAD request for the content served by this servlet.
170 *
171 * @param request The servlet request we are processing
172 * @param response The servlet response we are producing
173 *
174 * @exception IOException if an input/output error occurs
175 */
176 @Override
177 public void doHead(final HttpServletRequest request,
178 final HttpServletResponse response)
179 throws IOException // , ServletException
180 {
181 doAction(request, response, true);
182 }
183
184 /**If true then enable async read-ahead for large exhibits where throughput is low. */
185 private static final boolean ENABLE_READAHEAD = true;
186
187 /**Minimum number of blocks to consider using async read-ahead; strictly positive.
188 * This reflects the fact that starting an additional thread
189 * may have a significant overhead.
190 */
191 private static final int MIN_BLOCKS_FOR_READAHEAD = 2;
192
193 /**Respond to a GET/HEAD request for the content served by this servlet.
194 * Returns the exhibit data, with a correct MIME type.
195 * <p>
196 * If an exhibit is requested by a syntactically-invalid name,
197 * we produce an HTTP `forbidden' (403) response.
198 * <p>
199 * If we request an exhibit with a valid name, but that does not
200 * exist, return return a `non found' (404) response (unless
201 * there are no exhibits available, perhaps because of a database
202 * problem, in which case we may return `service unavailable' in the
203 * hope that that will prevent search engines from purging our
204 * exhibits if they happen to visit when we are reloading the
205 * database).
206 * <p>
207 * We refuse to deal with some dubious requests
208 * (eg apparently links directly to our exhibits from external sites)
209 * if we are too busy, or if this server is marked as slow/expensive,
210 * so as to conserve resources for bona fide users.
211 * Dubious requests are rejected with a temporary failure (5xx) code
212 * or a redirection to an alternate URL.
213 * <p>
214 * Were we observe poor throughput due to excessive time in data fetches
215 * then we may switch to an asynchronous read-ahead mode.
216 * <p>
217 * As a matter of policy and for safety/simplicity
218 * this does <em>not<em> handle <code>Ranges</code>.
219 * <p>
220 * TODO: handle If-None-Match correctly
221 *
222 * @param request The servlet request we are processing
223 * @param response The servlet response we are producing
224 *
225 * @exception IOException if an input/output error occurs
226 */
227 public void doAction(final HttpServletRequest request,
228 final HttpServletResponse response,
229 final boolean isHEAD)
230 throws IOException //, ServletException
231 {
232 final long startTime = System.currentTimeMillis();
233
234 // Reject any non-human fetch by an over-keen "precaching" client.
235 // We can't spare the bandwidth for spurious cache-ahead behaviour.
236 final ServletContext servletContext = getServletContext();
237 if(WebUtils.isPrecacheRequest(request))
238 {
239 servletContext.log("ExhibitServlet: blocked attempt to 'precache' exhibit");
240 response.sendError(HttpServletResponse.SC_FORBIDDEN, "Please do not attempt to 'precache' exhibits!");
241 return;
242 }
243
244 // Get the relative request path...
245 final String rawPathInfo = request.getPathInfo();
246 //System.err.println("rawPathInfo = " + rawPathInfo);
247 // Adjust for any leading slash.
248 final String pathInfo = ((rawPathInfo != null) && rawPathInfo.startsWith("/")) ?
249 (rawPathInfo.substring(1)) : rawPathInfo;
250
251 // Attempt to get all the info on the exhibit, which
252 // also implies that it exists!
253 // If anything fails, report `not found'.
254 try
255 {
256 if(!ExhibitName.validNameSyntax(pathInfo))
257 {
258 if(!response.isCommitted()) // Don't bother with error if too late...
259 { response.sendError(HttpServletResponse.SC_NOT_FOUND); }
260 // request.getRequestDispatcher(WebConsts.NOT_FOUND_PAGE).forward(request, response);
261 return;
262 }
263
264 final DataSourceBean ds = getDataSource(getServletConfig(), request);
265 final AllExhibitProperties aep = ds.getAllExhibitProperties(-1);
266
267 if(aep.aeid.length < 1)
268 {
269 servletContext.log("WARNING: request for exhibit with none currently available.");
270 if(!response.isCommitted()) // Don't bother with error if too late...
271 { response.sendError(HttpServletResponse.SC_SERVICE_UNAVAILABLE); }
272 return;
273 }
274
275 final ExhibitStaticAttr esa = aep.aeid.getStaticAttr(pathInfo);
276 if(esa == null)
277 {
278 if(!response.isCommitted()) // Don't bother with error if too late...
279 { response.sendError(HttpServletResponse.SC_NOT_FOUND); }
280 return;
281 }
282
283 final org.hd.d.pg2k.svrCore.props.GenProps gp = ds.getGenProps(-1);
284
285 // Veto dubious requests with a temporary fail or redirection if we are too busy.
286 final boolean isBusy = WebUtils.isOverloaded(servletContext);
287 final String hotlinkHost = WebUtils.requestProbablyReferredFromExternalSite(request);
288 // If this looks like an unauthorised hotlinked referral,
289 // then we may reject/deflect it to conserve bandwidth.
290 if(hotlinkHost != null)
291 {
292 // If busy then reject request quickly with a 5xx error code.
293 // Likewise if this server has scarce/expensive bandwidth.
294 if(isBusy || (LocalProps.getServerSlowdownFactor() > 1))
295 {
296 if(!response.isCommitted()) // Don't bother with error if too late...
297 { response.sendError(HttpServletResponse.SC_SERVICE_UNAVAILABLE); }
298 return;
299 }
300
301 // Looks like a dodgy hotlink from someone unauthorised...
302 if(ServletUtils.noteAndOrBlockHotlinker(servletContext, request, hotlinkHost, ds, gp))
303 {
304 // Note the blocked hotlink attempt.
305 final SimpleVariableValue svvLoc = new SimpleVariableValue(
306 SystemVariables.GENSTATS_STRING_GLOBAL_EVENT,
307 "blockedHotlinkFrom=" + hotlinkHost);
308 ds.setVariable(svvLoc);
309
310 // Just give up if already too late to signal an error, etc.
311 if(!response.isCommitted())
312 {
313 final Enumeration<?> rEn = request.getHeaders("Referer");
314 final boolean noRef = ((rEn == null) || !(rEn.hasMoreElements())); // No referrer...
315 final String referer = noRef ? "" : (String) rEn.nextElement();
316 //servletContext.log("WARNING: rejecting external exhibit hotlink from `"+hotlinkHost+"' for: " +esa.getCharSequence()+ " with Referer: " + TextUtils.sanitiseForXML(referer, 1024, true));
317
318 // For hotlinkers that we explicitly hate,
319 // immediately send a 4xx to save time and effort.
320 if(gp.getHotLinkDisallowHosts().contains(hotlinkHost))
321 {
322 response.sendError(HttpServletResponse.SC_FORBIDDEN);
323 servletContext.log("WARNING: rejecting (404, disallowed hosts) external exhibit hotlink from `"+hotlinkHost+"' for: " +esa.getCharSequence()+ " with Referer: " + TextUtils.sanitiseForXML(referer, 1024, true));
324 return;
325 }
326
327 // If a no-hotlinks-please image URL has been supplied then divert to it...
328 final String altURL = gp.getWEBSVR_EX_HOTLINK_DIVERT_URL();
329 if(/* !isBusy && */ (altURL != null))
330 {
331 response.sendRedirect(altURL);
332 servletContext.log("WARNING: redirecting (to alt URL) external exhibit hotlink from `"+hotlinkHost+"' for: " +esa.getCharSequence()+ " with Referer: " + TextUtils.sanitiseForXML(referer, 1024, true));
333 return;
334 }
335 // Else redirect to the HTML catalogue page for the exhibit,
336 // which should work reasonably nicely
337 // for bona-fide search engine referrals, for example.
338 else
339 {
340 response.sendRedirect(WebUtils.makeCatPageURL(esa.getCharSequence(), WebConsts.F_secondary_generated_HTML_suffix).toExternalForm());
341 servletContext.log("WARNING: redirecting (to HTML cat page) external exhibit hotlink from `"+hotlinkHost+"' for: " +esa.getCharSequence()+ " with Referer: " + TextUtils.sanitiseForXML(referer, 1024, true));
342 return;
343 }
344 }
345
346 return; // Terminate response without sending any exhibit data.
347 }
348 }
349
350
351 final long llength = esa.length;
352 assert(llength > 0);
353 assert(llength <= Integer.MAX_VALUE);
354 final int length = (int) llength;
355 final String type = ExhibitMIME.getMIMEType(pathInfo);
356
357 // If we get this far, everything is probably OK.
358
359
360 // Create (strong) ETag header from MD5hash if available, else from "size-timestamp", in hex.
361 final ExhibitPropsLoadable epl = aep.getExhibitPropsLoadable(esa.getExhibitFullName());
362 final AccessionData ad = epl.getAccessionMetadata();
363 final String ETag;
364 if((ad != null) && (ad.hashMD5 != null))
365 { ETag = "\"" + ad.hashMD5.toHexString() + "\""; }
366 else
367 {
368 final StringBuilder etsb = new StringBuilder(27);
369 etsb.append('"');
370 etsb.append(Integer.toHexString(length));
371 etsb.append('-').append(Long.toHexString(esa.timestamp));
372 etsb.append('"');
373 ETag = etsb.toString();
374 }
375
376 // Set some cacheing headers too.
377 // Make the cache time a significant multiple of
378 // the interval between rechecks of exhibit immutable data
379 // (or our default slackness, if greater).
380 // Increase cache lifetime if we are busy, so as to reduce future server load.
381 // Also increase cache lifetime if, for example, it is the current background image...
382 final int basicCacheLifetime = 5 * Math.max(CoreConsts.DEFAULT_TEMPORAL_SLACKNESS_S * 1000,
383 gp.getWEBSVR_MIN_EX_IMATTR_RECHECK_MS());
384 final long cacheLifetime;
385 if(isBusy)
386 { cacheLifetime = ((long) basicCacheLifetime) << 3; }
387 else if(esa.getExhibitFullName().equals(gp.getWEBSVR_BG_IMAGE()))
388 { cacheLifetime = Math.max(WebConsts.MIN_STATIC_WEBITEMS_CACHE_MS, ((long) basicCacheLifetime) << 5); }
389 else
390 { cacheLifetime = basicCacheLifetime; }
391
392 // Set the HTTP/1.1 cache-control header to reflect
393 // the maximum time that any proxy/end-user should cache this non-private exhibit.
394 // Also, forbid transformation (messing-up!) by intermediate proxies.
395 response.setHeader("Cache-Control", "public,no-transform,max-age="+(cacheLifetime/1000));
396
397 // If this might be an HTML request from a search engine
398 // then we set a long expiry so that search engines will
399 // index and retain the page; of the order of weeks or even months.
400 // This *may* force HTTP/1.0-browser users to hit RELOAD
401 // more often than we'd like...
402 final boolean isFromSpider = WebUtils.requestProbablyFromSpider(request);
403 final long expiryTime = isFromSpider
404 ? Math.max(cacheLifetime, WebConsts.SPIDER_PAGE_EXPIRY_MS)
405 : cacheLifetime;
406 response.setDateHeader("Expires", System.currentTimeMillis() + expiryTime);
407
408 // Set the true last-modified date (known valid, eg not -1).
409 final long lastModified = esa.timestamp;
410 response.setDateHeader("Last-Modified", lastModified);
411 // Set ETag.
412 response.setHeader("ETag", ETag);
413
414
415 // Deal with If-Modified-Since here,
416 // BEFORE any other headers have been generated for HEAD and GET
417 // OTHER THAN cache control which should be resent for broken clients.
418 // (and before sending any body).
419 if(WebUtils.abortIfETagMatchOrNotModifiedSince(ETag, lastModified, request, response))
420 { return; }
421
422
423 // Set the main type and length headers for browsers.
424 response.setContentLength(length);
425 response.setContentType(type);
426
427 // If we have the exhibit MD5 hash available
428 // then we can add the appropriate HTTP header as a bonus.
429 if((ad != null) && (ad.hashMD5 != null))
430 {
431 final String base64Hash = TextUtils.encode8To6(ad.hashMD5.toByteArray());
432 response.setHeader("Content-MD5", base64Hash);
433 }
434
435
436 // If this is a HEAD request then return without providing a body.
437 if(isHEAD) { return; }
438
439
440 // Generate the body (ie send the raw exhibit).
441
442 // Get access to the output stream...
443 final ServletOutputStream os = response.getOutputStream();
444
445 // Monitor relative read and write performance.
446 long totalReadTime = 0;
447 long totalWriteTime = 0;
448
449 // Real humans fetching big exhibits (not hotlinked!)
450 // are eligible for async read-ahead to improve throughput.
451 // We use this to measure throughput
452 // even when a read-ahead thread is not allowed.
453 final int thresholdBytes = DEFAULT_BUF_SIZE * MIN_BLOCKS_FOR_READAHEAD;
454 final boolean mayNeedReadAhead = !isFromSpider && (hotlinkHost == null) &&
455 (esa.length >= 3*thresholdBytes);
456
457 // We copy exhibit data a chunk at a time, followed by a flush().
458 //
459 // We attempt to make this robust in the face of transient data-fetch errors,
460 // retrying for a time proportional to that already spent
461 // in order to reflect the cost of restarting from the beginning
462 // to the end user.
463 //
464 // In general we assume that we buffer enough data
465 // in the HTTP/TCP stack to avoid significant loss of throughput,
466 // but when we wait a similar fraction of service time for reads and writes
467 // then we can start a potentially-resource-hungry read-ahead.
468 //
469 // We use a direct buffer for maximum read performance
470 // providing the transfer is large enough to be worth the various overheads.
471 // We'll use this same buffer in the async and sync read modes.
472 final ByteBuffer buffer = (length >= DEFAULT_BUF_SIZE) ?
473 ByteBuffer.allocateDirect(DEFAULT_BUF_SIZE) :
474 ByteBuffer.allocate(length); // Small, lightweight, non-direct buffer,
475 // Queued data, for when we do read-ahead.
476 final ArrayBlockingQueue<byte[]> data = (!ENABLE_READAHEAD || !mayNeedReadAhead) ? null :
477 new ArrayBlockingQueue<byte[]>(1+2*MIN_BLOCKS_FOR_READAHEAD);
478 // Recycling (only default/full-size) buffers to save a little heap pressure and GC.
479 // Only useful if the exhibit is several full blocks' worth of data,
480 // enough to have some queued and some recycled, and reclaim the cost of this queue.
481 // This is useful for large exhibits regardless of whether read-head is used.
482 final ArrayBlockingQueue<byte[]> recycledBuffers = (length >= 5*DEFAULT_BUF_SIZE) ? null :
483 new ArrayBlockingQueue<byte[]>(1+2*MIN_BLOCKS_FOR_READAHEAD);
484 Thread reader = null; // Reader thread, if we use it.
485 // Signal thrown in thread, if any.
486 final AtomicReference<IOException> bgThreadErr = new AtomicReference<IOException>();
487 try
488 {
489 for(int start = 0; start < length; )
490 {
491 // Be prepared to retry to get the data if need be.
492 final long timeBeforeRead = System.currentTimeMillis();
493
494 // If read-ahead is enabled and has not been started yet
495 // (reads taking a significant fraction of write/wallclock time)
496 // and we're far enough through the transfer to have reasonable stats
497 // and we're far enough from the end to be worth starting a thread
498 // and we've been running slow (reads taking too long)
499 // and the write time is enough to make read-ahead/overlap useful
500 // and the download time so far has actually been significant
501 // and we're not horribly short of memory
502 // then start the async thread to read ahead for us.
503 if(ENABLE_READAHEAD && mayNeedReadAhead && (reader == null) &&
504 (start >= DEFAULT_BUF_SIZE) && (length-start > thresholdBytes) &&
505 (totalReadTime > (totalWriteTime>>>4)) /* Reads are slowing throughput >~10%. */ &&
506 (totalReadTime < (totalWriteTime<<4)) /* >~10% time to be saved by overlapping reads and writes. */ &&
507 ((timeBeforeRead - startTime) > WebConsts.MAX_PG_DOWNLOAD_MS/2) &&
508 !MemoryTools.isMemoryStressed())
509 {
510 // Actually create/start the read-ahead thread.
511 final int startPos = start;
512
513 /* if(IsDebug.isDebug) */ { servletContext.log("[ExhibitServlet: INFO: starting async read-ahead @"+startPos+" for slow download (fetch "+totalReadTime+"ms, send "+totalWriteTime+"ms) for "+esa+".]"); }
514
515 reader = new Thread("ExhibitServlet read-ahead for "+esa){
516 @Override public final void run()
517 {
518 try
519 {
520 try
521 {
522 for(int i = startPos; i < length; )
523 {
524 //if(IsDebug.isDebug) { System.out.println("[ExhibitServlet: async read-ahead for "+esa+".]"); }
525 // Read the data...
526 final byte b[] = readNextBlock(startTime, buffer, recycledBuffers, ds, esa, i, servletContext, null);
527 // Queue it up to be read.
528 data.put(b);
529 // Adjust for the data read.
530 i += b.length;
531 }
532 }
533 catch(final IOException e)
534 {
535 bgThreadErr.set(e);
536 data.put(QUEUE_POISON); // Poison the queue.
537 servletContext.log("ExhibitServlet: caught IOException in read-ahead thread: " + e.getClass().getName() + ": " + e.getMessage());
538 }
539 catch(final InterruptedException e)
540 {
541 data.put(QUEUE_POISON); // Poison the queue.
542 throw e;
543 }
544 catch(final Throwable t)
545 {
546 final IOException e = new IOException("unexpected error");
547 e.initCause(t); // Note the original exception.
548 bgThreadErr.set(e);
549 data.put(QUEUE_POISON); // Poison the queue.
550 e.printStackTrace();
551 }
552 }
553 catch(final InterruptedException e)
554 {
555 // We were presumably killed off by the servlet thread,
556 // So log the fact and quit immediately.
557 servletContext.log("read-ahead thread interrupted");
558 }
559 }
560 };
561 reader.setDaemon(true);
562 // Try to raise the reader priority a notch,
563 // but gracefully handle refusal!
564 try { reader.setPriority(Math.min(Thread.MAX_PRIORITY, 1+Thread.currentThread().getPriority()));}
565 catch(final Exception e) { e.printStackTrace(); }
566 reader.start();
567 }
568
569 // Read the next chunk of data.
570 final byte[] tmpBuf;
571 if(ENABLE_READAHEAD && (reader != null))
572 {
573 // Get data from async read-ahead thread.
574 //
575 // If the background thread got an error
576 // then rethrow it here to stop abruptly.
577 final IOException err = bgThreadErr.get();
578 if(err != null) { throw err; }
579 tmpBuf = data.take(); // Blocking read of data...
580 // Quit if the read-ahead thread deliberately poisoned the queue.
581 if(tmpBuf.length == 0) { throw new IOException("problem with read-ahead thread"); }
582 }
583 else
584 {
585 // Fetch the next block of exhibit data synchronously,
586 // ie using the current servlet thread.
587 tmpBuf = readNextBlock(startTime, buffer, recycledBuffers, ds, esa, start, servletContext, os);
588 }
589
590 // Note the amount of data actually read...
591 start += tmpBuf.length;
592
593 final long timeBeforeWrite = System.currentTimeMillis();
594 // Send the exhibit data to the user/client.
595 os.write(tmpBuf);
596 final long timeAfterWrite = System.currentTimeMillis();
597 totalReadTime += (timeBeforeWrite - timeBeforeRead);
598 totalWriteTime += (timeAfterWrite - timeBeforeWrite);
599
600 // Recycle the buffer if possible
601 // (but throw away any that we don't have room for).
602 // We only queue up default/max-size buffers.
603 if((recycledBuffers != null) && (tmpBuf.length == DEFAULT_BUF_SIZE))
604 { recycledBuffers.offer(tmpBuf); }
605 }
606
607 // If the background thread got an error not already caught
608 // then rethrow it here to stop abruptly.
609 final IOException err = bgThreadErr.get();
610 if(err != null) { throw err; }
611 }
612 finally
613 {
614 // Clean up the async read-ahead thread if (still) running,
615 // interrupt()ing it if need be to wake it up or unblock it.
616 if(ENABLE_READAHEAD && (reader != null))
617 {
618 while(reader.isAlive())
619 { reader.interrupt(); reader.join(CoreConsts.MAX_INTERACTIVE_DELAY_MS); }
620 }
621 }
622 os.flush(); // Done!
623 os.close(); // Try to catch any broken/aborted connection now.
624
625 assert((reader == null) || (!reader.isAlive())); // No reader thread running now.
626
627 // Note slow downloads to help us tune the system.
628 // We can only really hope to significantly speed up downloads
629 // of exhibits that were eligible for read-ahead.
630 if(mayNeedReadAhead)
631 {
632 // If the exhibit took significant time to download and
633 // if we spent longer fetching the data than sending it
634 // then we probably gave the user a slow download.
635 final long wallclockTime = (System.currentTimeMillis() - startTime);
636 if((totalReadTime > wallclockTime/4) &&
637 (wallclockTime > WebConsts.MAX_PG_DOWNLOAD_MS))
638 { servletContext.log("[ExhibitServlet: WARNING: slow download (fetch "+totalReadTime+"ms, send "+totalWriteTime+"ms, wallclock "+wallclockTime+"ms) for "+esa+", "+(length/(0.001f*Math.max(1,wallclockTime)))+"Bps.]"); }
639 }
640
641 // Rate/note only deliberate downloads by humans on our site(s),
642 // thus ignore:
643 // * Hits that appear to be from spiders.
644 // * Hits that appear to be direct hotlinks from external sites.
645 // * Hits for any (small) image/exhibit potentially
646 // being used as its own thumbnail.
647 // * Hits for any current GenProps-selected background image.
648 // Note proximity of user also...
649 final CharSequence bgImage;
650 if(!isFromSpider &&
651 (hotlinkHost == null) &&
652 !ImageUtils.canBeOwnThumbnail(esa) &&
653 !((null != (bgImage = gp.getWEBSVR_BG_IMAGE())) && TextUtils.contentEquals(esa.getCharSequence(), bgImage)))
654 {
655 try
656 {
657 final String shortNameAsString = esa.getExhibitFullName().getShortName().toString(); // FIXME: horribly inefficient to conv to/from String
658 // Record the short exhibit name as an event...
659 final SimpleVariableValue svvDL = new SimpleVariableValue(
660 SystemVariables.ACCESSPATTERN_COMPLETED_DOWNLOAD,
661 shortNameAsString);
662 ds.setVariable(svvDL);
663 final SimpleVariableValue svvDLl = new SimpleVariableValue(
664 SystemVariables.ACCESSPATTERN_COMPLETED_DOWNLOAD_LOCAL,
665 shortNameAsString);
666 ds.setVariable(svvDLl);
667
668 final InetAddress clientIP =
669 InetAddress.getByName(request.getRemoteAddr());
670
671 // Note the download by approximate geographical region...
672 // Do so only AFTER the response completes to minimise latency,
673 // and only for requests that did not abort with an exception.
674 // We may be prepared to spend some time trying to look this up
675 // dynamically, hoping that DNS cacheing will keep the cost down.
676 final String regionByAddress = GeoUtils.getRegionByAddress(clientIP, false);
677 final SimpleVariableValue svvLoc = new SimpleVariableValue(
678 SystemVariables.GENSTATS_STRING_GLOBAL_EVENT,
679 "download.loc=" + regionByAddress);
680 ds.setVariable(svvLoc);
681
682 // Note the rough proximity of the client so as to monitor
683 // how well geo-sensitive load balancing is doing.
684 // We can only do this if we know our physical location
685 // (eg if this is a mirror and thus has an embedded ccTLD).
686 // In particular we note downloads from very remote clients.
687 final String mirrorTag = LocalProps.getMirrorTag();
688 if(mirrorTag != null)
689 {
690 final GeoUtils.CCTLD cc = new GeoUtils.CCTLD(mirrorTag.substring(0, 2));
691 final GeoProximity proximity = GeoUtils.computeProximityByAddress(clientIP, cc, false);
692 final SimpleVariableValue svvProx = new SimpleVariableValue(
693 SystemVariables.GENSTATS_STRING_GLOBAL_EVENT,
694 "download.prox=" + proximity);
695 ds.setVariable(svvProx);
696
697 // Log the location of users with no proximity
698 // since we might need to provide a server locally!
699 if(proximity == GeoProximity.NONE)
700 {
701 final SimpleVariableValue svvNoProx = new SimpleVariableValue(
702 SystemVariables.GENSTATS_STRING_GLOBAL_EVENT,
703 "download.noprox=" + regionByAddress);
704 ds.setVariable(svvNoProx);
705 }
706 }
707 }
708 catch(final Exception e) // Absorb (but briefly log) any errors...
709 {
710 servletContext.log("ExhibitServlet: caught Exception: " + e.getClass().getName() + ": " + e.getMessage());
711 }
712 }
713
714 return; // Finished OK!
715 }
716 catch(final Throwable t) // Absorb (but briefly log) any error...
717 {
718 servletContext.log("ExhibitServlet: caught Throwable: " + t.getClass().getName() + ": " + t.getMessage());
719 if(IsDebug.isDebug) { t.printStackTrace(); }
720 }
721
722 // Some problem handling the request...
723 if(!response.isCommitted()) // Don't bother with error if too late...
724 { response.sendError(HttpServletResponse.SC_INTERNAL_SERVER_ERROR); }
725 return;
726 }
727
728 /**Can be inserted into the data queue by the async thread to force the foreground thread to quit. */
729 private static final byte[] QUEUE_POISON = new byte[0];
730
731 /**Read the next block of data from the exhibit using the given buffer returning it as a byte[]; never null nor zero-length.
732 * Retries failed reads up to a time proportional to that already spent
733 * on the entire transfer (to reflect the cost of restarting).
734 *
735 * @param os if non-null may be used to flush() data down to the user;
736 * not safe to call from any thread other than the servlet thread
737 * @param byteBufs source of recycled byte buffers that we can use rather than using new,
738 * we freely discard anything we find that isn't suitable for our purposes; never null
739 */
740 private static byte[] readNextBlock(final long startTime,
741 final ByteBuffer buffer,
742 final ArrayBlockingQueue<byte[]> byteBufs,
743 final DataSourceBean ds,
744 final ExhibitStaticAttr esa,
745 final int start,
746 final ServletContext servletContext,
747 final ServletOutputStream os)
748 throws IOException, InterruptedException
749 {
750 assert(buffer != null);
751
752 final long timeBeforeRead = System.currentTimeMillis();
753 final long retryUntil = timeBeforeRead + ((timeBeforeRead - startTime) / 4) +
754 (WebConsts.MAX_PG_DOWNLOAD_MS<<4) + 30000;
755
756 // High-level recovery from temporary I/O errors.
757 for( ; ; )
758 {
759 try
760 {
761 buffer.clear();
762 assert(buffer.remaining() > 0); // Must not be zero-sized buffer.
763 // Get the data, and cache it locally if possible.
764 ds.getRawFile(buffer, esa.getExhibitFullName(), start, false);
765 buffer.flip();
766 final int bytesRead = buffer.limit();
767 if(bytesRead > 0)
768 { break; /* OK, got some data! (Normal case.) */ }
769 // Didn't get any data,
770 // so fall through to sleep and retry.
771 }
772 catch(final IOException e)
773 {
774 if(System.currentTimeMillis() > retryUntil)
775 {
776 servletContext.log("ERROR: ExhibitServlet: retry FAILED at offset "+start+" for "+esa+": " + e.getMessage() + ".");
777 throw e; // Abort.
778 }
779 }
780
781 // Flush what we already read/wrote down to the user if possible,
782 // thus aborting (SIGPIPE) if the user has given up and disconnected.
783 if(null != os) { os.flush(); }
784
785 // Wait a little while before trying again,
786 // with some randomisation to help avoid collisions.
787 // Not so long (1s+) as to force the TMF
788 // into thinking that we are "busy".
789 Thread.sleep((GenUtils.mustConservePower()?503:203) + Rnd.fastRnd.nextInt(203));
790 servletContext.log("[ExhibitServlet: retrying read at offset "+start+" for "+esa+".]");
791 }
792
793 // FIXME: for now, with the current servlet API, we have to copy data into a byte[].
794 final int bytesToWrite = buffer.limit();
795 assert(buffer.position() == 0);
796 assert(bytesToWrite > 0);
797
798 // Attempt to avoid a heap allocation to avoid stressing GC if we can
799 // by reusing a recycled buffer,
800 // but if we don't immediately find something exactly right
801 // then just use new...
802 // Note that only default/max-sized buffers should be in the pool.
803 byte[] tmpBuf;
804 if((bytesToWrite != DEFAULT_BUF_SIZE) || (byteBufs == null) || ((tmpBuf = byteBufs.poll()) == null) /* || (tmpBuf.length != bytesToWrite) */)
805 { tmpBuf = new byte[bytesToWrite]; }
806 assert(tmpBuf.length == bytesToWrite);
807
808 // Copy the data...
809 buffer.get(tmpBuf);
810 return(tmpBuf);
811 }
812
813
814 /**Get `last-modified' time for exhibit.
815 * If we can't find out, eg because the exhibit is not present,
816 * we return -1L, the default value.
817 * <p>
818 * If returning a last-modified value causes difficulty
819 * (eg with Tomcat 4.0.1) then we return -1.
820 *
821 * @param request The servlet request we are processing
822 */
823 @Override
824 public final long getLastModified(final HttpServletRequest request)
825 {
826 // if(WebConsts.AVOID_LAST_MODIFIED) { return(-1L); }
827
828 // Get the relative request path...
829 final String rawPathInfo = request.getPathInfo();
830 //System.err.println("rawPathInfo = " + rawPathInfo);
831 // Adjust for any leading slash.
832 final String pathInfo = ((rawPathInfo != null) && rawPathInfo.startsWith("/")) ?
833 (rawPathInfo.substring(1)) : rawPathInfo;
834
835 try
836 {
837 final Name.ExhibitFull fullName;
838 try { fullName = Name.ExhibitFull.create(pathInfo); }
839 catch(final IllegalArgumentException e)
840 {
841 // Invalid name; return default 'not-known' value.
842 return(-1L);
843 }
844
845 final ExhibitStaticAttr esa = getDataSource(getServletConfig(), request).
846 getStaticAttr(fullName);
847
848 // No such exhibit; return default 'not-known' value.
849 if(esa == null) { return(-1); }
850
851 // Return the exhibit's timestamp...
852 return(esa.timestamp);
853 }
854 catch(final Exception e)
855 {
856 // In case of error return default `not-known' value.
857 return(-1L);
858 }
859 }
860
861 /**Unique Serialisation class ID generated by http://random.hd.org/. */
862 private static final long serialVersionUID = -1774514190468389217L;
863 }