001    /*
002    Copyright (c) 1996-2012, Damon Hart-Davis
003    All rights reserved.
004    
005    Redistribution and use in source and binary forms, with or without
006    modification, are permitted provided that the following conditions are
007    met:
008    
009      * Redistributions of source code must retain the above copyright
010        notice, this list of conditions and the following disclaimer.
011    
012      * Redistributions in binary form must reproduce the above copyright
013        notice, this list of conditions and the following disclaimer in the
014        documentation and/or other materials provided with the
015        distribution.
016    
017    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
018    IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
019    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
020    PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
021    OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
022    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
023    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
024    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
025    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
026    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
027    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
028    */
029    package org.hd.d.pg2k.webSvr.exhibit;
030    
031    import java.io.IOException;
032    import java.net.InetAddress;
033    import java.nio.ByteBuffer;
034    import java.util.Enumeration;
035    import java.util.concurrent.ArrayBlockingQueue;
036    import java.util.concurrent.atomic.AtomicReference;
037    
038    import javax.servlet.ServletConfig;
039    import javax.servlet.ServletContext;
040    import javax.servlet.ServletOutputStream;
041    import javax.servlet.http.HttpServlet;
042    import javax.servlet.http.HttpServletRequest;
043    import javax.servlet.http.HttpServletResponse;
044    
045    import org.hd.d.pg2k.svrCore.AccessionData;
046    import org.hd.d.pg2k.svrCore.AllExhibitProperties;
047    import org.hd.d.pg2k.svrCore.CoreConsts;
048    import org.hd.d.pg2k.svrCore.ExhibitName;
049    import org.hd.d.pg2k.svrCore.ExhibitPropsLoadable;
050    import org.hd.d.pg2k.svrCore.ExhibitStaticAttr;
051    import org.hd.d.pg2k.svrCore.GenUtils;
052    import org.hd.d.pg2k.svrCore.ImageUtils;
053    import org.hd.d.pg2k.svrCore.MemoryTools;
054    import org.hd.d.pg2k.svrCore.Name;
055    import org.hd.d.pg2k.svrCore.Rnd;
056    import org.hd.d.pg2k.svrCore.TextUtils;
057    import org.hd.d.pg2k.svrCore.MIME.ExhibitMIME;
058    import org.hd.d.pg2k.svrCore.location.GeoProximity;
059    import org.hd.d.pg2k.svrCore.location.GeoUtils;
060    import org.hd.d.pg2k.svrCore.props.LocalProps;
061    import org.hd.d.pg2k.svrCore.vars.SimpleVariableValue;
062    import org.hd.d.pg2k.svrCore.vars.SystemVariables;
063    import org.hd.d.pg2k.webSvr.util.WebConsts;
064    import org.hd.d.pg2k.webSvr.util.WebUtils;
065    
066    import ORG.hd.d.IsDebug;
067    
068    /**This is the servlet that serves exhibit data.
069     * This is optimised to serve large quantities of binary data
070     * efficiently, and to get headers right to optimise performance
071     * of caches, spiders, etc, while not losing sight of usage logging.
072     * <p>
073     * This expects to be loaded when the whole Web application
074     * starts and remain there until the Web application shuts down,
075     * and so creates and destroys the data pipeline as a side-effect.
076     */
077    public final class ExhibitServlet extends HttpServlet
078        {
079        /**Default transfer buffer size in bytes; chosen to be efficient but not huge. */
080        private static final int DEFAULT_BUF_SIZE = CoreConsts.BULK_DATA_TRANSFER_SIZE;
081    
082        /**True if we know that this servlet is only unloaded when the site is brought down.
083         * When true, destroy() also destroys the shared data source (pipeline/cache).
084         */
085        public static final boolean ONLY_DESTROYED_ON_SHUTDOWN = true;
086    
087        /**Announce shutdown and, when known to be safe, tear down the shared data source.
088         * The data source is only destroyed if ONLY_DESTROYED_ON_SHUTDOWN is set.
089         */
090        @Override
091        public void destroy()
092            {
093            System.out.println("ExhibitServlet shutting down...");
094            // Nothing further to do unless we know that we are the last one out.
095            if(!ONLY_DESTROYED_ON_SHUTDOWN) { return; }
096            // Look up the per-context pipeline/cache (no request needed) and destroy it.
097            final ServletConfig servletConfig = getServletConfig();
098            final DataSourceBean sharedSource = getDataSource(servletConfig, null);
099            sharedSource.destroy();
100            }
101    
102        /**Look up the singleton (per-servlet-context) data pipeline/cache bean and prime it.
103         * The bean is always (re)told its servlet context; its context path
104         * is only updated when a request is supplied.
105         * @param config  servlet configuration; must not be null
106         * @param request  current request, or null (eg when called from destroy())
107         * @return the DataSourceBean instance for this servlet context
108         */
109        private static DataSourceBean getDataSource(final ServletConfig config,
110                                                    final HttpServletRequest request)
111            {
112            final ServletContext context = config.getServletContext();
113            // Fetch (or create) the instance associated with this context.
114            final DataSourceBean bean = DataSourceBean.getApplicationInstance(context);
115            bean.setServletContext(context);
116            // Without a request we have no context path to pass on.
117            if(null == request) { return bean; }
118            bean.setContextPath(request.getContextPath());
119            return bean;
120            }
121    
122        /**Private to init(); set (under this object's lock) once one-time start-up has begun. */
123        private boolean initialised;
124    
125        /**Initialise the servlet and eagerly bring up the shared data source.
126         * Tomcat V3.1--V3.2.1 may invoke init() twice (concurrently) on one
127         * instance, so the one-time work is guarded by a flag under this object's lock.
128         *
129         * @param config  servlet configuration, passed on to the superclass
130         * @throws javax.servlet.ServletException  if superclass initialisation fails
131         */
132        @Override
133        public void init(final ServletConfig config)
134            throws javax.servlet.ServletException
135            {
136            // Always let the superclass see the configuration first.
137            super.init(config);
138            synchronized(this)
139                {
140                // Only the first caller does the warm-up; repeat calls are ignored.
141                if(!initialised)
142                    {
143                    initialised = true; // Set before the warm-up so it is never re-run.
144                    // Create the DataSourceBean now, before the first visitor,
145                    // to avoid a huge start-up delay and/or an apparently-empty site.
146                    // (These quick-start features will probably only be enabled
147                    // if the cache is in aggressive mode.)
148                    getDataSource(config, null);
149                    }
150                }
151            }
152    
153    
154        /**Respond to a GET request for the content served by this servlet.
155         *
156         * @param request The servlet request we are processing
157         * @param response The servlet response we are producing
158         *
159         * @exception IOException if an input/output error occurs
160         */
161        @Override
162        public void doGet(final HttpServletRequest request,
163                          final HttpServletResponse response)
164            throws IOException //, ServletException
165            {
166            doAction(request, response, false);
167            }
168    
169        /**Respond to a HEAD request for the content served by this servlet.
170         *
171         * @param request The servlet request we are processing
172         * @param response The servlet response we are producing
173         *
174         * @exception IOException if an input/output error occurs
175         */
176        @Override
177        public void doHead(final HttpServletRequest request,
178                           final HttpServletResponse response)
179            throws IOException // , ServletException
180            {
181            doAction(request, response, true);
182            }
183    
184        /**If true then doAction() may start an async read-ahead thread for large exhibits where throughput is low. */
185        private static final boolean ENABLE_READAHEAD = true;
186    
187        /**Minimum number of (DEFAULT_BUF_SIZE) blocks to consider using async read-ahead; strictly positive.
188         * This reflects the fact that starting an additional thread may have a
189         * significant overhead; it also sizes the block queues created in doAction().
190         */
191        private static final int MIN_BLOCKS_FOR_READAHEAD = 2;
192    
193        /**Respond to a GET/HEAD request for the content served by this servlet.
194         * Returns the exhibit data, with a correct MIME type.
195         * <p>
196         * If an exhibit is requested by a syntactically-invalid name,
197         * we produce an HTTP `not found' (404) response.
198         * <p>
199         * If we request an exhibit with a valid name, but that does not
200         * exist, return a `not found' (404) response (unless
201         * there are no exhibits available, perhaps because of a database
202         * problem, in which case we may return `service unavailable' in the
203         * hope that that will prevent search engines from purging our
204         * exhibits if they happen to visit when we are reloading the
205         * database).
206         * <p>
207         * We refuse to deal with some dubious requests
208         * (eg apparently links directly to our exhibits from external sites)
209         * if we are too busy, or if this server is marked as slow/expensive,
210         * so as to conserve resources for bona fide users.
211         * Dubious requests are rejected with a temporary failure (5xx) code
212         * or a redirection to an alternate URL.
213         * <p>
214         * Where we observe poor throughput due to excessive time in data fetches
215         * then we may switch to an asynchronous read-ahead mode.
216         * <p>
217         * As a matter of policy and for safety/simplicity
218         * this does <em>not</em> handle <code>Ranges</code>.
219         * <p>
220         * TODO: handle If-None-Match correctly
221         *
222         * @param request The servlet request we are processing
223         * @param response The servlet response we are producing
224         *
225         * @exception IOException if an input/output error occurs
226         */
227        public void doAction(final HttpServletRequest request,
228                             final HttpServletResponse response,
229                             final boolean isHEAD)
230            throws IOException //, ServletException
231            {
232            final long startTime = System.currentTimeMillis();
233    
234            // Reject any non-human fetch by an over-keen "precaching" client.
235            // We can't spare the bandwidth for spurious cache-ahead behaviour.
236            final ServletContext servletContext = getServletContext();
237            if(WebUtils.isPrecacheRequest(request))
238                {
239                servletContext.log("ExhibitServlet: blocked attempt to 'precache' exhibit");
240                response.sendError(HttpServletResponse.SC_FORBIDDEN, "Please do not attempt to 'precache' exhibits!");
241                return;
242                }
243    
244            // Get the relative request path...
245            final String rawPathInfo = request.getPathInfo();
246    //System.err.println("rawPathInfo = " + rawPathInfo);
247            // Adjust for any leading slash.
248            final String pathInfo = ((rawPathInfo != null) && rawPathInfo.startsWith("/")) ?
249                (rawPathInfo.substring(1)) : rawPathInfo;
250    
251            // Attempt to get all the info on the exhibit, which
252            // also implies that it exists!
253            // If anything fails, report `not found'.
254            try
255                {
256                if(!ExhibitName.validNameSyntax(pathInfo))
257                    {
258                    if(!response.isCommitted()) // Don't bother with error if too late...
259                        { response.sendError(HttpServletResponse.SC_NOT_FOUND); }
260    //                request.getRequestDispatcher(WebConsts.NOT_FOUND_PAGE).forward(request, response);
261                    return;
262                    }
263    
264                final DataSourceBean ds = getDataSource(getServletConfig(), request);
265                final AllExhibitProperties aep = ds.getAllExhibitProperties(-1);
266    
267                if(aep.aeid.length < 1)
268                    {
269                    servletContext.log("WARNING: request for exhibit with none currently available.");
270                    if(!response.isCommitted()) // Don't bother with error if too late...
271                        { response.sendError(HttpServletResponse.SC_SERVICE_UNAVAILABLE); }
272                    return;
273                    }
274    
275                final ExhibitStaticAttr esa = aep.aeid.getStaticAttr(pathInfo);
276                if(esa == null)
277                    {
278                    if(!response.isCommitted()) // Don't bother with error if too late...
279                        { response.sendError(HttpServletResponse.SC_NOT_FOUND); }
280                    return;
281                    }
282    
283                final org.hd.d.pg2k.svrCore.props.GenProps gp = ds.getGenProps(-1);
284    
285                // Veto dubious requests with a temporary fail or redirection if we are too busy.
286                final boolean isBusy = WebUtils.isOverloaded(servletContext);
287                final String hotlinkHost = WebUtils.requestProbablyReferredFromExternalSite(request);
288                // If this looks like an unauthorised hotlinked referral,
289                // then we may reject/deflect it to conserve bandwidth.
290                if(hotlinkHost != null)
291                    {
292                    // If busy then reject request quickly with a 5xx error code.
293                    // Likewise if this server has scarce/expensive bandwidth.
294                    if(isBusy || (LocalProps.getServerSlowdownFactor() > 1))
295                        {
296                        if(!response.isCommitted()) // Don't bother with error if too late...
297                            { response.sendError(HttpServletResponse.SC_SERVICE_UNAVAILABLE); }
298                        return;
299                        }
300    
301                    // Looks like a dodgy hotlink from someone unauthorised...
302                    if(ServletUtils.noteAndOrBlockHotlinker(servletContext, request, hotlinkHost, ds, gp))
303                        {
304                        // Note the blocked hotlink attempt.
305                        final SimpleVariableValue svvLoc = new SimpleVariableValue(
306                            SystemVariables.GENSTATS_STRING_GLOBAL_EVENT,
307                            "blockedHotlinkFrom=" + hotlinkHost);
308                        ds.setVariable(svvLoc);
309    
310                        // Just give up if already too late to signal an error, etc.
311                        if(!response.isCommitted())
312                            {
313                            final Enumeration<?> rEn = request.getHeaders("Referer");
314                            final boolean noRef = ((rEn == null) || !(rEn.hasMoreElements())); // No referrer...
315                            final String referer = noRef ? "" : (String) rEn.nextElement();
316                            //servletContext.log("WARNING: rejecting external exhibit hotlink from `"+hotlinkHost+"' for: " +esa.getCharSequence()+ " with Referer: " + TextUtils.sanitiseForXML(referer, 1024, true));
317    
318                            // For hotlinkers that we explicitly hate,
319                            // immediately send a 4xx to save time and effort.
320                            if(gp.getHotLinkDisallowHosts().contains(hotlinkHost))
321                                {
322                                response.sendError(HttpServletResponse.SC_FORBIDDEN);
323                                servletContext.log("WARNING: rejecting (404, disallowed hosts) external exhibit hotlink from `"+hotlinkHost+"' for: " +esa.getCharSequence()+ " with Referer: " + TextUtils.sanitiseForXML(referer, 1024, true));
324                                return;
325                                }
326    
327                            // If a no-hotlinks-please image URL has been supplied then divert to it...
328                            final String altURL = gp.getWEBSVR_EX_HOTLINK_DIVERT_URL();
329                            if(/* !isBusy && */ (altURL != null))
330                                {
331                                response.sendRedirect(altURL);
332                                servletContext.log("WARNING: redirecting (to alt URL) external exhibit hotlink from `"+hotlinkHost+"' for: " +esa.getCharSequence()+ " with Referer: " + TextUtils.sanitiseForXML(referer, 1024, true));
333                                return;
334                                }
335                            // Else redirect to the HTML catalogue page for the exhibit,
336                            // which should work reasonably nicely
337                            // for bona-fide search engine referrals, for example.
338                            else
339                                {
340                                response.sendRedirect(WebUtils.makeCatPageURL(esa.getCharSequence(), WebConsts.F_secondary_generated_HTML_suffix).toExternalForm());
341                                servletContext.log("WARNING: redirecting (to HTML cat page) external exhibit hotlink from `"+hotlinkHost+"' for: " +esa.getCharSequence()+ " with Referer: " + TextUtils.sanitiseForXML(referer, 1024, true));
342                                return;
343                                }
344                            }
345    
346                        return; // Terminate response without sending any exhibit data.
347                        }
348                    }
349    
350    
351                final long llength = esa.length;
352                assert(llength > 0);
353                assert(llength <= Integer.MAX_VALUE);
354                final int length = (int) llength;
355                final String type = ExhibitMIME.getMIMEType(pathInfo);
356    
357                // If we get this far, everything is probably OK.
358    
359    
360                // Create (strong) ETag header from MD5hash if available, else from "size-timestamp", in hex.
361                final ExhibitPropsLoadable epl = aep.getExhibitPropsLoadable(esa.getExhibitFullName());
362                final AccessionData ad = epl.getAccessionMetadata();
363                final String ETag;
364                if((ad != null) && (ad.hashMD5 != null))
365                    { ETag = "\"" + ad.hashMD5.toHexString() + "\""; }
366                else
367                    {
368                    final StringBuilder etsb = new StringBuilder(27);
369                    etsb.append('"');
370                    etsb.append(Integer.toHexString(length));
371                    etsb.append('-').append(Long.toHexString(esa.timestamp));
372                    etsb.append('"');
373                    ETag = etsb.toString();
374                    }
375    
376                // Set some cacheing headers too.
377                // Make the cache time a significant multiple of
378                // the interval between rechecks of exhibit immutable data
379                // (or our default slackness, if greater).
380                // Increase cache lifetime if we are busy, so as to reduce future server load.
381                // Also increase cache lifetime if, for example, it is the current background image...
382                final int basicCacheLifetime = 5 * Math.max(CoreConsts.DEFAULT_TEMPORAL_SLACKNESS_S * 1000,
383                    gp.getWEBSVR_MIN_EX_IMATTR_RECHECK_MS());
384                final long cacheLifetime;
385                if(isBusy)
386                    { cacheLifetime = ((long) basicCacheLifetime) << 3; }
387                else if(esa.getExhibitFullName().equals(gp.getWEBSVR_BG_IMAGE()))
388                    { cacheLifetime = Math.max(WebConsts.MIN_STATIC_WEBITEMS_CACHE_MS, ((long) basicCacheLifetime) << 5); }
389                else
390                    { cacheLifetime = basicCacheLifetime; }
391    
392                // Set the HTTP/1.1 cache-control header to reflect
393                // the maximum time that any proxy/end-user should cache this non-private exhibit.
394                // Also, forbid transformation (messing-up!) by intermediate proxies.
395                response.setHeader("Cache-Control", "public,no-transform,max-age="+(cacheLifetime/1000));
396    
397                // If this might be an HTML request from a search engine
398                // then we set a long expiry so that search engines will
399                // index and retain the page; of the order of weeks or even months.
400                // This *may* force HTTP/1.0-browser users to hit RELOAD
401                // more often than we'd like...
402                final boolean isFromSpider = WebUtils.requestProbablyFromSpider(request);
403                final long expiryTime = isFromSpider
404                    ? Math.max(cacheLifetime, WebConsts.SPIDER_PAGE_EXPIRY_MS)
405                    : cacheLifetime;
406                response.setDateHeader("Expires", System.currentTimeMillis() + expiryTime);
407    
408                // Set the true last-modified date (known valid, eg not -1).
409                final long lastModified = esa.timestamp;
410                response.setDateHeader("Last-Modified", lastModified);
411                // Set ETag.
412                response.setHeader("ETag", ETag);
413    
414    
415                // Deal with If-Modified-Since here,
416                // BEFORE any other headers have been generated for HEAD and GET
417                // OTHER THAN cache control which should be resent for broken clients.
418                // (and before sending any body).
419                if(WebUtils.abortIfETagMatchOrNotModifiedSince(ETag, lastModified, request, response))
420                    { return; }
421    
422    
423                // Set the main type and length headers for browsers.
424                response.setContentLength(length);
425                response.setContentType(type);
426    
427                // If we have the exhibit MD5 hash available
428                // then we can add the appropriate HTTP header as a bonus.
429                if((ad != null) && (ad.hashMD5 != null))
430                    {
431                    final String base64Hash = TextUtils.encode8To6(ad.hashMD5.toByteArray());
432                    response.setHeader("Content-MD5", base64Hash);
433                    }
434    
435    
436                // If this is a HEAD request then return without providing a body.
437                if(isHEAD) { return; }
438    
439    
440                // Generate the body (ie send the raw exhibit).
441    
442                // Get access to the output stream...
443                final ServletOutputStream os = response.getOutputStream();
444    
445                // Monitor relative read and write performance.
446                long totalReadTime = 0;
447                long totalWriteTime = 0;
448    
449                // Real humans fetching big exhibits (not hotlinked!)
450                // are eligible for async read-ahead to improve throughput.
451                // We use this to measure throughput
452                // even when a read-ahead thread is not allowed.
453                final int thresholdBytes = DEFAULT_BUF_SIZE * MIN_BLOCKS_FOR_READAHEAD;
454                final boolean mayNeedReadAhead = !isFromSpider && (hotlinkHost == null) &&
455                    (esa.length >= 3*thresholdBytes);
456    
457                // We copy exhibit data a chunk at a time, followed by a flush().
458                //
459                // We attempt to make this robust in the face of transient data-fetch errors,
460                // retrying for a time proportional to that already spent
461                // in order to reflect the cost of restarting from the beginning
462                // to the end user.
463                //
464                // In general we assume that we buffer enough data
465                // in the HTTP/TCP stack to avoid significant loss of throughput,
466                // but when we wait a similar fraction of service time for reads and writes
467                // then we can start a potentially-resource-hungry read-ahead.
468                //
469                // We use a direct buffer for maximum read performance
470                // providing the transfer is large enough to be worth the various overheads.
471                // We'll use this same buffer in the async and sync read modes.
472                final ByteBuffer buffer = (length >= DEFAULT_BUF_SIZE) ?
473                    ByteBuffer.allocateDirect(DEFAULT_BUF_SIZE) :
474                    ByteBuffer.allocate(length); // Small, lightweight, non-direct buffer,
475                // Queued data, for when we do read-ahead.
476                final ArrayBlockingQueue<byte[]> data = (!ENABLE_READAHEAD || !mayNeedReadAhead) ? null :
477                    new ArrayBlockingQueue<byte[]>(1+2*MIN_BLOCKS_FOR_READAHEAD);
478                // Recycling (only default/full-size) buffers to save a little heap pressure and GC.
479                // Only useful if the exhibit is several full blocks' worth of data,
480                // enough to have some queued and some recycled, and reclaim the cost of this queue.
481                // This is useful for large exhibits regardless of whether read-head is used.
482                final ArrayBlockingQueue<byte[]> recycledBuffers = (length >= 5*DEFAULT_BUF_SIZE) ? null :
483                    new ArrayBlockingQueue<byte[]>(1+2*MIN_BLOCKS_FOR_READAHEAD);
484                Thread reader = null; // Reader thread, if we use it.
485                // Signal thrown in thread, if any.
486                final AtomicReference<IOException> bgThreadErr = new AtomicReference<IOException>();
487                try
488                    {
489                    for(int start = 0; start < length; )
490                        {
491                        // Be prepared to retry to get the data if need be.
492                        final long timeBeforeRead = System.currentTimeMillis();
493    
494                        // If read-ahead is enabled and has not been started yet
495                        // (reads taking a significant fraction of write/wallclock time)
496                        // and we're far enough through the transfer to have reasonable stats
497                        // and we're far enough from the end to be worth starting a thread
498                        // and we've been running slow (reads taking too long)
499                        // and the write time is enough to make read-ahead/overlap useful
500                        // and the download time so far has actually been significant
501                        // and we're not horribly short of memory
502                        // then start the async thread to read ahead for us.
503                        if(ENABLE_READAHEAD && mayNeedReadAhead && (reader == null) &&
504                           (start >= DEFAULT_BUF_SIZE) && (length-start > thresholdBytes) &&
505                           (totalReadTime > (totalWriteTime>>>4)) /* Reads are slowing throughput >~10%. */ &&
506                           (totalReadTime < (totalWriteTime<<4)) /* >~10% time to be saved by overlapping reads and writes. */ &&
507                           ((timeBeforeRead - startTime) > WebConsts.MAX_PG_DOWNLOAD_MS/2) &&
508                           !MemoryTools.isMemoryStressed())
509                            {
510                            // Actually create/start the read-ahead thread.
511                            final int startPos = start;
512    
513    /* if(IsDebug.isDebug) */ { servletContext.log("[ExhibitServlet: INFO: starting async read-ahead @"+startPos+" for slow download (fetch "+totalReadTime+"ms, send "+totalWriteTime+"ms) for "+esa+".]"); }
514    
515                            reader = new Thread("ExhibitServlet read-ahead for "+esa){
516                                @Override public final void run()
517                                    {
518                                    try
519                                        {
520                                        try
521                                            {
522                                            for(int i = startPos; i < length; )
523                                                {
524    //if(IsDebug.isDebug) { System.out.println("[ExhibitServlet: async read-ahead for "+esa+".]"); }
525                                                // Read the data...
526                                                final byte b[] = readNextBlock(startTime, buffer, recycledBuffers, ds, esa, i, servletContext, null);
527                                                // Queue it up to be read.
528                                                data.put(b);
529                                                // Adjust for the data read.
530                                                i += b.length;
531                                                }
532                                            }
533                                        catch(final IOException e)
534                                            {
535                                            bgThreadErr.set(e);
536                                            data.put(QUEUE_POISON); // Poison the queue.
537    servletContext.log("ExhibitServlet: caught IOException in read-ahead thread: " + e.getClass().getName() + ": " + e.getMessage());
538                                            }
539                                        catch(final InterruptedException e)
540                                            {
541                                            data.put(QUEUE_POISON); // Poison the queue.
542                                            throw e;
543                                            }
544                                        catch(final Throwable t)
545                                            {
546                                            final IOException e = new IOException("unexpected error");
547                                            e.initCause(t); // Note the original exception.
548                                            bgThreadErr.set(e);
549                                            data.put(QUEUE_POISON); // Poison the queue.
550                                            e.printStackTrace();
551                                            }
552                                        }
553                                    catch(final InterruptedException e)
554                                        {
555                                        // We were presumably killed off by the servlet thread,
556                                        // So log the fact and quit immediately.
557                                        servletContext.log("read-ahead thread interrupted");
558                                        }
559                                    }
560                                };
561                            reader.setDaemon(true);
562                            // Try to raise the reader priority a notch,
563                            // but gracefully handle refusal!
564                            try { reader.setPriority(Math.min(Thread.MAX_PRIORITY, 1+Thread.currentThread().getPriority()));}
565                            catch(final Exception e) { e.printStackTrace(); }
566                            reader.start();
567                            }
568    
569                        // Read the next chunk of data.
570                        final byte[] tmpBuf;
571                        if(ENABLE_READAHEAD && (reader != null))
572                            {
573                            // Get data from async read-ahead thread.
574                            //
575                            // If the background thread got an error
576                            // then rethrow it here to stop abruptly.
577                            final IOException err = bgThreadErr.get();
578                            if(err != null) { throw err; }
579                            tmpBuf = data.take(); // Blocking read of data...
580                            // Quit if the read-ahead thread deliberately poisoned the queue.
581                            if(tmpBuf.length == 0) { throw new IOException("problem with read-ahead thread"); }
582                            }
583                        else
584                            {
585                            // Fetch the next block of exhibit data synchronously,
586                            // ie using the current servlet thread.
587                            tmpBuf = readNextBlock(startTime, buffer, recycledBuffers, ds, esa, start, servletContext, os);
588                            }
589    
590                        // Note the amount of data actually read...
591                        start += tmpBuf.length;
592    
593                        final long timeBeforeWrite = System.currentTimeMillis();
594                        // Send the exhibit data to the user/client.
595                        os.write(tmpBuf);
596                        final long timeAfterWrite = System.currentTimeMillis();
597                        totalReadTime += (timeBeforeWrite - timeBeforeRead);
598                        totalWriteTime += (timeAfterWrite - timeBeforeWrite);
599    
600                        // Recycle the buffer if possible
601                        // (but throw away any that we don't have room for).
602                        // We only queue up default/max-size buffers.
603                        if((recycledBuffers != null) && (tmpBuf.length == DEFAULT_BUF_SIZE))
604                            { recycledBuffers.offer(tmpBuf); }
605                        }
606    
607                    // If the background thread got an error not already caught
608                    // then rethrow it here to stop abruptly.
609                    final IOException err = bgThreadErr.get();
610                    if(err != null) { throw err; }
611                    }
612                finally
613                    {
614                    // Clean up the async read-ahead thread if (still) running,
615                    // interrupt()ing it if need be to wake it up or unblock it.
616                    if(ENABLE_READAHEAD && (reader != null))
617                        {
618                        while(reader.isAlive())
619                            { reader.interrupt(); reader.join(CoreConsts.MAX_INTERACTIVE_DELAY_MS); }
620                        }
621                    }
622                os.flush(); // Done!
623                os.close(); // Try to catch any broken/aborted connection now.
624    
625                assert((reader == null) || (!reader.isAlive())); // No reader thread running now.
626    
627                // Note slow downloads to help us tune the system.
628                // We can only really hope to significantly speed up downloads
629                // of exhibits that were eligible for read-ahead.
630                if(mayNeedReadAhead)
631                    {
632                    // If the exhibit took significant time to download and
633                    // if we spent longer fetching the data than sending it
634                    // then we probably gave the user a slow download.
635                    final long wallclockTime = (System.currentTimeMillis() - startTime);
636                    if((totalReadTime > wallclockTime/4) &&
637                       (wallclockTime > WebConsts.MAX_PG_DOWNLOAD_MS))
638                        { servletContext.log("[ExhibitServlet: WARNING: slow download (fetch "+totalReadTime+"ms, send "+totalWriteTime+"ms, wallclock "+wallclockTime+"ms) for "+esa+", "+(length/(0.001f*Math.max(1,wallclockTime)))+"Bps.]"); }
639                    }
640    
641                // Rate/note only deliberate downloads by humans on our site(s),
642                // thus ignore:
643                //   * Hits that appear to be from spiders.
644                //   * Hits that appear to be direct hotlinks from external sites.
645                //   * Hits for any (small) image/exhibit potentially
646                //     being used as its own thumbnail.
647                //   * Hits for any current GenProps-selected background image.
648                // Note proximity of user also...
649                final CharSequence bgImage;
650                if(!isFromSpider &&
651                   (hotlinkHost == null) &&
652                   !ImageUtils.canBeOwnThumbnail(esa) &&
653                   !((null != (bgImage = gp.getWEBSVR_BG_IMAGE())) && TextUtils.contentEquals(esa.getCharSequence(), bgImage)))
654                    {
655                    try
656                        {
657                        final String shortNameAsString = esa.getExhibitFullName().getShortName().toString(); // FIXME: horribly inefficient to conv to/from String
658                                            // Record the short exhibit name as an event...
659                        final SimpleVariableValue svvDL = new SimpleVariableValue(
660                            SystemVariables.ACCESSPATTERN_COMPLETED_DOWNLOAD,
661                            shortNameAsString);
662                        ds.setVariable(svvDL);
663                        final SimpleVariableValue svvDLl = new SimpleVariableValue(
664                            SystemVariables.ACCESSPATTERN_COMPLETED_DOWNLOAD_LOCAL,
665                            shortNameAsString);
666                        ds.setVariable(svvDLl);
667    
668                        final InetAddress clientIP =
669                            InetAddress.getByName(request.getRemoteAddr());
670    
671                        // Note the download by approximate geographical region...
672                        // Do so only AFTER the response completes to minimise latency,
673                        // and only for requests that did not abort with an exception.
674                        // We may be prepared to spend some time trying to look this up
675                        // dynamically, hoping that DNS cacheing will keep the cost down.
676                        final String regionByAddress = GeoUtils.getRegionByAddress(clientIP, false);
677                        final SimpleVariableValue svvLoc = new SimpleVariableValue(
678                            SystemVariables.GENSTATS_STRING_GLOBAL_EVENT,
679                            "download.loc=" + regionByAddress);
680                        ds.setVariable(svvLoc);
681    
682                        // Note the rough proximity of the client so as to monitor
683                        // how well geo-sensitive load balancing is doing.
684                        // We can only do this if we know our physical location
685                        // (eg if this is a mirror and thus has an embedded ccTLD).
686                        // In particular we note downloads from very remote clients.
687                        final String mirrorTag = LocalProps.getMirrorTag();
688                        if(mirrorTag != null)
689                            {
690                            final GeoUtils.CCTLD cc = new GeoUtils.CCTLD(mirrorTag.substring(0, 2));
691                            final GeoProximity proximity = GeoUtils.computeProximityByAddress(clientIP, cc, false);
692                            final SimpleVariableValue svvProx = new SimpleVariableValue(
693                                SystemVariables.GENSTATS_STRING_GLOBAL_EVENT,
694                                "download.prox=" + proximity);
695                            ds.setVariable(svvProx);
696    
697                            // Log the location of users with no proximity
698                            // since we might need to provide a server locally!
699                            if(proximity == GeoProximity.NONE)
700                                {
701                                final SimpleVariableValue svvNoProx = new SimpleVariableValue(
702                                    SystemVariables.GENSTATS_STRING_GLOBAL_EVENT,
703                                    "download.noprox=" + regionByAddress);
704                                ds.setVariable(svvNoProx);
705                                }
706                            }
707                        }
708                    catch(final Exception e) // Absorb (but briefly log) any errors...
709                        {
710                        servletContext.log("ExhibitServlet: caught Exception: " + e.getClass().getName() + ": " + e.getMessage());
711                        }
712                    }
713    
714                return; // Finished OK!
715                }
716            catch(final Throwable t) // Absorb (but briefly log) any error...
717                {
718                servletContext.log("ExhibitServlet: caught Throwable: " + t.getClass().getName() + ": " + t.getMessage());
719    if(IsDebug.isDebug) { t.printStackTrace(); }
720                }
721    
722            // Some problem handling the request...
723            if(!response.isCommitted()) // Don't bother with error if too late...
724                { response.sendError(HttpServletResponse.SC_INTERNAL_SERVER_ERROR); }
725            return;
726            }
727    
    /**Can be inserted into the data queue by the async thread to force the foreground thread to quit.
     * <p>
     * The consumer of the queue recognises the poison by its zero length
     * (not by object identity) and aborts with an IOException on seeing it,
     * so genuine data blocks put on the queue must never be zero-length.
     */
    private static final byte[] QUEUE_POISON = new byte[0];
730    
731        /**Read the next block of data from the exhibit using the given buffer returning it as a byte[]; never null nor zero-length.
732         * Retries failed reads up to a time proportional to that already spent
733         * on the entire transfer (to reflect the cost of restarting).
734         *
735         * @param os  if non-null may be used to flush() data down to the user;
736         *     not safe to call from any thread other than the servlet thread
737         * @param byteBufs  source of recycled byte buffers that we can use rather than using new,
738         *     we freely discard anything we find that isn't suitable for our purposes; never null
739         */
740        private static byte[] readNextBlock(final long startTime,
741                                            final ByteBuffer buffer,
742                                            final ArrayBlockingQueue<byte[]> byteBufs,
743                                            final DataSourceBean ds,
744                                            final ExhibitStaticAttr esa,
745                                            final int start,
746                                            final ServletContext servletContext,
747                                            final ServletOutputStream os)
748            throws IOException, InterruptedException
749            {
750            assert(buffer != null);
751    
752            final long timeBeforeRead = System.currentTimeMillis();
753            final long retryUntil = timeBeforeRead + ((timeBeforeRead - startTime) / 4) +
754                                        (WebConsts.MAX_PG_DOWNLOAD_MS<<4) + 30000;
755    
756            // High-level recovery from temporary I/O errors.
757            for( ; ; )
758                {
759                try
760                    {
761                    buffer.clear();
762                    assert(buffer.remaining() > 0); // Must not be zero-sized buffer.
763                    // Get the data, and cache it locally if possible.
764                    ds.getRawFile(buffer, esa.getExhibitFullName(), start, false);
765                    buffer.flip();
766                    final int bytesRead = buffer.limit();
767                    if(bytesRead > 0)
768                        { break; /* OK, got some data! (Normal case.) */ }
769                    // Didn't get any data,
770                    // so fall through to sleep and retry.
771                    }
772                catch(final IOException e)
773                    {
774                    if(System.currentTimeMillis() > retryUntil)
775                        {
776                        servletContext.log("ERROR: ExhibitServlet: retry FAILED at offset "+start+" for "+esa+": " + e.getMessage() + ".");
777                        throw e; // Abort.
778                        }
779                    }
780    
781                // Flush what we already read/wrote down to the user if possible,
782                // thus aborting (SIGPIPE) if the user has given up and disconnected.
783                if(null != os) { os.flush(); }
784    
785                // Wait a little while before trying again,
786                // with some randomisation to help avoid collisions.
787                // Not so long (1s+) as to force the TMF
788                // into thinking that we are "busy".
789                Thread.sleep((GenUtils.mustConservePower()?503:203) + Rnd.fastRnd.nextInt(203));
790                servletContext.log("[ExhibitServlet: retrying read at offset "+start+" for "+esa+".]");
791                }
792    
793            // FIXME: for now, with the current servlet API, we have to copy data into a byte[].
794            final int bytesToWrite = buffer.limit();
795            assert(buffer.position() == 0);
796            assert(bytesToWrite > 0);
797    
798            // Attempt to avoid a heap allocation to avoid stressing GC if we can
799            // by reusing a recycled buffer,
800            // but if we don't immediately find something exactly right
801            // then just use new...
802            // Note that only default/max-sized buffers should be in the pool.
803            byte[] tmpBuf;
804            if((bytesToWrite != DEFAULT_BUF_SIZE) || (byteBufs == null) || ((tmpBuf = byteBufs.poll()) == null) /* || (tmpBuf.length != bytesToWrite) */)
805                { tmpBuf = new byte[bytesToWrite]; }
806            assert(tmpBuf.length == bytesToWrite);
807    
808            // Copy the data...
809            buffer.get(tmpBuf);
810            return(tmpBuf);
811            }
812    
813    
814        /**Get `last-modified' time for exhibit.
815         * If we can't find out, eg because the exhibit is not present,
816         * we return -1L, the default value.
817         * <p>
818         * If returning a last-modified value causes difficulty
819         * (eg with Tomcat 4.0.1) then we return -1.
820         *
821         * @param request The servlet request we are processing
822         */
823        @Override
824        public final long getLastModified(final HttpServletRequest request)
825            {
826    //        if(WebConsts.AVOID_LAST_MODIFIED) { return(-1L); }
827    
828            // Get the relative request path...
829            final String rawPathInfo = request.getPathInfo();
830    //System.err.println("rawPathInfo = " + rawPathInfo);
831            // Adjust for any leading slash.
832            final String pathInfo = ((rawPathInfo != null) && rawPathInfo.startsWith("/")) ?
833                (rawPathInfo.substring(1)) : rawPathInfo;
834    
835            try
836                {
837                final Name.ExhibitFull fullName;
838                try { fullName = Name.ExhibitFull.create(pathInfo); }
839                catch(final IllegalArgumentException e)
840                    {
841                    // Invalid name; return default 'not-known' value.
842                    return(-1L);
843                    }
844    
845                final ExhibitStaticAttr esa = getDataSource(getServletConfig(), request).
846                        getStaticAttr(fullName);
847    
848                // No such exhibit; return default 'not-known' value.
849                if(esa == null) { return(-1); }
850    
851                // Return the exhibit's timestamp...
852                return(esa.timestamp);
853                }
854            catch(final Exception e)
855                {
856                // In case of error return default `not-known' value.
857                return(-1L);
858                }
859            }
860    
    /**Unique Serialisation class ID generated by http://random&#46;hd&#46;org/.
     * Declared explicitly so that the value is fixed in the source.
     */
    private static final long serialVersionUID = -1774514190468389217L;
863        }