001    /*
002    Copyright (c) 1996-2012, Damon Hart-Davis
003    All rights reserved.
004    
005    Redistribution and use in source and binary forms, with or without
006    modification, are permitted provided that the following conditions are
007    met:
008    
009      * Redistributions of source code must retain the above copyright
010        notice, this list of conditions and the following disclaimer.
011    
012      * Redistributions in binary form must reproduce the above copyright
013        notice, this list of conditions and the following disclaimer in the
014        documentation and/or other materials provided with the
015        distribution.
016    
017    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
018    IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
019    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
020    PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
021    OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
022    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
023    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
024    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
025    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
026    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
027    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
028    */
029    package org.hd.d.pg2k.webSvr.exhibit;
030    
031    import java.io.OutputStream;
032    
033    import javax.servlet.ServletConfig;
034    import javax.servlet.http.HttpServlet;
035    import javax.servlet.http.HttpServletRequest;
036    import javax.servlet.http.HttpServletResponse;
037    
038    import org.hd.d.pg2k.svrCore.ExhibitThumbnails;
039    import org.hd.d.pg2k.svrCore.Name;
040    import org.hd.d.pg2k.svrCore.TextUtils;
041    import org.hd.d.pg2k.svrCore.MIME.ExhibitMIME;
042    import org.hd.d.pg2k.svrCore.props.LocalProps;
043    import org.hd.d.pg2k.webSvr.util.WebConsts;
044    import org.hd.d.pg2k.webSvr.util.WebUtils;
045    
046    
047    /**This is the servlet that serves thumbnail data.
048     * This is optimised to serve medium quantities of binary data
049     * efficiently, and to get headers right to optimise performance
050     * of caches, spiders, etc, while not losing site of usage logging.
051     */
052    public final class ThumbnailServlet extends HttpServlet
053        {
054        /**Get singleton (per-servlet-context) data pipeline/cache instance.
055         * The config param must not be null, but for some operations
056         * (such as calling destroy()) request can be null.
057         */
058        private static DataSourceBean getDataSource(
059                final ServletConfig config,
060                final HttpServletRequest request)
061            {
062            // Fetches/creates the data source...
063            final DataSourceBean dataSource =
064                DataSourceBean.getApplicationInstance(config.getServletContext());
065    
066            // Ensure that the essential details are set up.
067            dataSource.setServletContext(config.getServletContext());
068            if(request != null)
069                { dataSource.setContextPath(request.getContextPath()); }
070    
071            return(dataSource);
072            }
073    
074        /**Respond to a GET request for the content served by this servlet.
075         *
076         * @param request The servlet request we are processing
077         * @param response The servlet response we are producing
078         *
079         */
080        @Override
081        public void doGet(final HttpServletRequest request,
082                          final HttpServletResponse response)
083    //        throws IOException, ServletException
084            {
085            doAction(request, response);
086            }
087    
088        /**Respond to a HEAD request for the content served by this servlet.
089         *
090         * @param request The servlet request we are processing
091         * @param response The servlet response we are producing
092         *
093         */
094        @Override
095        public void doHead(final HttpServletRequest request,
096                           final HttpServletResponse response)
097    //        throws IOException, ServletException
098            {
099            doAction(request, response);
100            }
101    
102        /**If true then we may generate/send the MD5 content hash for some/all thumbnails. */
103        private static final boolean SEND_TN_MD5 = true;
104    
105        /**Respond to a GET/HEAD request for the content served by this servlet.
106         * Returns the exhibit data, with a correct MIME type.
107         * <p>
108         * The request path is of the form /XXX/exhibitname where XXX
109         * identifies the required thumbnail/sample size (small or standard).
110         * <p>
111         * If an exhibit is requested by a syntactically-invalid name,
112         * we produce an HTTP `forbidden' (403) response.
113         * <p>
114         * If we request an exhibit with a valid name, but that does not
115         * exist, return return a `not-found' (404) response (unless
116         * there are no exhibits available, perhaps because of a database
117         * problem, in which case we may return `service unavailable' in the
118         * hope that that will prevent search engines from purging our
119         * exhibits if they happen to visit when we are reloading the
120         * database).
121         * <p>
122         * We refuse to deal with some dubious requests
123         * (eg apparently hotlinks directly to our exhibits from external sites)
124         * if we are too busy,
125         * so as to conserve resources for bona fide users.
126         * Dubious requests are rejected with a temporary failure (5xx) code.
127         * <p>
128         * Note: we use/honour <code>Last-Modified</code> / <code>If-Modified-Since</code>
129         * but <em>not</em> <code>ETag</code> / <code>If-None-Match</code> for the following reason...
130         * The thumbnail MD5 could also be used as a strong ETag,
131         * though note the potential problem if multiple mirrors
132         * generate their own copies of a thumbnail,
133         * all good but not byte-for-byte identical.
134         * A client connecting to different instances behind one URL
135         * will be forced to ping-pong between versions
136         * unless (say) the *exhibit* MD5 is used as a *weak* ETag,
137         * thus probably just keeping using Last-Modified is best
138         * so the client just fetches and retains the newest encountered
139         * (which also allows for bad thumbnails to be flushed with newer ones).
140         * Avoiding ETag avoids confusion, and bandwidth/overhead,
141         * especially for small thumbnails.
142         * <p>
143         * This <em>ignores</em> range requests for simplicity/safety
144         * since these entities are relatively small
145         * and should simply be cached whole, more-or-less indefinitely.
146         *
147         * @param request The servlet request we are processing
148         * @param response The servlet response we are producing
149         *
150         */
151        public void doAction(final HttpServletRequest request,
152                             final HttpServletResponse response)
153            {
154            // Get the relative request path...
155            final String rawPathInfo = request.getPathInfo();
156            // Adjust for any leading slash.
157            final String fullPathInfo = (rawPathInfo.startsWith("/")) ?
158                (rawPathInfo.substring(1)) : rawPathInfo;
159    
160            final int firstSlash = fullPathInfo.indexOf('/');
161            if(firstSlash == -1) // Whoops, syntax error...
162                {
163                response.setStatus(HttpServletResponse.SC_FORBIDDEN);
164                return;
165                }
166    
167            // Extract the putative exhibit name.
168            final String exhibitNameRaw = fullPathInfo.substring(firstSlash + 1);
169    
170            // Is the request for standard or small?
171            // Default to small to save bandwidth in case of mistakes!
172            final boolean smallTn = !fullPathInfo.substring(0, firstSlash).equals(WebConsts.PATH_TN_STD);
173    
174            // Attempt to get all the thumbnails for the underlying exhibit,
175            // which also implies that it exists!
176            // If anything fails, report `not found'.
177            try
178                {
179                final Name.ExhibitFull exhibitName;
180                try { exhibitName = Name.ExhibitFull.create(exhibitNameRaw); }
181                catch(final IllegalArgumentException e)
182                    {
183                    // Reject invalid exhibit name with a 404 / NOT FOUND.
184                    response.setStatus(HttpServletResponse.SC_NOT_FOUND);
185                    return;
186                    }
187    
188                final DataSourceBean ds = getDataSource(getServletConfig(), request);
189    
190                // Veto dubious requests with a temporary fail if we are too busy.
191                // Likewise if bandwidth is scarce/expensive for this server.
192                final String hotlinkHost = WebUtils.requestProbablyReferredFromExternalSite(request);
193                if((hotlinkHost != null) &&
194                   (LocalProps.isCloudMirrorInstance() || // Really don't want to pay for hotlink bandwidth.
195                       WebUtils.isOverloaded(getServletContext()) ||
196                       (LocalProps.getServerSlowdownFactor() > 1) ||
197                       ServletUtils.noteAndOrBlockHotlinker(getServletContext(), request, hotlinkHost, ds, ds.getGenProps(-1))))
198                    {
199                    if(!response.isCommitted()) // Don't bother with error if too late...
200                        { response.setStatus(HttpServletResponse.SC_SERVICE_UNAVAILABLE); }
201    //System.err.println("WARNING: rejecting external thumbnail referral from `"+hotlinkHost+"' for: " +pathInfo);
202                    return;
203                    }
204    
205    
206                // Now try and retrieve the specified thumbnail(s);
207                // trying to create them on the fly if necessary.
208                final ExhibitThumbnails tns = ds.getThumbnails(exhibitName, true);
209                // No thumbnails at all and never will be for this exhibit?
210                // Stop immediately with a 404 Not Found.
211                if(ExhibitThumbnails.NO_THUMBNAILS.equals(tns))
212                    { response.setStatus(HttpServletResponse.SC_NOT_FOUND); return; }
213                // Might be possible to get/return them in the future...
214                // So just note that the thumbnails are *currently* unavailable,
215                // ie this may be a temporary problem worth a retry later.
216                if(tns == null)
217                    { response.setStatus(HttpServletResponse.SC_SERVICE_UNAVAILABLE); return; }
218    
219                final ExhibitThumbnails.Thumbnail tn = smallTn ?
220                    tns.getSmall() : tns.getStandard();
221                // No thumbnail of requested size?
222                // Stop immediately with a 404 Not Found.
223                if(tn == null)
224                    { response.setStatus(HttpServletResponse.SC_NOT_FOUND); return; }
225    
226                // Set some cacheing headers too.
227                // We let thumbnails stay in proxy caches and the user's browser cache for a LONG time,
228                // as long as static items of Web-site furniture in fact,
229                // because we assume that they almost never change once generated.
230                // This should help improve system performance and reduce bandwidth requirements.
231                final long cacheLifetime = WebConsts.DEFAULT_STATIC_WEBITEMS_CACHE_MS;
232                // Stop proxies messing with the encoding/quality of the large thumbnail
233                // since that may be the best version that some devices (eg mobiles) get to see.
234                // We allow proxies/etc much more leeway with small thumbnails.
235                response.setHeader("Cache-Control", "public,max-age="+(cacheLifetime/1000) +
236                        (smallTn ? "" : ",no-transform"));
237                response.setDateHeader("Expires", System.currentTimeMillis() + cacheLifetime);
238    
239                // Set the last-modified date if available.
240                final long lastModified = getLastModified(request);
241                if(lastModified != -1)
242                    { response.setDateHeader("Last-Modified", lastModified); }
243    
244    
245                // Deal with If-Modified-Since here,
246                // BEFORE any other headers have been generated for HEAD and GET
247                // OTHER THAN cache control which should be resent for broken clients.
248                // (and before sending any body).
249                if(WebUtils.abortIfNotModifiedSince(lastModified, request, response))
250                    { return; }
251    
252    
253                final int length = tn.size();
254                final String type = ExhibitMIME.getMIMEType(exhibitName);
255    
256                // If we get this far, everything is probably OK.
257                response.setContentLength(length);
258                response.setContentType(type);
259    
260                // We send the MD5 content hash header for a standard thumbnail
261                // mainly to try to deter transparent proxies from transcoding
262                // to "optimise" the image which may unacceptably degrade it;
263                // some end users will use a standard thumbnail as-is.
264                // However, we only attempt this for standard (larger) thumbnails,
265                // and we let the small ones, of which more are transmitted, get munged.
266                // We also don't want to add even the bandwidth overhead
267                // of an extra HTTP header for small thumbnails.
268                //
269                // The thumbnail MD5 could also be used as an ETag,
270                // but it adds overhead and may cause confusion and performance loss
271                // with multiple mirrors behind one URL with distinct locally-generated thumbnails.
272                if(SEND_TN_MD5 && !smallTn)
273                    {
274                    final String base64Hash = TextUtils.encode8To6(tn.getMD5Hash());
275                    response.setHeader("Content-MD5", base64Hash);
276                    }
277    
278                // If this is a HEAD request then return without providing the body.
279                if("HEAD".equalsIgnoreCase(request.getMethod()))
280                    { return; }
281    
282                // Generate the GET body (ie send the raw thumbnail data).
283                // Write the thumbnail in one go to the output stream...
284                final OutputStream os = response.getOutputStream();
285                tn.writeData(os);
286                os.flush(); // Done!
287                os.close(); // Try to catch any broken/aborted connection now.
288                return; // Completed successfully.
289                }
290            catch(final Exception e)
291                {
292                getServletContext().log("unexpected exception serving thumbnail", e);
293                // Fall through to handle unexpected exception.
294                }
295    
296            // Failure while handling the result;
297            // ask the user to try again if possible...
298            if(!response.isCommitted())
299                { response.setStatus(HttpServletResponse.SC_SERVICE_UNAVAILABLE); }
300            }
301    
302    
303        /**Get `last-modified' time for a thumbnail, or -1 if not available.
304         * If we can find any thumbnails for the exhibit implied,
305         * and those thumbnails have a creation timestamp (older ones may not)
306         * then we return that as our last-modified date.
307         * <p>
308         * In case of any difficulty we return the -1 "not-known" value.
309         *
310         * @param request The servlet request being processed
311         */
312        @Override
313        public final long getLastModified(final HttpServletRequest request)
314            {
315            // Get the relative request path...
316            final String rawPathInfo = request.getPathInfo();
317            // Adjust for any leading slash.
318            final String fullPathInfo = (rawPathInfo.startsWith("/")) ?
319                (rawPathInfo.substring(1)) : rawPathInfo;
320    
321            final int firstSlash = fullPathInfo.indexOf('/');
322            if(firstSlash == -1) // Whoops, syntax error...
323                { return(-1); }
324    
325            // Extract the putative exhibit name.
326            final String exhibitNameRaw = fullPathInfo.substring(firstSlash + 1);
327    
328            // Attempt to get the thumbnails for the underlying exhibit.
329            // If anything fails, return -1 to indicate no last-modified-date available.
330            try
331                {
332                // If this name is invalid an exception will be thrown (and caught below).
333                final Name.ExhibitFull exhibitName = Name.ExhibitFull.create(exhibitNameRaw);
334    
335                final DataSourceBean ds = getDataSource(getServletConfig(), request);
336    
337                // Now try and retrieve the specified thumbnail(s) but don't create them.
338                final ExhibitThumbnails tns = ds.getThumbnails(exhibitName, false);
339    
340                // If no tns available, stop now.
341                if(tns == null)
342                    { return(-1); }
343    
344                // If this tns instance has no creation date
345                // (eg because we have an old-style thumbnails object cached)
346                // then use the AEID exhibit-set timestamp as a plausible proxy.
347                if(tns.created <= 0)
348                    { return(ds.getAllExhibitImmutableData(-1).timestamp); }
349    
350                // We have the thumbnails' creation date, so return it!
351                return(tns.created);
352                }
353            catch(final Exception e)
354                { return(-1); }
355            }
356    
357        /**Unique Serialisation class ID generated by http://random&#46;hd&#46;org/. */
358        private static final long serialVersionUID = -2497705561191352995L;
359        }