001 /*
002 Copyright (c) 1996-2012, Damon Hart-Davis
003 All rights reserved.
004
005 Redistribution and use in source and binary forms, with or without
006 modification, are permitted provided that the following conditions are
007 met:
008
009 * Redistributions of source code must retain the above copyright
010 notice, this list of conditions and the following disclaimer.
011
012 * Redistributions in binary form must reproduce the above copyright
013 notice, this list of conditions and the following disclaimer in the
014 documentation and/or other materials provided with the
015 distribution.
016
017 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
018 IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
019 TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
020 PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
021 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
022 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
023 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
024 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
025 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
026 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
027 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
028 */
029 package org.hd.d.pg2k.webSvr.util;
030
031 import java.io.BufferedInputStream;
032 import java.io.ByteArrayOutputStream;
033 import java.io.IOException;
034 import java.io.InputStream;
035 import java.io.OutputStream;
036 import java.lang.ref.SoftReference;
037 import java.net.URLConnection;
038 import java.nio.ByteBuffer;
039 import java.util.SortedMap;
040 import java.util.zip.ZipEntry;
041 import java.util.zip.ZipInputStream;
042
043 import javax.servlet.ServletConfig;
044 import javax.servlet.http.HttpServlet;
045 import javax.servlet.http.HttpServletRequest;
046 import javax.servlet.http.HttpServletResponse;
047
048 import org.hd.d.pg2k.svrCore.AccessionData;
049 import org.hd.d.pg2k.svrCore.AllExhibitProperties;
050 import org.hd.d.pg2k.svrCore.CoreConsts;
051 import org.hd.d.pg2k.svrCore.ExhibitPropsLoadable;
052 import org.hd.d.pg2k.svrCore.ExhibitStaticAttr;
053 import org.hd.d.pg2k.svrCore.FileTools;
054 import org.hd.d.pg2k.svrCore.FileTools.ZE;
055 import org.hd.d.pg2k.svrCore.MemoryTools;
056 import org.hd.d.pg2k.svrCore.Name;
057 import org.hd.d.pg2k.svrCore.Tuple;
058 import org.hd.d.pg2k.svrCore.MIME.ExhibitMIME;
059 import org.hd.d.pg2k.svrCore.collections.LRUMapAutoSizeForHitRate;
060 import org.hd.d.pg2k.webSvr.exhibit.DataSourceBean;
061 import org.hd.d.pg2k.webSvr.exhibit.DataSourceBean.AEPLinkedKey;
062
063 import ORG.hd.d.IsDebug;
064
065 /**This is the servlet that serves the content of the (latest) Gallery javadoc bundle.
066 * Essentially this does what the default WAR container support would do
067 * if the javadoc files were simply laid out in WAR as static data,
068 * though giving us the option to tune cacheing and similar behaviour.
069 * <p>
070 * We only respond to GET (and possibly HEAD) requests.
071 */
072 public final class JavadocBundleServlet extends HttpServlet
073 {
074 /**Cache key for most frequently-fetched (small) files; never null.
075 * Linked to the AEP instance to be automatically discarded when the exhibit set changes.
076 */
077 private static final DataSourceBean.AEPLinkedKey fffKey = new AEPLinkedKey("fffKey");
078
079 /**Maximum uncompressed size in bytes of a file to be considered for the fff cache; strictly positive. */
080 private static final int MAX_FFF_BYTES = 1 << 18; // 256kB.
081
082 /**Maximum compressed size in bytes of a file to be considered for the fff cache; strictly positive. */
083 private static final int MAX_FFF_BYTES_COMPRESSED = 1 << 13; // 8kB.
084
085 /**Minimum number of entries ('working set') to try to hold in FFF cache; strictly positive.
086 * Much smaller than MAX_FFF_ENTRIES, even as small as (say) 10.
087 */
088 private static final int MIN_FFF_ENTRIES = 16;
089
090 /**Maximum number of entries to hold in FFF cache; strictly positive.
091 * Much larger than MIN_FFF_ENTRIES.
092 */
093 private static final int MAX_FFF_ENTRIES = 128;
094
095 /**Respond to a GET or HEAD request for the content served by this servlet.
096 *
097 * @param request The servlet request we are processing
098 * @param response The servlet response we are producing
099 *
100 * @exception IOException if an input/output error occurs
101 */
102 private void doAction(final HttpServletRequest request,
103 final HttpServletResponse response,
104 final boolean isHEAD)
105 throws IOException //, ServletException
106 {
107 final DataSourceBean dsb = getDataSource(getServletConfig(), request);
108 final Name.ExhibitFull bundleExhibitName = getBundleExhibitName(dsb);
109 if(null == bundleExhibitName)
110 { response.setStatus(HttpServletResponse.SC_SERVICE_UNAVAILABLE); return; /* Absence should be only transient. */ }
111
112 // Get the relative request path...
113 final String rawPathInfo = request.getPathInfo();
114 //System.err.println("*** javadoc pathInfo = "+rawPathInfo);
115 // Reject any trailing slash (eg directory) or empty/null request.
116 if((rawPathInfo == null) || rawPathInfo.endsWith("/"))
117 {
118 // However, if request is for the root itself, then redirect to the index.html page.
119 if("/".equals(rawPathInfo))
120 { response.sendRedirect("index.html"); return; }
121 // Other unwanted random directory request...
122 response.setStatus(HttpServletResponse.SC_FORBIDDEN); return; /* Bad request... */
123 }
124 // Adjust for any leading slash.
125 final String pathInfo = ((rawPathInfo != null) && rawPathInfo.startsWith("/")) ?
126 (rawPathInfo.substring(1)) : rawPathInfo;
127
128 // Barf with 'not found' if we can't find the file in the bundle...
129 final SortedMap<CharSequence, ZE> dir = getZIPEntryOffsets(dsb);
130 if(null == dir)
131 {
132 dsb.log("ERROR: unavailable or corrupt javadoc bundle (I/O timeout or missing ZIP dir?)");
133 response.setStatus(HttpServletResponse.SC_SERVICE_UNAVAILABLE); // Absence should be only transient.
134 return;
135 }
136 final ZE entryInfo = dir.get(pathInfo);
137 if(null == entryInfo)
138 {
139 if(IsDebug.isDebug) { dsb.log("WARNING: not found in javadoc bundle: " + pathInfo); }
140 response.setStatus(HttpServletResponse.SC_NOT_FOUND);
141 return;
142 }
143 final int eLength = entryInfo.length;
144 final int eOffset = entryInfo.offset;
145
146 final org.hd.d.pg2k.svrCore.props.GenProps gp = dsb.getGenProps(-1);
147 final AllExhibitProperties aep = dsb.getAllExhibitProperties(-1);
148
149 // This input stream is assumed to be a light-weight wrapper
150 // that does not hold any scarce resources such as file descriptors.
151 // NOTE: if we found the content in our LRU cache then we don't create the wrapper stream.
152 final ExhibitStaticAttr esa = aep.aeid.getStaticAttr(bundleExhibitName);
153 if(null == esa)
154 {
155 dsb.log("ERROR: missing esa for "+bundleExhibitName);
156 response.setStatus(HttpServletResponse.SC_SERVICE_UNAVAILABLE); // Absence should be only transient.
157 return;
158 }
159
160 // Use the bundle timestamp rather than the individual entry's.
161 // For consistency we'll use getLastModified(request).
162 // Must behave appropriately if this returns -1.
163 final long timestamp = esa.timestamp;
164
165 // Look for file in our cache in case it is a frequently-requested but small one,
166 // such as the top index.html or stylesheet.
167 // We hold a thread-safe LRU cache of (compressed) small files and their ZIP descriptors
168 // as if we had just fetched them from the ZIP stream...
169 // We always keep the data in (zlib) compressed form as a raw byte[]
170 // as that is simple and does save space overall for this application.
171 // We limit the cache side to avoid contributing to memory shortage.
172 // We hold the entire cache via a SoftReference to allow automatic discard if memory runs low.
173 //
174 // The cache is parameterised/sized to be able to hold at least all of the files
175 // downloaded by a browser on visiting the opening/index page,
176 // as of 20080609 with plenty of room to spare (ie accommodating reasonable growth).
177 //
178 // Both the cache and the reference are guaranteed non-null after the loop.
179 SoftReference<MemoryTools.CacheMiniMap<String, Tuple.Pair<ZipEntry, byte[]>>> fffCacheSR;
180 MemoryTools.CacheMiniMap<String, Tuple.Pair<ZipEntry, byte[]>> fffCache;
181 // (Re)create cache if null or SoftReference has been cleared.
182 while((null == (fffCacheSR = (SoftReference<MemoryTools.CacheMiniMap<String, Tuple.Pair<ZipEntry, byte[]>>>)dsb.getAEPLinkedValue(fffKey))) ||
183 (null == (fffCache = fffCacheSR.get())))
184 {
185 final SoftReference<MemoryTools.CacheMiniMap<String, Tuple.Pair<ZipEntry, byte[]>>> newCacheSR =
186 new SoftReference<MemoryTools.CacheMiniMap<String,Tuple.Pair<ZipEntry,byte[]>>>(
187 fffCache = LRUMapAutoSizeForHitRate.<String,Tuple.Pair<ZipEntry,byte[]>>create(MIN_FFF_ENTRIES, MAX_FFF_ENTRIES, fffKey.comment));
188 // Replace atomically avoiding race problems.
189 if(null == fffCacheSR) { dsb.putIfAbsentAEPLinkedValue(fffKey, newCacheSR); }
190 else { dsb.replaceAEPLinkedValue(fffKey, fffCacheSR, newCacheSR); }
191 }
192 // See if we have something in cache for this request.
193 assert((fffCacheSR != null) && (fffCache != null));
194 final Tuple.Pair<ZipEntry,byte[]> cachedValue = fffCache.get(pathInfo);
195 final boolean haveCachedEntry = (null != cachedValue);
196 //if(haveCachedEntry) { System.out.println("FOUND IN CACHE "+pathInfo+" "+cachedValue.second.length+" bytes."); }
197
198 final InputStream is = haveCachedEntry ? null : FileTools.wrapExhibitAsStream(dsb).getInputStream(esa);
199 // Attempt to skip/seek to the start of the entry we want.
200 // We hope that this is in fact an efficient constant-time seek.
201 if(!haveCachedEntry) { is.skip(eOffset); }
202 // Buffer input to ZIP reader to aggregate small reads
203 // (which might, if the ZIP file is uncached, go as inefficient separate HTTP requests upstream)
204 // into larger single requests that should comfortably fit in a single TCP packet,
205 // thus minimising the cost/waste of any subsequent over-length read.
206 // Being uncached is quite likely due to random-access into this ZIP archive.
207 // NOTE: if we found the content in our LRU cache then we don't create the ZIP stream.
208 final ZipInputStream zis = haveCachedEntry ? null : new ZipInputStream(new BufferedInputStream(is, 1024));
209 try
210 {
211 final ZipEntry ze = haveCachedEntry ? cachedValue.first : zis.getNextEntry();
212 if((null == ze) || !pathInfo.equals(ze.getName())) // Shouldn't happen...
213 {
214 getServletContext().log("ERROR: could not find ZIP entry");
215 response.setStatus(HttpServletResponse.SC_INTERNAL_SERVER_ERROR);
216 return;
217 }
218
219 // Very similar HTTP header set-up to exhibits,
220 // since this data comes from an exhibit.
221 // If we know the exact length then we use it for efficiency and robustness, else -1.
222 final long llength = (eLength >= 0) ? eLength : ze.getSize(); // -1 if not known...
223 assert(llength >= -1);
224 assert(llength <= Integer.MAX_VALUE);
225 final int length = (int) llength;
226
227 // Select the content type as if the entry was an exhibit,
228 // else try a fallback.
229 String type = null;
230 try { type = ExhibitMIME.getMIMEType(pathInfo); }
231 catch(final Exception e)
232 {
233 // Try to guess...
234 type = URLConnection.guessContentTypeFromName(pathInfo);
235 }
236
237
238 // Set some cacheing headers.
239 // Make the cache time usually a significant multiple of
240 // the interval between rechecks of exhibit immutable data
241 // as this is expected to change relatively slowly.
242 // Increase it if we are busy/etc so as to reduce future server load.
243 // Actually extend to a reasonable fraction of the javadoc bundle age
244 // capped to the maximum allowed for static content.
245 final long cacheLifetime = WebUtils.computeCacheMaxAgeMSFromTimestamp(timestamp, getServletContext(), gp);
246
247 // Set the HTTP/1.1 cache-control header to reflect
248 // the maximum time that any proxy/end-user should cache the exhibit.
249 response.setHeader("Cache-Control", "public,max-age="+(cacheLifetime/1000));
250
251 // If this might be an HTML request from a search engine
252 // then we set a long expiry so that search engines will
253 // index and retain the page; of the order of weeks or even months.
254 // This *may* force HTTP/1.0-browser users to hit RELOAD
255 // more often than we'd like...
256 final boolean isFromSpider = WebUtils.requestProbablyFromSpider(request);
257 final long expiryTime = isFromSpider
258 ? Math.max(cacheLifetime, WebConsts.SPIDER_PAGE_EXPIRY_MS)
259 : cacheLifetime;
260 response.setDateHeader("Expires", System.currentTimeMillis() + expiryTime);
261
262 // Create (strong) ETag header from MD5hash if available, else no ETag.
263 final ExhibitPropsLoadable epl = aep.getExhibitPropsLoadable(esa.getExhibitFullName());
264 final AccessionData ad = epl.getAccessionMetadata();
265 final String ETag = ((ad != null) && (ad.hashMD5 != null)) ?
266 ("\"" + ad.hashMD5.toHexString() + "\"") : null;
267 response.setHeader("ETag", ETag);
268
269 // Set Last-Modified from known-good timestamp
270 response.setDateHeader("Last-Modified", timestamp);
271
272
273 // Handle If-Modified-Since / If-None-Match before setting most headers
274 // other than cache-related ones for broken clients.
275 if(WebUtils.abortIfETagMatchOrNotModifiedSince(ETag, timestamp, request, response))
276 { return; }
277
278
279 // Set the main type and length headers for browsers.
280 if(length != -1) { response.setContentLength(length); }
281 if(type != null) { response.setContentType(type); }
282
283
284 // If this is a HEAD request then return without providing the body.
285 if(isHEAD) { return; }
286
287
288 // Send the data downstream...
289 final OutputStream os = response.getOutputStream();
290 if(haveCachedEntry)
291 {
292 // Write out data from our cached entry in one chunk for efficiency.
293 os.write(FileTools.decompressDeflatedData(cachedValue.second));
294 }
295 else
296 {
297 // Play out data from the ZIP stream.
298 // We want to capture this in our fff cache if small enough.
299
300 // Set true when we decide that we won't fff cache this file.
301 boolean tooBigToCache = (length > MAX_FFF_BYTES);
302 // Buffer for fffCache data (we stop recording and null this out when too big).
303 ByteArrayOutputStream baos = tooBigToCache ? null : new ByteArrayOutputStream();
304
305 // Buffer for transferring data...
306 final byte[] buf = new byte[CoreConsts.BULK_DATA_TRANSFER_SIZE];
307 // Do the transfer...
308 for( ; ; )
309 {
310 final int n = zis.read(buf);
311 if(n == -1) { break; }
312 os.write(buf, 0, n);
313
314 // If still not excluded from being fff cache material then capture these bytes too.
315 // If it is currently exactly the right size then we must capture the excess
316 // in order to know that it is too big to cache...
317 if(!tooBigToCache && (baos.size() <= MAX_FFF_BYTES))
318 {
319 if(baos.size() + n <= MAX_FFF_BYTES) { baos.write(buf, 0, n); }
320 else
321 {
322 // If this would make the file too big to cache,
323 // stop collecting immediately and discard what we have already.
324 tooBigToCache = true;
325 baos = null;
326 }
327 }
328 }
329
330 // Cache this entry (compressed) if it meets our criteria...
331 if(!tooBigToCache && (baos != null))
332 {
333 final byte[] compressedData = FileTools.compressDeflatableData(baos.toByteArray());
334 // The compressed form must be smaller than its (smaller) threshold to be cached.
335 if(compressedData.length <= MAX_FFF_BYTES_COMPRESSED)
336 {
337 // Cache it!
338 //System.out.println("CACHED "+pathInfo+" "+baos.size()+" bytes.");
339 ze.setSize(baos.size()); // Store this for next time around.
340 fffCache.put(pathInfo, new Tuple.Pair<ZipEntry,byte[]>(ze, compressedData));
341 }
342 }
343 }
344 }
345 finally { if(zis != null) { zis.close(); /* Free resources. */ } }
346 }
347
348 /**Respond to a GET request for the content served by this servlet.
349 *
350 * @param request The servlet request we are processing
351 * @param response The servlet response we are producing
352 *
353 * @exception IOException if an input/output error occurs
354 */
355 @Override
356 public void doGet(final HttpServletRequest request,
357 final HttpServletResponse response)
358 throws IOException //, ServletException
359 {
360 doAction(request, response, false);
361 }
362
363 /**Respond to a HEAD request for the content served by this servlet.
364 *
365 * @param request The servlet request we are processing
366 * @param response The servlet response we are producing
367 *
368 * @exception IOException if an input/output error occurs
369 */
370 @Override
371 public void doHead(final HttpServletRequest request,
372 final HttpServletResponse response)
373 throws IOException // , ServletException
374 {
375 doAction(request, response, true);
376 }
377
378 /**Get `last-modified' time for the entire bundle.
379 * If we can't find out then we return -1L, the default value.
380 * <p>
381 * If returning a last-modified value causes difficulty
382 * (eg with Tomcat 4.0.1) then we return -1.
383 *
384 * @param request The servlet request we are processing
385 */
386 @Override
387 public final long getLastModified(final HttpServletRequest request)
388 {
389 // if(WebConsts.AVOID_LAST_MODIFIED) { return(-1L); }
390
391 final DataSourceBean dsb = getDataSource(getServletConfig(), request);
392 final Name.ExhibitFull bundleExhibitName = getBundleExhibitName(dsb);
393 try
394 {
395 if(null != bundleExhibitName)
396 {
397 final ExhibitStaticAttr esa = dsb.getStaticAttr(bundleExhibitName);
398 return(esa.timestamp);
399 }
400 }
401 catch(final Exception e) { /* Fall through in case of error... */ }
402
403 return(-1); // Don't know.
404 }
405
406 /**Get singleton (per-servlet-context) data pipeline/cache instance.
407 * The config param must not be null, but for some operations
408 * (such as calling destroy()) request can be null.
409 */
410 private static DataSourceBean getDataSource(
411 final ServletConfig config,
412 final HttpServletRequest request)
413 {
414 // Fetches/creates the data source...
415 final DataSourceBean dataSource =
416 DataSourceBean.getApplicationInstance(config.getServletContext());
417
418 return(dataSource);
419 }
420
421 /**Get full exhibit name of the latest javadoc bundle; null if none. */
422 private static final Name.ExhibitFull getBundleExhibitName(final DataSourceBean dsb)
423 { return(WebUtils.findLatestCodeBundle(dsb, WebConsts.PREFIX_JAVADOC_BUNDLE)); }
424
425 /**Private key for cache of ZIP directory against current AEP instance; never null. */
426 private static final AEPLinkedKey dirKey = new AEPLinkedKey("dirKey");
427
428 /**Get ZIP directory/offsets; null if none.
429 * The keys of the returned (immutable) map are valid files/entries in the ZIP.
430 * <p>
431 * Each value is the (non-negative) offset from the start of the ZIP file
432 * to the start of its entry as can be read with ZipInputStream.
433 * <p>
434 * This is cached against the AEP instance for efficiency.
435 * (We might have to go back across the network to the master to fetch this
436 * if we don't have the entire bundle cached locally, for example, which is slow.)
437 * <p>
438 * On successfully cacheing this we read the first byte of the archive
439 * to try to trigger (pre)cacheing of at least an enclosing early chunk.
440 */
441 private static SortedMap<CharSequence, ZE> getZIPEntryOffsets(final DataSourceBean dsb)
442 {
443 // Try for this from cache...
444 SortedMap<CharSequence, ZE> result = (SortedMap<CharSequence, ZE>) dsb.getAEPLinkedValue(dirKey);
445 if(result != null) { return(result); }
446
447 final Name.ExhibitFull exhibitName = getBundleExhibitName(dsb);
448 if(exhibitName == null) { return(null); }
449 try { result = FileTools.getZIPEntriesLengthAndOffset(FileTools.wrapExhibitAsRandomAccessData(dsb, exhibitName)); }
450 catch(final Exception e) { dsb.log(e.getMessage()); return(null); }
451
452 // Save (positive) result in cache.
453 if(result != null)
454 {
455 if(null == dsb.putIfAbsentAEPLinkedValue(dirKey, result))
456 {
457 // We just won the race to cache the ZIP directory
458 // so now try to also force-load the ZIP start with a minimal initial read.
459 try { dsb.getRawFile(ByteBuffer.allocate(1), exhibitName, 0, false); }
460 catch(final IOException e) { dsb.log("Precache attempt on javadoc ZIP threw exception: "+e.getMessage()); }
461 }
462 }
463
464 return(result);
465 }
466
467 /**Unique serialisation ID. */
468 private static final long serialVersionUID = 8287758876065863228L;
469 }