001 /*
002 Copyright (c) 1996-2012, Damon Hart-Davis
003 All rights reserved.
004
005 Redistribution and use in source and binary forms, with or without
006 modification, are permitted provided that the following conditions are
007 met:
008
009 * Redistributions of source code must retain the above copyright
010 notice, this list of conditions and the following disclaimer.
011
012 * Redistributions in binary form must reproduce the above copyright
013 notice, this list of conditions and the following disclaimer in the
014 documentation and/or other materials provided with the
015 distribution.
016
017 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
018 IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
019 TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
020 PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
021 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
022 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
023 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
024 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
025 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
026 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
027 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
028 */
029 package org.hd.d.pg2k.webSvr.catalogue;
030
031 import java.io.IOException;
032 import java.io.UnsupportedEncodingException;
033 import java.net.URLEncoder;
034 import java.util.ArrayList;
035 import java.util.Arrays;
036 import java.util.Collections;
037 import java.util.HashMap;
038 import java.util.Iterator;
039 import java.util.List;
040 import java.util.Map;
041 import java.util.SortedMap;
042 import java.util.SortedSet;
043 import java.util.TreeMap;
044
045 import org.hd.d.pg2k.svrCore.AllExhibitImmutableData;
046 import org.hd.d.pg2k.svrCore.AllExhibitProperties;
047 import org.hd.d.pg2k.svrCore.CoreConsts;
048 import org.hd.d.pg2k.svrCore.ExhibitAttrUtils;
049 import org.hd.d.pg2k.svrCore.ExhibitName;
050 import org.hd.d.pg2k.svrCore.ExhibitStaticAttr;
051 import org.hd.d.pg2k.svrCore.LocaleBeanBase;
052 import org.hd.d.pg2k.svrCore.Name;
053 import org.hd.d.pg2k.svrCore.Name.ExhibitFull;
054 import org.hd.d.pg2k.svrCore.TextUtils;
055 import org.hd.d.pg2k.svrCore.uploader.ExhibitHandlerBeanBase;
056 import org.hd.d.pg2k.svrCore.uploader.UploaderUtils;
057 import org.hd.d.pg2k.webSvr.exhibit.DataSourceBean;
058 import org.hd.d.pg2k.webSvr.util.WebConsts;
059
060 import ORG.hd.d.jIndexer.server.JIndexBean;
061
062 /**This is the JavaBean that backs the (HTML) catalogue search page.
063 * This should be held at page scope so that its state is
064 * set from fresh on each page load.
065 * <p>
066 * This attempts to sanitise values as they are set.
067 * <p>
068 * Most methods are synchronized to provide thread-safety.
069 */
070 public final class SearchPageJavaBean extends ExhibitHandlerBeanBase
071 {
/**The sanitised simple text query, or "" if none.
 * Within this class it is written only by setQ(), which stores the
 * canonicalised form of the raw text, and read only by getQ().
 */
private String q = "";
074
075 /**Sets the simple query, or "" if none.
076 * This truncates and canonicalises and sanitises
077 * the query text as necessary.
078 */
079 public synchronized void setQ(final String rawQueryText)
080 { q = JIndexBean.canonicaliseSimpleByWordQuery(rawQueryText, WebConsts.MAX_SIMPLE_QUERY_LEN); }
081
082 /**Get the sanitised query text, or "" if none (never null, never longer than WebConsts.MAX_SIMPLE_QUERY_LEN). */
083 public synchronized String getQ() { return(q); }
084
085
086 /**How long in milliseconds we consider a day to be. */
087 public static final int DAY_MS = 24 * 3600 * 1000;
088
/**The number of recent days to filter for; 0 means no filter.
 * setRecentDaysFilter() only ever stores 0 or a positive value here.
 */
private int recentDaysFilter = 0;
091
/**Immutable map from symbolic filter period (single-letter key) to its length in days.
 * A key from here can also be suffixed to the common i18n resource
 * "common.cat.filtersince.recent." to get descriptive text.
 */
public static final Map<String,Integer> symFilterDays;
/**Immutable sorted map from number of days back to symbolic name; the inverse of symFilterDays. */
public static final SortedMap<Integer,String> daysFilterSym;
/**Populate symFilterDays and daysFilterSym from one table so they cannot disagree. */
static
{
    // Parallel tables: PERIOD_KEYS[i] stands for PERIOD_DAYS[i] days.
    final String[] periodKeys = { "d", "w", "m", "q", "y" };
    final int[] periodDays = { 1, 7, 30, 90, 365 };

    final Map<String,Integer> bySymbol = new HashMap<String, Integer>(11);
    final SortedMap<Integer,String> byDays = new TreeMap<Integer, String>();
    for(int i = 0; i < periodKeys.length; ++i)
    {
        final Integer days = Integer.valueOf(periodDays[i]);
        bySymbol.put(periodKeys[i], days);
        byDays.put(days, periodKeys[i]);
    }

    symFilterDays = Collections.unmodifiableMap(bySymbol);
    daysFilterSym = Collections.unmodifiableSortedMap(byDays);
}
119
120 /**Set the recent period (to today) for which exhibits are wanted.
121 * The parameter can either be one of the single letter keys in symFilterDays,
122 * or null or "" or "-" (SETTER_ALL) or "0" for no filter,
123 * or a positive Integer number of days.
124 */
125 public synchronized void setRecentDaysFilter(final String sRecentDaysFilter)
126 {
127 // Look for all cases meaning "no filter".
128 if((sRecentDaysFilter == null) ||
129 (sRecentDaysFilter.length() == 0) ||
130 sRecentDaysFilter.equals(ExhibitHandlerBeanBase.SETTER_ALL) ||
131 sRecentDaysFilter.equals("0"))
132 {
133 recentDaysFilter = 0;
134 return;
135 }
136
137 // If this is a symbolic value, convert it to the actual number of days.
138 final Integer ir = symFilterDays.get(sRecentDaysFilter);
139 if(ir != null)
140 {
141 recentDaysFilter = ir.intValue();
142 return;
143 }
144
145 // Try to interpret as integer number of days...
146 try {
147 final int i = Integer.parseInt(sRecentDaysFilter, 10);
148 // Convert negative/zero values to "no filter".
149 if(i <= 0) { recentDaysFilter = 0; }
150 // Else store the number of days specified.
151 else { recentDaysFilter = i; }
152 }
153 catch(final NumberFormatException e)
154 {
155 // Treat malformed input as "no filter".
156 recentDaysFilter = 0;
157 }
158 }
159
160 /**Get the "recent days" filter period; 0 for none else (positive) number of days. */
161 public synchronized int getRecentDaysFilter()
162 {
163 return(recentDaysFilter);
164 }
165
166 /**Generates body of recent-days filter select statement (dependent on old value, if any).
167 * Assumed not to require localisation or internationalisation.
168 * <p>
169 * This takes the symbolic values in increasing number of days.
170 * If the locale bean is passed in this makes full labels else it makes none.
171 */
172 public synchronized String makeRecentDaysFilterBody(final LocaleBeanBase l)
173 {
174 final String legit[] = new String[daysFilterSym.size()];
175 daysFilterSym.values().toArray(legit);
176 final String fullList[] = new String[legit.length + 1];
177 fullList[0] = SETTER_ALL; // Meaning ``all''.
178 System.arraycopy(legit, 0, fullList, 1, legit.length);
179
180 // Now make the labels...
181 final String labels[] = new String[fullList.length];
182 labels[0] = "*"; // Meaning all.
183
184 // Make a meaningful set of labels.
185 for(int i = labels.length; --i > 0; )
186 {
187 labels[i] = fullList[i]; // Default is to use symbolic name unaltered.
188
189 if(l != null)
190 {
191 final String name = CoreConsts.SYMTIME_I18N_PREFIX + fullList[i];
192 final String m = l.getLocalisedMessage(name);
193 if(!name.equals(m))
194 { labels[i] = m; }
195 }
196 }
197
198 return(UploaderUtils.makeSelectBody(fullList, labels,
199 daysFilterSym.get(Integer.valueOf(getRecentDaysFilter()))));
200 }
201
202
203
204 /**Gets the search filter, or null if none.
205 * Note that we expect short exhibit names to be presented
206 * to these filters, as that is what is stored in the index.
207 */
208 public synchronized JIndexBean.SearchFilterByName getSearchFilter()
209 {
210 JIndexBean.SearchFilterByName result = null;
211
212 final SortedSet<String> attrWords = ExhibitAttrUtils.getAttrWords().getAttrWordsSortedSet();
213
214 // If an attribute-word list is set then filter with it.
215 // Relatively slow, so should be towards the tail of the filter chain,
216 // ie towards the start of this function.
217 // This will be more efficient if the attribute list is deduped first.
218 final List<String> attributes = getAttributeWordsAsList();
219 if(!attributes.isEmpty())
220 {
221 final JIndexBean.SearchFilterByName upstream = result;
222 result = (new JIndexBean.SearchFilterByName(){
223 final public boolean accept(final CharSequence name)
224 {
225 final Name.ExhibitFull fullName = getAep().aeid.getFullName(name);
226 if(null == fullName) { return(false); }
227
228 // Get attributes of this exhibit, if any.
229 final SortedSet<String> nameAttrWords = ExhibitName.getAttributeWordsComponentSortedSet(fullName, attrWords);
230 // If this has fewer attributes than those being searched for then return false.
231 if(nameAttrWords.size() < attributes.size()) { return(false); }
232
233 // If any of the specified search attribute words are missing then return false.
234 final List<String> attrsNotPresent = new ArrayList<String>(attributes);
235 attrsNotPresent.removeAll(nameAttrWords);
236 if(!attrsNotPresent.isEmpty()) { return(false); }
237
238 // Test using possibly-memory-hungry map...
239 // final Map<String,Set<Name.ExhibitFull>> ebam = getAep().getExhibitsByAttribute();
240 // for(final Iterator<String> it = attributes.iterator(); it.hasNext(); )
241 // {
242 // final Set<Name.ExhibitFull> s = ebam.get(it.next());
243 // if(s == null) { continue; } // Ignore invalid attribute word!
244 // if(!s.contains(fullName)) { return(false); } // Does not have this attribute.
245 // }
246
247 if(upstream != null) { return(upstream.accept(name)); } // Chain previous filter.
248 return(true); // OK.
249 }
250 });
251 }
252
253 // If the recent-addition value is set then filter with it.
254 // Relatively slow, so should be towards the tail of the filter chain,
255 // ie towards the start of this function.
256 if(getRecentDaysFilter() > 0)
257 {
258 final long now = System.currentTimeMillis();
259 final long since = now - (getRecentDaysFilter() * (long)DAY_MS);
260 assert(since < now);
261 final AllExhibitImmutableData aeid = getAep().aeid;
262 final JIndexBean.SearchFilterByName upstream = result;
263 result = (new JIndexBean.SearchFilterByName(){
264 final public boolean accept(final CharSequence name)
265 {
266 // Note that we have to extend name to full form to check its age...
267 final ExhibitStaticAttr esa = aeid.getStaticAttr(aeid.getFullName(name));
268 if(null == esa) { return(false); }
269 if(esa.timestamp < since) { return(false); } // Definitely not OK.
270 if(upstream != null) { return(upstream.accept(name)); } // Chain previous filter.
271 return(true); // OK.
272 }
273 });
274 }
275
276 // If the category is set then filter with it.
277 // Relatively slow, so should be towards the tail of the filter chain,
278 // ie towards the start of this function.
279 if(!"".equals(getCategory()))
280 {
281 final String cat = getCategory();
282 final AllExhibitImmutableData aeid = getAep().aeid;
283 final JIndexBean.SearchFilterByName upstream = result;
284 result = (new JIndexBean.SearchFilterByName(){
285 final public boolean accept(final CharSequence name)
286 {
287 // Note that we have to extend name to full form to check its category...
288 final ExhibitFull fullName = aeid.getFullName(name);
289 if(null == fullName) { return(false); }
290 if(!TextUtils.contentEquals(cat, ExhibitName.getCategoryComponent(fullName))) { return(false); } // Definitely not OK.
291 if(upstream != null) { return(upstream.accept(name)); } // Chain previous filter.
292 return(true); // OK.
293 }
294 });
295 }
296
297 // If the author is set then filter with it.
298 // Relatively fast, so should be towards the head of the filter chain,
299 // ie towards the end of this function.
300 if(!"".equals(getAuthor()))
301 {
302 final String auth = getAuthor();
303 final AllExhibitImmutableData aeid = getAep().aeid;
304 final JIndexBean.SearchFilterByName upstream = result;
305 result = (new JIndexBean.SearchFilterByName(){
306 final public boolean accept(final CharSequence name)
307 {
308 final ExhibitFull fullName = aeid.getFullName(name);
309 if(null == fullName) { return(false); }
310 if(!TextUtils.contentEquals(auth, ExhibitName.getAuthorComponent(fullName))) { return(false); } // Definitely not OK.
311 if(upstream != null) { return(upstream.accept(name)); } // Chain previous filter.
312 return(true); // OK.
313 }
314 });
315 }
316
317 // If the suffix is set then filter with it.
318 // Always fast, so should be towards the head of the filter chain,
319 // ie towards the end of this function.
320 if(!"".equals(getSuffix()))
321 {
322 final String suf = getSuffix();
323 final AllExhibitImmutableData aeid = getAep().aeid;
324 final JIndexBean.SearchFilterByName upstream = result;
325 result = (new JIndexBean.SearchFilterByName(){
326 final public boolean accept(final CharSequence name)
327 {
328 final ExhibitFull fullName = aeid.getFullName(name);
329 if(null == fullName) { return(false); }
330 if(!TextUtils.endsWith(fullName, suf)) { return(false); } // Definitely not OK.
331 if(upstream != null) { return(upstream.accept(name)); } // Chain previous filter.
332 return(true); // OK.
333 }
334 });
335 }
336
337 return(result);
338 }
339
340 /**Get search results as immutable list of full exhibit names; may be zero-length but never null.
341 * Using the query string and other details that the user has supplied
342 * this does the search and returns
343 * a (possibly-zero-length) list of exhibit names.
344 * <p>
345 * As a side-effect this sets the time the search takes in milliseconds
346 * accessible by getLastSearchTimeMs().
347 *
348 * @param maxResults maximum length list of results to return;
349 * should be positive
350 */
351 public synchronized List<Name.ExhibitFull> doLookup(final DataSourceBean dataSource,
352 final int maxResults)
353 throws IOException
354 {
355 final long startTime = System.currentTimeMillis();
356 try {
357 if(maxResults < 1) { return(Collections.emptyList()); }
358
359 final String q = getQ();
360 final JIndexBean.SearchFilterByName sfbn = getSearchFilter();
361
362 // Special cases if the query string is empty...
363 if(q.length() == 0)
364 {
365 // If there is no search string and no filter,
366 // return an empty list immediately.
367 if(sfbn == null) { return(Collections.emptyList()); }
368
369 // If there is no search string but there is a filter,
370 // apply that filter to the whole exhibit list...
371 // This returns full names, which we convert to short names
372 // before returning...
373 final List<Name.ExhibitFull> result = Collections.unmodifiableList(Arrays.asList(
374 getAep().select((new AllExhibitProperties.AEPFilter(){
375 public final boolean accept(final AllExhibitProperties _aep, final Name.ExhibitFull fullName)
376 { return(sfbn.accept(fullName.getShortName().toString())); }
377 }), null, maxResults)));
378 return(result);
379 }
380
381 // Try doing a strict match on "all" terms first, which may be quick and gives high relevance.
382 final List<Name.ExhibitFull> result =
383 dataSource.findExhibitsByWord(q, DataSourceBean.FEBY_MATCH_TYPE_ALL, maxResults, sfbn);
384 if(!result.isEmpty()) { return(result); }
385
386 // Default lookup by most words, as fallback to try to find some results.
387 return(dataSource.findExhibitsByWord(q, DataSourceBean.FEBY_MATCH_TYPE_MOST, maxResults, sfbn));
388 }
389 finally
390 {
391 // Record the time taken to do the search.
392 lastSearchTimeMs = System.currentTimeMillis() - startTime;
393 }
394 }
395
396 /**Maximum (UTF-8 URL-encoded) GET-style query string that we will generate; strictly positive.
397 * We base this on the maximum simple query text length that we support.
398 */
399 public static final int MAX_GET_QUERY_CHARS = 128 + WebConsts.MAX_SIMPLE_QUERY_LEN;
400
401 /**Make simple (length-limited) GET query string.
402 * Can be used where POST is not possible or desirable.
403 * <p>
404 * Takes an optional (positive) page number argument.
405 * <p>
406 * This is UTF-8 encoded.
407 * <p>
408 * Parameter are separated with <code>&amp;</code>
409 * ready for direct inclusion at the end of a URL.
410 * <p>
411 * If the result is getting too long we stop adding fields,
412 * so we encode the more critical ones first.
413 *
414 * @param pg if positive then the pg (page) parameter is included
415 *
416 * @return non-empty result starting with '?' suitably encoded
417 * to drop into an [X]HTML attribute value at the end of a URL
418 */
419 public String computeGETQueryStringURLTail(final int pg)
420 {
421 final StringBuffer sb = new StringBuffer(MAX_GET_QUERY_CHARS);
422
423 // Parameter separator to use: & for [X][HT]ML attribute values.
424 final String paramSep = "&";
425
426 // Hand-encode (as necessary) the main values for speed and reliability.
427 // Always include the query text.
428 sb.append("?q=").append(getQ().replace(' ', '+'));
429 // Always include the page number if supplied.
430 if(pg > 0)
431 { sb.append(paramSep).append(WebConsts.PAGE_NUMBER_PARAMETER).append('=').append(pg); }
432
433 try
434 {
435 final String encFormat = "UTF-8";
436
437 // Handle optional parameters, most important first.
438 // Use dummy loop to enable break-out when result getting too big.
439 do
440 {
441 // Category.
442 final String category = getCategory();
443 if(category.length() > 0)
444 {
445 final StringBuffer tmp = new StringBuffer();
446 tmp.append(paramSep).append("category=").append(URLEncoder.encode(category, encFormat));
447 if(sb.length() + tmp.length() > MAX_GET_QUERY_CHARS) { break; }
448 sb.append(tmp);
449 }
450
451 // Suffix.
452 final String suffix = getSuffix();
453 if(suffix.length() > 0)
454 {
455 final StringBuffer tmp = new StringBuffer();
456 tmp.append(paramSep).append("suffix=").append(URLEncoder.encode(suffix, encFormat));
457 if(sb.length() + tmp.length() > MAX_GET_QUERY_CHARS) { break; }
458 sb.append(tmp);
459 }
460
461 // Author.
462 final String author = getAuthor();
463 if(author.length() > 0)
464 {
465 final StringBuffer tmp = new StringBuffer();
466 tmp.append(paramSep).append("author=").append(author); // Needs no encoding.
467 if(sb.length() + tmp.length() > MAX_GET_QUERY_CHARS) { break; }
468 sb.append(tmp);
469 }
470 } while(false);
471 }
472 catch(final UnsupportedEncodingException e)
473 {
474 // TODO Auto-generated catch block
475 e.printStackTrace();
476 }
477
478 return(sb.toString());
479 }
480
/**Last search time in ms, -1 if no search yet done.
 * Written in the finally block of doLookup(), so it is updated
 * however that call exits (normal return, early return or exception).
 */
private long lastSearchTimeMs = -1;
483
484 /**Gets the last search time in ms, -1 if no search yet done.
485 */
486 public synchronized long getLastSearchTimeMs()
487 { return(lastSearchTimeMs); }
488
/**Unique serialisation class ID for this bean, generated by http://random.hd.org/. */
private static final long serialVersionUID = -8656439331186143583L;
491 }