001    /*
002    Copyright (c) 1996-2011, Damon Hart-Davis
003    All rights reserved.
004    
005    Redistribution and use in source and binary forms, with or without
006    modification, are permitted provided that the following conditions are
007    met:
008    
009      * Redistributions of source code must retain the above copyright
010        notice, this list of conditions and the following disclaimer.
011    
012      * Redistributions in binary form must reproduce the above copyright
013        notice, this list of conditions and the following disclaimer in the
014        documentation and/or other materials provided with the
015        distribution.
016    
017    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
018    IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
019    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
020    PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
021    OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
022    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
023    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
024    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
025    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
026    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
027    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
028    */
029    /*Created by IntelliJ IDEA.
030     * User: Damon Hart-Davis
031     * Date: 14-Apr-2003
032     * Time: 18:00:51
033     */
034    package org.hd.d.pg2k.webSvr.exhibit;
035    
036    import java.util.Collections;
037    import java.util.Comparator;
038    import java.util.List;
039    import java.util.Set;
040    import java.util.TreeSet;
041    import java.util.Vector;
042    
043    import org.hd.d.pg2k.svrCore.ExhibitName;
044    import org.hd.d.pg2k.svrCore.Name;
045    
046    /**A simple fast mechanism to collect "similar" exhibits to a named one.
047     * Can be used in a catalogue page to find similar exhibits quickly (in a CPU-efficient way),
048     * possibly (when the site is busy) as an alternative to the more general default search.
049     */
050    public final class SimpleSimilarExhibitFinder
051        {
052        /**Quickly returns a set of exhibits (full names) similar to the named exhibit (passed by full name); never null.
053         * This should be constructed with a sorted List of exhibits (full names)
054         * and an exhibit that exists in that list.
055         * <p>
056         * The sort order should probably be SMART_ORDER or something similar,
057         * eg case-insensitively by the first "main" word in the name
058         * (ignoring the directory component).
059         * <p>
060         * This will then return a list of exhibits adjacent in the original named
061         * exhibit that has the first main word (matched case-insensitively)
062         * but not including any variant of the original exhibit
063         * and no more than one variant of any of the returned names.
064         * <p>
065         * A maximum number of results is specified,
066         * and a list no longer than this will be returned.
067         * <p>
068         * This is designed to be CPU efficient yet return a useful set of results.
069         * <p>
070         * Results in same order as original list.
071         * <p>
072         * This is designed to be efficient generating a small number of results
073         * (eg less than ten).
074         * <p>
075         * This attempts to be reasonably unbiased in terms of direction.
076         *
077         * @param exhibits  list of full exhibit names sorted with the exhibitComparator;
078         *     not modified by the routine
079         * @param exhibitComparator  exhibits must be sorted with this
080         * @param exhibitName  full name of main exhibit; must be in exhibits list
081         *     and neither it nor any variant will appear in the output
082         * @param attrWords  set of all valid attribute words
083         * @param maxResults  maximum number if results to return
084         *
085         * @deprecated  slow and not very smart
086         */
087        @Deprecated
088        public static Name.ExhibitFull[] getSimpleSimilarExhibitList(
089                final List<Name.ExhibitFull> exhibits,
090                final Comparator<? super Name.ExhibitFull> exhibitComparator,
091                final Name.ExhibitFull exhibitName,
092                final Set<String> attrWords,
093                final int maxResults)
094            {
095            if((exhibits == null) ||
096                (exhibitComparator == null) ||
097                (exhibitName == null) ||
098                (attrWords == null))
099                { throw new IllegalArgumentException(); }
100    
101            // If no results required, don't compute any...
102            if(maxResults <= 0) { return(NO_RESULTS); }
103    
104            // Find our own location in the array using same order as sort used...
105            final int ourPos = Collections.binarySearch(exhibits, exhibitName, exhibitComparator);
106            // If exhibit is not in list, return no results!  SHOULD NOT HAPPEN IF INPUTS ARE WELL-FORMED...
107            if(ourPos == -1) { return(NO_RESULTS); }
108    
109            // Save recomputing the upper bound frequently.
110            final int exhibitsSize = exhibits.size();
111            // If there are no exhibits,
112            // or only one which should by rights be exhibitName,
113            // we can return an empty result set.
114            if(exhibitsSize < 2) { return(NO_RESULTS); }
115    
116            // Get our main word...
117            final String mainWord = (String) (ExhibitName.getMainWords(exhibitName, attrWords).nextElement());
118    
119            // Case-insensitive store of main stems; primed with the initial exhibit.
120            final Set<String> mainStems = new TreeSet<String>(String.CASE_INSENSITIVE_ORDER);
121            mainStems.add(exhibitName.getShortName().getMainWordsComponent(attrWords).toString()); // FIXME: expensive conversion to String
122    
123            // Gather results; keep them sorted as we go, though this might be inefficient.
124            // Set our working result size to the largest it could possibly be
125            // as we assume (a) that's not huge (b) we will often fill it.
126            final Vector<Name.ExhibitFull> result = new Vector<Name.ExhibitFull>(Math.min(maxResults, exhibitsSize - 1));
127    
128            int lPos = ourPos;
129            int hPos = ourPos;
130            while((result.size() < maxResults))
131                {
132                // If we've fallen off the ends of the array,
133                // we have to stop...
134                if((lPos <= 0) && (hPos >= exhibitsSize - 1))
135                    { break; }
136    
137                // If the lower bound is still on the right word,
138                // check out the entry.
139                if(lPos > 0)
140                    {
141                    final Name.ExhibitFull name = exhibits.get(--lPos);
142    
143                    // If it's got the wrong main word stop searching
144                    // in this direction immediately...
145                    // FIXME: inefficient via full name and tokenizer
146                    final String lMainWord = (String) (ExhibitName.getMainWords(name, attrWords).nextElement());
147                    if(!mainWord.equalsIgnoreCase(lMainWord))
148                        { lPos = -1; } // Put index out of bounds.
149                    else
150                        {
151                        // If we've not already seen something with this main stem,
152                        // add it at the right end of our results!
153                        if(mainStems.add(name.getShortName().getMainWordsComponent(attrWords).toString())) // FIXME: expensive conversion to String
154                            { result.insertElementAt(name, 0); }
155                        }
156                    }
157    
158                // If the upper bound is still on the right word,
159                // check out the entry.
160                if(hPos < exhibitsSize - 1)
161                    {
162                    final Name.ExhibitFull name = exhibits.get(++hPos);
163    
164                    // If it's got the wrong main word stop searching
165                    // in this direction immediately...
166                    // FIXME: inefficient via full name and tokenizer
167                    final String hMainWord = (String) (ExhibitName.getMainWords(name, attrWords).nextElement());
168                    if(!mainWord.equalsIgnoreCase(hMainWord))
169                        { hPos = exhibitsSize; } // Put index out of bounds.
170                    else
171                        {
172                        // If we've not already seen something with this main stem,
173                        // add it at the right end of our results!
174                        if(mainStems.add(name.getShortName().getMainWordsComponent(attrWords).toString())) // FIXME: expensive conversion to String
175                            { result.addElement(name); }
176                        }
177                    }
178                }
179    
180            // Truncate to size if we overshot a little...
181            if(result.size() > maxResults)
182                { result.setSize(maxResults); } // Truncate to requested size...
183    
184            // Convert to array of full names.
185            if(result.size() == 0) { return(NO_RESULTS); } // Saves an allocation...
186            final Name.ExhibitFull r[] = new Name.ExhibitFull[result.size()];
187            result.toArray(r);
188    //        // Finally, convert to short names...
189    //        for(int i = r.length; --i >= 0; )
190    //            { r[i] = ExhibitName.getFileComponent(r[i]).toString(); }
191            return(r);
192            }
193    
194        /**Fixed empty result object. */
195        private static final Name.ExhibitFull[] NO_RESULTS = new Name.ExhibitFull[0];
196        }