001 /*
002 Copyright (c) 1996-2012, Damon Hart-Davis
003 All rights reserved.
004
005 Redistribution and use in source and binary forms, with or without
006 modification, are permitted provided that the following conditions are
007 met:
008
009 * Redistributions of source code must retain the above copyright
010 notice, this list of conditions and the following disclaimer.
011
012 * Redistributions in binary form must reproduce the above copyright
013 notice, this list of conditions and the following disclaimer in the
014 documentation and/or other materials provided with the
015 distribution.
016
017 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
018 IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
019 TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
020 PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
021 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
022 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
023 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
024 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
025 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
026 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
027 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
028 */
029
030 /*
031 * Created by IntelliJ IDEA.
032 * User: Administrator
033 * Date: 28-Dec-02
034 * Time: 22:24:51
035 */
036 package org.hd.d.pg2k.test.dev;
037
038 import java.io.ByteArrayInputStream;
039 import java.util.Random;
040
041 import junit.framework.TestCase;
042
043 import org.hd.d.pg2k.svrCore.AllExhibitProperties;
044 import org.hd.d.pg2k.svrCore.FileTools;
045 import org.hd.d.pg2k.svrCore.Name;
046 import org.hd.d.pg2k.webSvr.exhibit.DataSourceBean;
047
048 import ORG.hd.d.jIndexer.reader.InvertedIndexSearchFilter;
049 import ORG.hd.d.jIndexer.server.JIndexBean;
050 import ORG.hd.d.jIndexer.server.JIndexBean.SearchFilterByName;
051
052 /**Simple tests of the by-word index, including building an index. */
053 public final class IndexTest extends TestCase
054 {
055 public IndexTest(final String name)
056 {
057 super(name);
058 }
059
060 /**Check basic building of index on a representative snapshot of our exhibit data. */
061 public static void testIndexBuilding()
062 throws Exception
063 {
064 // Test performance on a realistic (latest available) exhibit-name data set.
065 final AllExhibitProperties lastAEP =
066 (AllExhibitProperties) FileTools.deserialiseFromFile(BackCompatTest.lastAEPFile, true);
067
068 // Test actually building the index.
069 final long startTime = System.currentTimeMillis();
070 final JIndexBean jib = DataSourceBean.computeByWordIndex(lastAEP);
071 final long endTime = System.currentTimeMillis();
072 assertNotNull("constructed index must not be null", jib);
073 Main.getOut().println("[Index construction time: "+(endTime-startTime)+"ms.]");
074
075 assertTrue("large fraction of exhibits expected to have unique index terms", jib.docCount() > lastAEP.aeid.length / 4);
076
077 final CharSequence[] searchResults1 = jib.simpleSearchToDocNames("cat", 1, 100, (InvertedIndexSearchFilter) null);
078 assertTrue("must get some results for known-present word", searchResults1.length > 0);
079 assertTrue("search results must be Name.ExhibitFull", searchResults1[0] instanceof Name.ExhibitFull);
080
081 // FIXME: Check that persistence is possible via serialisation.
082 //SerializationTest.checkObjectCanBeSerialisedAndDeserialised(jIndexBean);
083
084 // // Pause here if necessary to examine the heap.
085 // for( ; ; )
086 // {
087 // Main.getOut().println("Done: waiting for heap walk to be completed...");
088 // Thread.sleep(10000);
089 // }
090 }
091
092 /**Test basic construction of an index. */
093 public static void testBasicIndexBuild()
094 throws Exception
095 {
096 for(final boolean noSave : new boolean[]{false,true})
097 {
098 final JIndexBean jib = new JIndexBean(noSave);
099
100 // Add first document...
101 jib.setDocumentSimple(rnd.nextBoolean(),
102 (CharSequence) "doc0",
103 new ByteArrayInputStream("cat sat on mat".getBytes("ASCII7")));
104 // Add second document...
105 jib.setDocumentSimple(rnd.nextBoolean(),
106 "doc1",
107 new ByteArrayInputStream("cat again".getBytes("ASCII7")));
108
109 assertEquals("request should return both docs", 2, jib.simpleSearchToDocNames("cat", 1, 2 + rnd.nextInt(100000), (SearchFilterByName) null).length);
110 assertEquals("request should return one doc", 1, jib.simpleSearchToDocNames("again", 1, 2 + rnd.nextInt(100000), (SearchFilterByName) null).length);
111 assertEquals("request should return both docs", 2, jib.simpleSearchToDocNames("cat", 1, 2 + rnd.nextInt(100000), (InvertedIndexSearchFilter) null).length);
112 assertEquals("request should return one doc", 1, jib.simpleSearchToDocNames("again", 1, 2 + rnd.nextInt(100000), (InvertedIndexSearchFilter) null).length);
113
114 jib.compact();
115 assertEquals("request should return both docs", 2, jib.simpleSearchToDocNames("cat", 1, 2 + rnd.nextInt(100000), (SearchFilterByName) null).length);
116 assertEquals("request should return one doc", 1, jib.simpleSearchToDocNames("again", 1, 2 + rnd.nextInt(100000), (SearchFilterByName) null).length);
117 assertEquals("request should return both docs", 2, jib.simpleSearchToDocNames("cat", 1, 2 + rnd.nextInt(100000), (InvertedIndexSearchFilter) null).length);
118 assertEquals("request should return one doc", 1, jib.simpleSearchToDocNames("again", 1, 2 + rnd.nextInt(100000), (InvertedIndexSearchFilter) null).length);
119 }
120 }
121
122 /**Private source of OK pseudo-random numbers. */
123 private static final Random rnd = new Random();
124 }