001    /*
002    Copyright (c) 1996-2011, Damon Hart-Davis
003    All rights reserved.
004    
005    Redistribution and use in source and binary forms, with or without
006    modification, are permitted provided that the following conditions are
007    met:
008    
009      * Redistributions of source code must retain the above copyright
010        notice, this list of conditions and the following disclaimer.
011    
012      * Redistributions in binary form must reproduce the above copyright
013        notice, this list of conditions and the following disclaimer in the
014        documentation and/or other materials provided with the
015        distribution.
016    
017    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
018    IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
019    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
020    PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
021    OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
022    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
023    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
024    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
025    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
026    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
027    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
028    */
029    
030    /*
031     * Created by IntelliJ IDEA.
032     * User: Administrator
033     * Date: 28-Dec-02
034     * Time: 22:24:51
035     */
036    package org.hd.d.pg2k.test.dev;
037    
038    import java.io.BufferedInputStream;
039    import java.io.ByteArrayInputStream;
040    import java.io.ByteArrayOutputStream;
041    import java.io.File;
042    import java.io.FileInputStream;
043    import java.io.FilterInputStream;
044    import java.io.IOException;
045    import java.io.ObjectInputStream;
046    import java.io.ObjectOutputStream;
047    import java.io.OutputStream;
048    import java.io.PrintStream;
049    import java.util.ArrayList;
050    import java.util.Arrays;
051    import java.util.Collections;
052    import java.util.HashMap;
053    import java.util.List;
054    import java.util.Map;
055    import java.util.Random;
056    import java.util.concurrent.ConcurrentHashMap;
057    import java.util.concurrent.ConcurrentMap;
058    import java.util.concurrent.Future;
059    import java.util.concurrent.atomic.AtomicInteger;
060    import java.util.zip.GZIPInputStream;
061    
062    import junit.framework.TestCase;
063    
064    import org.hd.d.pg2k.svrCore.AccessionData;
065    import org.hd.d.pg2k.svrCore.AllExhibitProperties;
066    import org.hd.d.pg2k.svrCore.AllExhibitPropertiesDelta;
067    import org.hd.d.pg2k.svrCore.CompressionLevel;
068    import org.hd.d.pg2k.svrCore.FileTools;
069    import org.hd.d.pg2k.svrCore.GenUtils;
070    import org.hd.d.pg2k.svrCore.MemoryTools;
071    import org.hd.d.pg2k.svrCore.ROByteArray;
072    import org.hd.d.pg2k.svrCore.Tuple;
073    import org.hd.d.pg2k.svrCore.datasource.ExhibitDataTunnelSource;
074    import org.hd.d.pg2k.svrCore.vars.BasicVarMgr;
075    import org.hd.d.pg2k.svrCore.vars.EventPeriod;
076    import org.hd.d.pg2k.svrCore.vars.EventVariableValueSet;
077    import org.hd.d.pg2k.svrCore.vars.SystemVariables;
078    
079    import ORG.hd.d.jIndexer.server.JIndexBean;
080    
081    /**Simple tests of backward compatibility.
082     * This tests for things that we explicitly want to keep compatible,
083     * or alternatively do not expect nor want to needlessly break.
084     * <p>
085     * This is also used for testing the handling of accession data,
086     * since that is stuff that we need to be able to handle for a long time.
087     */
088    public final class BackCompatTest extends TestCase
089        {
090        public BackCompatTest(final String name)
091            {
092            super(name);
093            }
094    
095        /**If true, print out some compression stats to help with tuning.
096         * These stats DO NOT form part of the official test suite.
097         */
098        private static final boolean CALC_COMPRESSION_STATS = false;
099    
100        /**Test data directory (project-relative) not ending in '/' for frozen AEP and test/event data; not null. */
101        public static final String testDataDir = "PG2Ksrc/misc/testdata";
102    
103        /**Immutable set of usable (GZIPed) serialised AEP files in date order.
104         * Manually kept up-to-date.
105         * <p>
106         * These are AllExhibitProperties objects.
107         * <p>
108         * The last entry is the most recent (and usually the largest).
109         */
110        public static final List<String> frozenAEPs;
111        /**Initialise frozenAEPs. */
112        static
113            {
114            // GZIPped, serialised AllExhibitProperties exhibit meta-data.
115            // Paths relative to the source root.
116            frozenAEPs = Collections.unmodifiableList(Arrays.asList(new String[]{
117                testDataDir + "/20041108._ExProps.dat",  // Captured from local server's cache.
118                testDataDir + "/20041108.2._expropsCache.dat", // From live server after fixes for more robust serialisation.
119                testDataDir + "/20050620._expropsCache.dat", // From live server before accession data expansion.
120                testDataDir + "/20051113._ExProps.dat", // Captured from local server's cache.
121                testDataDir + "/20060619._ExProps.dat", // Captured from local server's cache.
122                testDataDir + "/20060726._ExProps.dat", // Captured from local server's cache.
123                testDataDir + "/20060729._ExProps.dat", // Captured from local server's cache after EPC semantics change.
124                testDataDir + "/20060731._ExProps.dat", // Captured from local server's cache after compression efficiency changes.
125                testDataDir + "/20060818._ExProps.dat", // Captured from local server's cache to match compressed JIndexBean.
126                testDataDir + "/20060819._ExProps.dat", // Captured from local server's cache after trouble with AEP diff RPC.
127                testDataDir + "/20061122._ExProps.dat", // Static cache file after including treedesc and locationDB.
128                testDataDir + "/20070402._ExProps.dat", // Static cache file after moving section title/desc to treedesc.
129                testDataDir + "/20090806._ExProps.dat", // Captured from local server's cache after conversion to use Name (from String) in many places.
130                testDataDir + "/20100621._ExProps.dat", // Static cache file after changing Compact7BitString to usually serialise as CS8Bit.
131            }));
132            }
133    
134    
135        /**Check that we can deserialise older snapshots of our exhibit data.
136         * This includes snapshots of real Gallery meta-data,
137         * and snapshots of test set meta-data.
138         * <p>
139         * We take advantage of multiple CPUs to load these in parallel, which should be safe.
140         */
141        public static void testAEPDeserialise()
142            throws Exception
143            {
144            final String lastOne = frozenAEPs.get(frozenAEPs.size()-1);
145    
146            // List of tasks/loads/deserialisations pending completion.
147            final List<Future<?>> tasks = new ArrayList<Future<?>>(frozenAEPs.size());
148    
149            // Run the tasks as concurrently as we have CPUs for (and a little more)...
150            for(final String filename : frozenAEPs)
151                {
152                tasks.add(Main.computeIntensiveTestThreadPool.submit(new Runnable(){
153                    final public void run()
154                        {
155                        Main.getOut().println("[Loading frozen AEP: "+filename+"...]");
156                        final File f = new File(filename);
157                        if(!f.isFile() || !f.canRead())
158                            {
159                            Main.getErr().println("WARNING: skipping test as cannot open AEP snapshot: " + f);
160                            return;
161                            }
162    
163                        try
164                            {
165                            final Object o = FileTools.deserialiseFromFile(f, true);
166                            assertNotNull("Deserialised data from "+f+" must not be null", o);
167                            assertTrue("Deserialised data from "+f+" must have correct type",
168                                       o instanceof AllExhibitProperties);
169                            final AllExhibitProperties aep = (AllExhibitProperties) o;
170    
171                            // Do extra-thorough checks on the last (most recent) AEP.
172                            if(lastOne.equals(filename))
173                                {
174                                // Checks that the AEP can be locally serialised and deserialised
175                                // without getting broken.
176                                SerializationTest.checkSerialisationPreservesEquality(o);
177    
178                                // Checks that we can correctly serialise/deserialise
179                                // via compression at our highest-available rate
180                                // (eg as if for the AEP diff RPC call),
181                                // and that compact() doesn't break anything either!
182    //                            final Pair<CompressionLevel, byte[]> squished =
183    //                                GenUtils.compressObject(aep, GenUtils.MAX_SUPPORTED_COMPRESSION_LEVEL, !MemoryTools.lotsFree());
184    //                            final AllExhibitProperties unsquished = (AllExhibitProperties)
185    //                                (new ObjectInputStream(GenUtils.wrapForDecompression(new ByteArrayInputStream(squished.second), squished.first))).readObject();
186                                final ByteArrayOutputStream baos = new ByteArrayOutputStream();
187                                final ObjectOutputStream oos = new ObjectOutputStream(GenUtils.wrapForCompression(baos, ExhibitDataTunnelSource.MAX_AEP_DIFF_COMP_LEVEL));
188                                oos.writeObject(aep);
189                                oos.close();
190                                Main.getOut().println("[AEP ("+aep+") compression with "+ExhibitDataTunnelSource.MAX_AEP_DIFF_COMP_LEVEL+" takes "+baos.size()+" bytes.]");
191                                final AllExhibitProperties unsquished = (AllExhibitProperties)
192                                    (new ObjectInputStream(GenUtils.wrapForDecompression(new ByteArrayInputStream(baos.toByteArray()), ExhibitDataTunnelSource.MAX_AEP_DIFF_COMP_LEVEL))).readObject();
193                                assertEquals("Must be able to (de)serialise and be equal after max (de)compression",
194                                                aep, unsquished);
195                                final long t1 = System.currentTimeMillis();
196                                unsquished.compact();
197                                final long t2 = System.currentTimeMillis();
198                                Main.getOut().println("[AEP.compact() took "+(t2-t1)+"ms.]");
199                                assertEquals("Must be able to compact() and be equal afterwards",
200                                                aep, unsquished);
201    
202                                // Tiny sanity test.
203                                assertEquals("AEID sizes should be consistent", aep.aeid.size(), aep.aeid.getAllExhibitNamesSorted().size());
204                                }
205    
206                            // We can piggy-back some compression/tuning calculations here.
207                            if(CALC_COMPRESSION_STATS)
208                                {
209                                System.out.println("Computing stats for sample: " + f + "; gzipped sample length = " + f.length());
210                                System.out.println("  aep.aeid.length (number of exhibits) = " + aep.aeid.length);
211                                final ByteArrayOutputStream baos = new ByteArrayOutputStream((int) f.length() * 3);
212                                // Write raw form of AEID.
213                                (new ObjectOutputStream(baos)).writeObject(aep.aeid);
214                                final byte[] rawAEID = baos.toByteArray();
215                                baos.reset();
216                                System.out.println("  raw AEID serialised length = " + rawAEID.length + "; " + (rawAEID.length / (float) aep.aeid.length) + "bytes/exhibit.");
217                                showCompression(rawAEID, System.out);
218                                // Write raw form of full AEP.
219                                (new ObjectOutputStream(baos)).writeObject(aep);
220                                final byte[] rawAEP = baos.toByteArray();
221                                baos.reset();
222                                System.out.println("  raw AEP serialised length = " + rawAEP.length + "; " + (rawAEP.length / (float) aep.aeid.length) + "bytes/exhibit.");
223                                //baos = null; // Help GC.
224                                showCompression(rawAEP, System.out);
225                                }
226                            }
227                        catch(final Exception e)
228                            {
229                            // Report the problem immediately.
230                            e.printStackTrace();
231                            // Force the Executor mechanism to forward the exception.
232                            throw new Error(e);
233                            }
234                        }
235                    }));
236                }
237    
238            // Wait for all tasks to complete,
239            // propagating any exception thrown therein.
240            for(final Future<?> t : tasks)
241                { t.get(); }
242            }
243    
244    
245        /**Immutable set of usable (GZIPed) serialised event-history files in approximate date order.
246         * Manually kept up-to-date.
247         * <p>
248         * These are EventVariableValueSet objects.
249         * <p>
250         * The last entry is the most recent and/or largest
251         * and thus a good performance benchmark for algorithm/representation tweaks.
252         */
253        public static final List<String> frozenEVVSs;
254        /**Initialise frozenEVVSs. */
255        static
256            {
257            // GZIPped, serialised AllExhibitProperties exhibit meta-data.
258            // Paths relative to the source root.
259            frozenEVVSs = Collections.unmodifiableList(Arrays.asList(new String[]{
260    //            testDataDir + "/eventStore.Vote.comment-T.20050613.ser.gz",
261    //            testDataDir + "/eventStore.Vote.con-T.20050613.ser.gz",
262    //            testDataDir + "/eventStore.Vote.pro-T.20050613.ser.gz",
263                testDataDir + "/eventStore.Vote.comment-T.20090806.ser.gz",
264                testDataDir + "/eventStore.Vote.con-T.20090806.ser.gz",
265                testDataDir + "/eventStore.Vote.pro-T.20090806.ser.gz",
266                testDataDir + "/eventStore.pg2k.genstats.STRING.global.event-T.20090806.ser.gz",
267    //            testDataDir + "/eventStore.AccessPattern.pageview.byURI-T.20090806.ser.gz", // Superseded by 20090814 version/format.
268                testDataDir + "/eventStore.AccessPattern.pageview.byURI-T.20090814.ser.gz", // Newer format than 20090806.
269            }));
270            }
271    
272        /**Check that we can deserialise older snapshots of our EventVariableValueSet data.
273         * This includes snapshots of real Gallery meta-data.
274         */
275        public static void testEVVSDeserialise()
276            throws Exception
277            {
278    //        final String lastOne = frozenEVVSs.get(frozenEVVSs.size()-1);
279            EventVariableValueSet lastOne = null;
280            long lastOneSizeOnDisc = -1;
281    
282            // TODO: run the tasks concurrently where possible for speed...
283            for(final String filename : frozenEVVSs)
284                {
285                final File f = new File(filename);
286                final long sizeOnDisc = f.length();
287                Main.getOut().println("[Loading frozen EVVS: "+filename+", "+sizeOnDisc+" bytes...]");
288                if(!f.isFile() || !f.canRead())
289                    {
290                    Main.getErr().println("WARNING: skipping test as cannot open EVVS snapshot: " + f);
291                    return;
292                    }
293    
294                final long beforeDeser = System.currentTimeMillis();
295                final Object o = FileTools.deserialiseFromFile(f, true);
296                final long afterDeser = System.currentTimeMillis();
297                assertNotNull("Deserialised data from "+f+" must not be null", o);
298                assertTrue("Deserialised data from "+f+" must have correct type",
299                           o instanceof EventVariableValueSet);
300                Main.getOut().println("Deserialisation took "+(afterDeser-beforeDeser)+"ms.");
301                final EventVariableValueSet evvs = (EventVariableValueSet) o;
302    
303                // Check that we can (re)serialise.
304                SerializationTest.checkObjectCanBeSerialisedAndDeserialised(evvs);
305    
306                lastOne = evvs;
307                lastOneSizeOnDisc = sizeOnDisc;
308                }
309    
310            // Do extra checks on the last (biggest and/or most current) instance.
311            assertNotNull(lastOne);
312            assertTrue(lastOneSizeOnDisc > 0);
313    
314            ObjectOutputStream oos = null;
315            final byte data[];
316            try
317                {
318                final ByteArrayOutputStream baos = new ByteArrayOutputStream();
319                final java.util.zip.GZIPOutputStream gos = new java.util.zip.GZIPOutputStream(baos);
320                oos = new ObjectOutputStream((OutputStream)gos);
321                oos.writeObject(lastOne);
322                oos.flush();
323                if(gos != null) { gos.finish(); }
324                data = baos.toByteArray();
325                }
326            finally
327                {
328                if(oos != null) { oos.close(); } // Free up OS resources.
329                }
330    
331            Main.getOut().println("New size "+data.length+" bytes.");
332            if(data.length > lastOneSizeOnDisc)
333                { Main.getErr().println("WHOOPS, got larger!"); }
334            assertTrue("Output size has grown significantly", data.length < (4*lastOneSizeOnDisc)/3);
335    
336            // Try to force some GC now to avoid skewing the results below with pent-up stuff.
337            lastOne = null;
338            MemoryTools.preemptiveGC();
339    
340            // Profile total cost of maximal decompression and deserialisation.
341            // This reflects the costs at the (more common) client end.
342            final ConcurrentMap<StackTraceElement, AtomicInteger> perfCounts = new ConcurrentHashMap<StackTraceElement, AtomicInteger>(1001);
343            final ConcurrentMap<StackTraceElement, ConcurrentMap<StackTraceElement, AtomicInteger>> parentPerfCounts = new ConcurrentHashMap<StackTraceElement, ConcurrentMap<StackTraceElement, AtomicInteger>>(1001);
344            final Thread perfMonitorThread = GenUtils.startThreadPerfMonitor(
345                Thread.currentThread(),
346                perfCounts,
347                parentPerfCounts,
348                "org.hd.", // Capture our code.
349                System.currentTimeMillis() + 1000000, // Monitor thread for at most 1000s.
350                50); // Sample relatively slowly, esp for WinTel development machine.
351            try
352                {
353                final long beforeDeser = System.currentTimeMillis();
354                final ObjectInputStream ois = new ObjectInputStream(new GZIPInputStream(new ByteArrayInputStream(data)));
355                final Object o2 = ois.readObject();
356                final long afterDeser = System.currentTimeMillis();
357                assertNotNull(o2);
358                Main.getOut().println("Time to deserialise in current format "+(afterDeser-beforeDeser)+"ms.");
359                }
360            finally
361                {
362                GenUtils.stopPerfMonitorandDumpSamples(
363                        perfMonitorThread,
364                        "EVVS (de)serialisation profile",
365                        perfCounts,
366                        parentPerfCounts,
367                        20,
368                        GenUtils.systemOutLogger);
369                }
370            }
371    
372        /**Test history load.
373         * Ensures that we can load the history from the test directory.
374         */
375        public static void testEVVSHistoryLoad()
376            throws Exception
377            {
378            for(final boolean async : new boolean[]{/*false,*/ true})
379                {
380                final BasicVarMgr vars = new BasicVarMgr(rnd.nextBoolean(), rnd.nextBoolean());
381                Main.getOut().println("BasicVarMgr loading history async="+async);
382                final long start = System.currentTimeMillis();
383                vars.loadEventHistories(new File(testDataDir), async);
384                final long afterLoad = System.currentTimeMillis();
385                Main.getOut().println("loadEventHistories() returned after "+(afterLoad-start)+"ms.");
386                // Ensure that we can retrieve an (empty) event set instantly that is not actually persisted...
387                assertNotNull(vars.getEventValue(SystemVariables.ACCESSPATTERN_CLICKTHROUGH_BY_URI, EventPeriod.VLONG, rnd.nextBoolean()));
388                Main.getOut().println("retrieved "+SystemVariables.ACCESSPATTERN_CLICKTHROUGH_BY_URI+" value after "+(System.currentTimeMillis()-start)+"ms.");
389                // We should be able to quickly access one of the smaller present histories...
390                assertNotNull(vars.getEventValue(SystemVariables.VOTE_CON, EventPeriod.VLONG, rnd.nextBoolean()));
391                Main.getOut().println("retrieved "+SystemVariables.VOTE_CON+" value after "+(System.currentTimeMillis()-start)+"ms.");
392    //            assertNotNull(vars.getEventValue(SystemVariables.ACCESSPATTERN_PAGEVIEW_BY_URI, EventPeriod.VLONG, rnd.nextBoolean()));
393    //            Main.getOut().println("retrieved "+SystemVariables.ACCESSPATTERN_PAGEVIEW_BY_URI+" value after "+(System.currentTimeMillis()-start)+"ms.");
394                final long afterAccess = System.currentTimeMillis();
395                Main.getOut().println("Time after access: "+(afterAccess-start)+"ms.");
396                }
397            }
398    
399        /**Give some indication of compression available in supplied raw data.
400         * Produces a map from compression level to compressed data,
401         * and can optionally write what it is doing on System.out.
402         * <p>
403         * The input data is not altered, and is returned in the result
404         * under the "NONE" tag.
405         * <p>
406         * Note that not all compression levels may be implemented/tested,
407         * eg due to resource constraints.
408         */
409        public static Map<CompressionLevel, byte[]> showCompression(final byte raw[],
410                                                                    final PrintStream out)
411            // throws IOException
412            {
413            final Map<CompressionLevel, byte[]> result = new HashMap<CompressionLevel, byte[]>();
414            for(final CompressionLevel level : CompressionLevel.values())
415                {
416                final long start = System.currentTimeMillis();
417                try
418                    {
419                    switch(level)
420                        {
421                        case NONE:
422                            {
423                            result.put(CompressionLevel.NONE, raw);
424                            break;
425                            }
426    
427                        case ZIP:
428                            {
429                            final byte zipped[] = FileTools.compressDeflatableData(raw, 0, raw.length);
430                            result.put(CompressionLevel.ZIP, zipped);
431                            break;
432                            }
433    
434                        case BZIP2:
435                            {
436                            final ByteArrayOutputStream baos = new ByteArrayOutputStream(raw.length / 2);
437                            final OutputStream os = new org.apache.tools.bzip2.CBZip2OutputStream(baos, 9);
438                            os.write(raw);
439                            os.close(); // Finish compression.
440                            final byte bzipped[] = baos.toByteArray();
441                            result.put(CompressionLevel.BZIP2, bzipped);
442                            break;
443                            }
444    
445                        default: // Unsupported method, at least here.
446                            break;
447                        }
448                    }
449                catch(final Throwable t)
450                    {
451                    // Unexpected problem...
452                    t.printStackTrace();
453                    System.err.println("HEAP SIZE: " + Runtime.getRuntime().totalMemory());
454                    }
455    
456                if(out != null)
457                    {
458                    final byte[] compData = result.get(level);
459                    if(compData == null)
460                        { out.println("    [Compression not supported at level: "+level+".]"); }
461                    else
462                        {
463                        final long end = System.currentTimeMillis();
464                        final float ratio = compData.length / (float) raw.length;
465                        out.println("    Compressed with method "+level+" to "+compData.length+" bytes; ratio=" + ratio + "; time=" + (end-start) + "ms.");
466                        }
467                    }
468                }
469            return(result);
470            }
471    
472        /**Test generation of hashes for AccessionData.
473         * We test simple streams against some known or previously-computed values.
474         */
475        @SuppressWarnings("unchecked")
476        public static void testAccessionHashes()
477            throws Exception
478            {
479            // Input byte sequences vs expected full-file hashes.
480            final Tuple.Pair/*<byte[], Tuple.Pair<Integer,ROByteArray>>*/ testData[] =
481                {
482                // Zero-length sequence...
483                new Tuple.Pair<byte[], Tuple.Pair<Integer,ROByteArray>>(new byte[0],
484                    new Tuple.Pair<Integer,ROByteArray>(new Integer(0), ROByteArray.fromHexString("d41d8cd98f00b204e9800998ecf8427e"))),
485    
486                // Simple non-zero-length sequence... "abc\n"
487                new Tuple.Pair<byte[], Tuple.Pair<Integer,ROByteArray>>(new byte[]{0x61, 0x62, 0x63, 0x0a},
488                    new Tuple.Pair<Integer,ROByteArray>(new Integer(0x4788814e), ROByteArray.fromHexString("0bee89b07a248e27c83fc3d5951213c1"))),
489    
490                // Simple non-zero-length sequence... "jim"
491                new Tuple.Pair<byte[], Tuple.Pair<Integer,ROByteArray>>(new byte[]{'J', 'i', 'm'},
492                    new Tuple.Pair<Integer,ROByteArray>(new Integer(91233295), ROByteArray.fromHexString("d54b3c8fcd5ba07e47b400e69a287966"))),
493    
494                // Long sequence of zeros...
495                new Tuple.Pair<byte[], Tuple.Pair<Integer,ROByteArray>>(new byte[262144],
496                    new Tuple.Pair<Integer,ROByteArray>(new Integer(0xe20eea22), ROByteArray.fromHexString("ec87a838931d4d5d2e94a04644788a55"))),
497                };
498    
499            for(int i = testData.length; --i >= 0; )
500                {
501                final Tuple.Pair<byte[], Tuple.Pair<Integer,ROByteArray>> d = testData[i];
502    
503                final Tuple.Pair<Integer,ROByteArray> result =
504                    AccessionData.computeFullFileHashes(new ByteArrayInputStream(d.first));
505    
506                assertEquals("Must compute correct hashes", d.second, result);
507                }
508            }
509    
510        /**Check that we can compute diffs between AEPs successfully.
511         * Potentially for every pair of frozen AEPs that we have available,
512         * test that we can generate a diff between them
513         * and that we can recreate a new AEP using the old AEP and a diff.
514         * <p>
515         * It is acceptable for the diff routine to refuse to create a diff/delta
516         * for whatever reason (unless the force flag is true).
517         *
518         * @throws Exception
519         */
520        public static final void testAEPDiffs()
521            throws Exception
522            {
523            // Limit the amount of time that we spend on this test.
524            // Note that there are a lot of subtle issues that this needs to check for.
525            final long stopBy = System.currentTimeMillis() + 1001;
526    
527            boolean doneADiff = false; // Set true when a diff has been created/applied.
528            boolean testedFinalAEP = false; // Set true when the final AEP has been tested in a diff.
529            boolean overrunning; // Set true when we've passed our target deadline and need to finish up ASAP.
530            while((!(overrunning = (System.currentTimeMillis() > stopBy))) || !doneADiff || !testedFinalAEP)
531                {
532    //              if(overrunning) { Main.getOut().println("[Overrunning...]"); }
533    
534                final int numFrozenAEPs = frozenAEPs.size();
535                            // Pick first frozen AEP at random though ensure that we try final AEP first...
536                final int firstAEPIndex = (!testedFinalAEP) ? (numFrozenAEPs-2) : rnd.nextInt(numFrozenAEPs-1);
537                final AllExhibitProperties aep1 =
538                    (AllExhibitProperties) FileTools.deserialiseFromFile(new File(frozenAEPs.get(firstAEPIndex)), true);
539    
540                // Should always be prepared to diff an AEP against itself
541                // (ie should not throw an exception).
542                AllExhibitPropertiesDelta.createDiff(aep1, aep1, false);
543    
544                // Pick second frozen AEP to be following instance.
545                final int secondAEPIndex = firstAEPIndex + 1;
546                // Note that we consider a diff to the final item to be the most interesting.
547                final boolean secondIsLatestAEP = (secondAEPIndex == numFrozenAEPs-1);
548                // Tested diff against latest AEP value stored...
549                if(secondIsLatestAEP) { testedFinalAEP = true; }
550                final AllExhibitProperties aep2 =
551                    (AllExhibitProperties) FileTools.deserialiseFromFile(new File(frozenAEPs.get(secondAEPIndex)), true);
552    
553                // Should always be prepared to diff an AEP against itself
554                // (ie should not throw an exception).
555                AllExhibitPropertiesDelta.createDiff(aep2, aep2, false);
556    
557                Main.getOut().println("About to diff aep1="+aep1+", aep2="+aep2+"...");
558    
559                final boolean force = doneADiff && rnd.nextBoolean(); // Try forcing only once we've tested one unforced diff.
560                final AllExhibitPropertiesDelta aepDiff;
561                try { aepDiff = AllExhibitPropertiesDelta.createDiff(aep1, aep2, force); }
562                catch(final AllExhibitPropertiesDelta.DiffException e)
563                    {
564                    // Refusing to create a diff is OK if the force flag is false.
565                    System.out.println("Did not create diff: " + e.getMessage());
566                    if(force) { fail("must create diff if force==true"); }
567                    continue;
568                    }
569    
570                // Test that the computed diff works correctly.
571                final AllExhibitProperties aep2Synth = AllExhibitPropertiesDelta.applyDiff(aep1, aepDiff);
572                assertTrue("Result of applying a diff must be non-null",
573                                (null != aep2Synth));
574    
575                // Detailed look at some of the reconstructed object.
576                assertEquals("The epgi must be correctly reconstructed",
577                                aep2.epgi, aep2Synth.epgi);
578                assertEquals("The aeid must be correctly reconstructed",
579                                aep2.aeid, aep2Synth.aeid);
580                assertEquals("The longHash must be correctly reconstructed",
581                                aep2.longHash, aep2Synth.longHash);
582    
583                assertEquals("Result of applying a diff must be equal to the original second AEP",
584                                aep2, aep2Synth);
585                assertEquals("The 'unchanged-since' timestamp must be preserved",
586                                aep2.hashNotChangedSince, aep2Synth.hashNotChangedSince);
587    
588                // Show that the diff is not damaged by (de)serialisation.
589                final AllExhibitPropertiesDelta aepDiff2 = (AllExhibitPropertiesDelta)
590                    SerializationTest.checkObjectCanBeSerialisedAndDeserialised(aepDiff);
591                final AllExhibitProperties aep2Synth2 = AllExhibitPropertiesDelta.applyDiff(aep1, aepDiff2);
592                assertTrue("Result of applying a diff must be non-null",
593                                (null != aep2Synth2));
594                assertEquals("Result of applying a diff must be equal to the original second AEP",
595                                aep2, aep2Synth2);
596                assertEquals("The 'unchanged-since' timestamp must be preserved",
597                                aep2.hashNotChangedSince, aep2Synth2.hashNotChangedSince);
598    
599                // If this was an unforced (realistic) diff
600                // then ensure that the serialised diff is (much) smaller than the AEP.
601                if(!force && !aep1.equals(aep2))
602                    {
603                    final ByteArrayOutputStream baosDiff = new ByteArrayOutputStream();
604                    final ObjectOutputStream oosDiff = new ObjectOutputStream(baosDiff);
605                    oosDiff.writeObject(aepDiff);
606                    oosDiff.close();
607    Main.getOut().println("oosDiff size = " + baosDiff.size());
608                    final ByteArrayOutputStream baosAEP = new ByteArrayOutputStream();
609                    final ObjectOutputStream oosAEP = new ObjectOutputStream(baosAEP);
610                    oosAEP.writeObject(aep2);
611                    oosAEP.close();
612    Main.getOut().println("oosAEP size = " + baosAEP.size());
613                    assertTrue("Serialised AEP diff must be smaller than new AEP, "+aepDiff, baosDiff.size() < baosAEP.size());
614    
615                    // Created and tested at least one unforced diff.
616                    doneADiff = true;
617                    }
618                }
619            }
620    
621        /**Historical GZIPped serialised JIndexBean instances, newest last, for checking backward compatibility. */
622        public static final List<String> JIB_SER_GZ_FILES = Collections.unmodifiableList(Arrays.asList(new String[]{
623                testDataDir + "/_cached_byWord_Index.20060818.ser.gz" /* Matches AEP dump for cross-testing. */,
624            }));
625    
626        /**Test that deserialisation of old JIB frozen instances is still possible.
627         * While we don't promise that the serialised form of a JIndexBean will be long-lived
628         * (and it may always be relatively easy to regenerate from current input data)
629         * checking that we don't accidentally break something is useful,
630         * and this also allows us to check how fast and memory-efficient new versions are.
631         */
632        public static void testJIBLegacyDeser()
633            throws Exception
634            {
635            // Try to read in each old serialised JIB in turn...
636            for(final String fn : JIB_SER_GZ_FILES)
637                {
638                final File f = new File(fn);
639                final long len = f.length();
640    System.out.println("[Loading JIB from "+f+" length "+len+"...]");
641                final AtomicInteger bytesIn = new AtomicInteger();
642                final ObjectInputStream ois = new ObjectInputStream(new FilterInputStream(
643                    new GZIPInputStream(new BufferedInputStream(new FileInputStream(f)))){
644                        @Override public boolean markSupported() { return(false); /* Prevent rewinding. */ }
645                        @Override public int read() throws IOException
646                            {
647                            final int result = in.read();
648                            if(result >= 0) { bytesIn.incrementAndGet(); }
649                            return(result);
650                            }
651                        @Override public int read(final byte[] b, final int off, final int len) throws IOException
652                            {
653                            final int result = in.read(b, off, len);
654                            if(result > 0) { bytesIn.addAndGet(result); }
655                            return(result);
656                            }
657                        @Override public int read(final byte[] b) throws IOException
658                            {
659                            final int result = in.read(b);
660                            if(result > 0) { bytesIn.addAndGet(result); }
661                            return(result);
662                            }
663                        });
664                final JIndexBean jib;
665                try { jib = (JIndexBean) ois.readObject(); }
666                finally { ois.close(); }
667    System.out.println("  [JIB key count = "+jib.keyCount()+", doc count = "+jib.docCount()+".]");
668                jib.compact(); // Force a re-compaction of the JIB.
669                final ByteArrayOutputStream baos = new ByteArrayOutputStream(bytesIn.get());
670                final ObjectOutputStream oos = new ObjectOutputStream(baos);
671                oos.writeObject(jib);
672                oos.close();
673                System.out.println("  [Input/old JIB raw serialised bytes: in = "+bytesIn.get()+", out = "+baos.size()+".]");
674                }
675            }
676    
677    
    /**Private source of OK pseudo-random numbers.
     * A single shared java.util.Random created with the default (no-argument) constructor;
     * used by tests in this class to make random choices such as picking an index or a boolean flag.
     */
    private static final Random rnd = new Random();
680        }