001    /*
002    Copyright (c) 1996-2012, Damon Hart-Davis
003    All rights reserved.
004    
005    Redistribution and use in source and binary forms, with or without
006    modification, are permitted provided that the following conditions are
007    met:
008    
009      * Redistributions of source code must retain the above copyright
010        notice, this list of conditions and the following disclaimer.
011    
012      * Redistributions in binary form must reproduce the above copyright
013        notice, this list of conditions and the following disclaimer in the
014        documentation and/or other materials provided with the
015        distribution.
016    
017    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
018    IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
019    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
020    PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
021    OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
022    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
023    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
024    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
025    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
026    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
027    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
028    */
029    
030    package org.hd.d.pg2k.webSvr.catalogue;
031    
032    import java.io.BufferedReader;
033    import java.io.EOFException;
034    import java.io.IOException;
035    import java.io.InputStream;
036    import java.io.InputStreamReader;
037    import java.util.ArrayList;
038    import java.util.Collections;
039    import java.util.List;
040    
041    import org.hd.d.pg2k.svrCore.AllExhibitProperties;
042    import org.hd.d.pg2k.svrCore.Compact7BitString;
043    import org.hd.d.pg2k.svrCore.CoreConsts;
044    import org.hd.d.pg2k.svrCore.ExhibitName;
045    import org.hd.d.pg2k.svrCore.ExhibitStaticAttr;
046    import org.hd.d.pg2k.svrCore.FileTools;
047    import org.hd.d.pg2k.svrCore.Name;
048    import org.hd.d.pg2k.svrCore.SimpleLoggerIF;
049    import org.hd.d.pg2k.svrCore.Tuple;
050    import org.hd.d.pg2k.svrCore.MIME.ExhibitMIME;
051    import org.hd.d.pg2k.svrCore.MIME.ExhibitMIME.ExhibitTypeParameters;
052    import org.hd.d.pg2k.svrCore.datasource.SimpleExhibitPipelineIF;
053    
054    import ORG.hd.d.IsDebug;
055    
056    /**Immutable representation of data for one trail/sequence of exhibits and commentary.
057     * These instances are not intended to be persisted,
058     * so the class does not implement java.io.Serializable.
059     */
060    public final class TrailData
061        {
062        /**Parse an instance from a 7-bit XHTML-safe ASCII text byte stream.
063         * This attempts to be generous, and work around as many problems as possible,
064         * but may give up without grace on, for example,
065         * finding no title or body, or on encountering non-7-bit characters.
066         * <p>
067         * This does not close its input stream.
068         * <p>
069         * Should be able to parse input with UNIX (\r aka LF)
070         * or Windows (\r\n aka CRLF) line termination,
071         * or even a mixture.
072         * <p>
073         * The first line must start "@ " followed by the HTML-safe title.
074         * <p>
075         * Each image/exhibit to be inserted should be on a line starting "* "
076         * followed by the exhibit short name and
077         * then an optional space and HTML-safe comment.
078         * The exhibit short names are held as Name values
079         * and are only checked for syntax.
080         * <p>
081         * Each blank (whitespace-only) line is treated as a paragraph boundary.
082         *
083         * @param in  7-bit ASCII line-oriented input stream; non-null
084         *
085         * @throws IOException  in case of input error
086         * @throws IllegalArgumentException  in case of corrupt input too garbled to handle
087         */
088        public static TrailData parseFromByteStream(final InputStream in)
089            throws IOException
090            {
091            final BufferedReader br = new BufferedReader(
092                new InputStreamReader(in, CoreConsts.FILE_ENCODING_ASCII7));
093    
094            // Extract title from first line (and trim leading/trailing whitespace).
095            final String rawTitle = br.readLine();
096            if((rawTitle == null) || !rawTitle.startsWith("@ "))
097                { throw new IllegalArgumentException("Missing title: must be first line, prefixed with '@ '"); }
098            final Compact7BitString title = Compact7BitString.convertToCompact7BitString(
099                rawTitle.substring(2).trim(), null);
100    
101            // Name (short) of current/previous exhibit, initially null.
102            Name currentExhibitShortName = null;
103            Name previousExhibitShortName = null;
104            // Text currently being collected.
105            final StringBuilder text = new StringBuilder(256);
106    
107            // Read body.
108            final List<Tuple.Pair<Name,Compact7BitString>> body = new ArrayList<Tuple.Pair<Name,Compact7BitString>>(32);
109            for( ; ; )
110                {
111                final String inputLine = br.readLine();
112                final boolean EOF = (inputLine == null);
113                final String trimmed = EOF ? "" : inputLine.trim();
114                final boolean blankLine = (!EOF) && trimmed.isEmpty();
115                final boolean emptyText = (text.length() == 0);
116                if(EOF || (blankLine && !emptyText) || inputLine.startsWith("* "))
117                    {
118                    // End of previous exhibit/text combo,
119                    // and possibly the start of a new one.
120                    if((currentExhibitShortName != null) || !emptyText)
121                        {
122                        // Push non-empty trailing item onto end of body.
123                        body.add(new Tuple.Pair<Name, Compact7BitString>(currentExhibitShortName,
124                            emptyText ? null : Compact7BitString.convertToCompact7BitString(text.toString(), null)));
125    
126                        // Clear text out and current name ready to collect text for next entry.
127                        text.setLength(0);
128                        currentExhibitShortName = null;
129                        }
130                    // Stop at EOF...
131                    if(EOF) { break; }
132                    // Do nothing more for a blank input line.
133                    if(blankLine) { continue; }
134    
135                    // Extract exhibit and any comment.
136                    // Treat line as three whitespace-separated fields thus:
137                    // '*' shortName [optional-comment]
138                    // where the comment MAY get used as a caption in future.
139                    // The shortName may be replaced by -n- simply to change the number-in-sequence.
140                    final String[] values = inputLine.split("[ \t]+");
141                    // Skip a malformed exhibit line.
142                    if(values.length < 2)
143                        { continue; }
144                    // If we manage to extract a valid short exhibit name then continue.
145                    final String sen = values[1];
146                    if(ExhibitName.validNameFinalComponentSyntax(sen))
147                        {
148                        currentExhibitShortName = previousExhibitShortName =
149                            // We hope that there may be similarity with the previous name...
150                            Name.create(sen, previousExhibitShortName);
151                        continue;
152                        }
153    
154                    // Try to parse -nnn- number-in-sequence value
155                    // if we have a 'previous' exhibit name to hand.
156                    // Only accept strictly-positive values of n.
157                    if((previousExhibitShortName != null) &&
158                       sen.startsWith("-") && sen.endsWith("-"))
159                        {
160                        try
161                            {
162                            final int n = Integer.parseInt(sen.substring(1, sen.length()-1), 10);
163                            if(n > 0)
164                                {
165                                // Construct new short name using supplied sequence number...
166                                final String mainWords = ExhibitName.getMainWordsComponentFromShortName(previousExhibitShortName, Collections.<String>emptySet()).toString();
167                                final String author = ExhibitName.getAuthorComponent(previousExhibitShortName).toString();
168                                final String extn = ExhibitName.getExtensionComponent(previousExhibitShortName).toString();
169                                final String newShortName =
170                                    mainWords + ExhibitName.WORD_SEP +
171                                    n + ExhibitName.WORD_SEP +
172                                    author + '.' +
173                                    extn;
174                                assert(ExhibitName.validNameFinalComponentSyntax(newShortName));
175                                currentExhibitShortName = previousExhibitShortName =
176                                    // We hope that there may be similarity with the previous name...
177                                    Name.create(newShortName, previousExhibitShortName);
178                                // Use new short name...
179                                continue;
180                                }
181                            }
182                        catch(final NumberFormatException e) { /* Silently ignore and mop up error. */ }
183                        }
184    
185                    // Parse error: could not understand the input.
186                    // Quietly ignore this unparsable exhibit line for robustness,
187                    // but wipe out the old exhibit name as a rip-stop.
188                    currentExhibitShortName = previousExhibitShortName = null;
189                    continue;
190                    }
191    
192                // Blank lines do not contribute to the result text directly.
193                if(blankLine) { continue; }
194    
195                // Treat this as a line of text for the exhibit.
196                // Implicitly add whitespace after any extant text.
197                // Append the whitespace-trimmed input line...
198                if(!emptyText) { text.append(' '); }
199                text.append(trimmed);
200                }
201    
202            // We must see at least one exhibit or text para.
203            if(body.isEmpty())
204                { throw new EOFException("no body text/exhibits found"); }
205    
206            return(new TrailData(title, body));
207            }
208    
209        /**Create instance from pre-parsed data.
210         * Marked 'private' since only intended to be called from factory methods.
211         *
212         * @param title  7-bit XHTML-safe markup-free (except for UNICODE character entities);
213         *     non-null, non-empty, no longer than MAX_TITLE_LEN characters/bytes
214         * @param body  sequence of exhibit short names (and Name) with associated (7-bit XHTML-safe) texts
215         *     (either but not both of each pair may be null, and neither of any pair can be empty);
216         *     non-null, non-empty
217         */
218        private TrailData(final Compact7BitString title,
219                          final List<Tuple.Pair<Name,Compact7BitString>> body)
220            {
221            if(title == null)
222                { throw new IllegalArgumentException("null title not permitted"); }
223            final int titleLen = title.toString().length();
224            if((titleLen < 1) || (titleLen > MAX_TITLE_LEN))
225                { throw new IllegalArgumentException("title length invalid"); }
226            if((body == null) || body.isEmpty())
227                { throw new IllegalArgumentException("null/empty body"); }
228    
229            // Take defensive copy of body (to be able to enforce immutability, etc).
230            final List<Tuple.Pair<Name,Compact7BitString>> bodyCopy = new ArrayList<Tuple.Pair<Name,Compact7BitString>>(body);
231            // Check each member of the list for validity.
232            for(final Tuple.Pair<Name,Compact7BitString> entry : bodyCopy)
233                {
234                if((entry.first == null) && (entry.second == null))
235                    { throw new IllegalArgumentException("no body entry can have the exhibit and text null"); }
236                if((entry.first != null) && !ExhibitName.validNameFinalComponentSyntax(entry.first))
237                    { throw new IllegalArgumentException("invalid short exhibit name"); }
238                if((entry.second != null) && entry.second.isEmpty())
239                    { throw new IllegalArgumentException("invalid empty body text entry"); }
240                }
241    
242            this.title = title;
243            this.body = bodyCopy;
244            }
245    
246        /**Maximum title length in raw bytes/characters; strictly positive.
247         * On the generous side to allow for bulky character entities in non-English titles.
248         */
249        public static final int MAX_TITLE_LEN = 128;
250    
251        /**Title for trail (immutable); never null nor empty.
252         * Immutable 7-bit XHTML/HTML-safe title with no mark-up except UNICODE character entities.
253         */
254        public final Compact7BitString title;
255    
256        /**Body of trail (immutable); never null nor empty.
257         * Either but not both of each pair may be null, and neither of any pair can be empty.
258         */
259        public final List<Tuple.Pair<Name,Compact7BitString>> body;
260    
261        /**Read trail data from specified trail exhibit (full) name, or null if not available.
262         * Guaranteed not to throw any Exception except for null arguments; useful for JSPs.
263         */
264        public static final TrailData readTrailFromExhibit(final SimpleExhibitPipelineIF dataSource,
265                                                           final Name.ExhibitFull trailExhibitName,
266                                                           final SimpleLoggerIF log)
267            {
268            if((dataSource == null) || (trailExhibitName == null) || (log == null))
269                { throw new IllegalArgumentException(); }
270    
271            try
272                {
273                // Return null immediately if the exhibit file type is wrong.
274                final ExhibitTypeParameters fileType = ExhibitMIME.getInputFileType(trailExhibitName);
275                if((fileType == null) || (fileType.type != ExhibitMIME.ET_TRML))
276                    { return(null); }
277    
278                final AllExhibitProperties aep = dataSource.getAllExhibitProperties(-1);
279                // Return null immediately if the exhibit does not exist.
280                final ExhibitStaticAttr esa = aep.aeid.getStaticAttr(trailExhibitName);
281                if(esa == null) { return(null); }
282    
283                // Wrap the exhibit as a stream...
284                final AllExhibitProperties.ExhibitDataSource eds =
285                    FileTools.wrapExhibitAsStream(dataSource);
286    //                new AllExhibitProperties.ExhibitDataSource(){
287    //                    @Override
288    //                    public final void getRawFile(final ByteBuffer buf, final String exhibitName, final int position) throws IOException
289    //                        { dataSource.getRawFile(buf, exhibitName, position, false); }
290    //                    };
291    
292                // Now attempt to parse the exhibit as trail data...
293                return(parseFromByteStream(eds.getInputStream(esa)));
294                }
295            catch(final Exception e)
296                {
297    if(IsDebug.isDebug) { e.printStackTrace(); }
298                log.log("readTrailFromExhibit() failed for "+trailExhibitName+": "+e.getMessage());
299                return(null); // Safe return value.
300                }
301            }
302        }