001 /*
002 Copyright (c) 1996-2012, Damon Hart-Davis
003 All rights reserved.
004
005 Redistribution and use in source and binary forms, with or without
006 modification, are permitted provided that the following conditions are
007 met:
008
009 * Redistributions of source code must retain the above copyright
010 notice, this list of conditions and the following disclaimer.
011
012 * Redistributions in binary form must reproduce the above copyright
013 notice, this list of conditions and the following disclaimer in the
014 documentation and/or other materials provided with the
015 distribution.
016
017 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
018 IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
019 TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
020 PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
021 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
022 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
023 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
024 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
025 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
026 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
027 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
028 */
029
030 package org.hd.d.pg2k.webSvr.catalogue;
031
032 import java.io.BufferedReader;
033 import java.io.EOFException;
034 import java.io.IOException;
035 import java.io.InputStream;
036 import java.io.InputStreamReader;
037 import java.util.ArrayList;
038 import java.util.Collections;
039 import java.util.List;
040
041 import org.hd.d.pg2k.svrCore.AllExhibitProperties;
042 import org.hd.d.pg2k.svrCore.Compact7BitString;
043 import org.hd.d.pg2k.svrCore.CoreConsts;
044 import org.hd.d.pg2k.svrCore.ExhibitName;
045 import org.hd.d.pg2k.svrCore.ExhibitStaticAttr;
046 import org.hd.d.pg2k.svrCore.FileTools;
047 import org.hd.d.pg2k.svrCore.Name;
048 import org.hd.d.pg2k.svrCore.SimpleLoggerIF;
049 import org.hd.d.pg2k.svrCore.Tuple;
050 import org.hd.d.pg2k.svrCore.MIME.ExhibitMIME;
051 import org.hd.d.pg2k.svrCore.MIME.ExhibitMIME.ExhibitTypeParameters;
052 import org.hd.d.pg2k.svrCore.datasource.SimpleExhibitPipelineIF;
053
054 import ORG.hd.d.IsDebug;
055
056 /**Immutable representation of data for one trail/sequence of exhibits and commentary.
057 * These instances are not intended to be persisted,
058 * so the class does not implement java.io.Serializable.
059 */
060 public final class TrailData
061 {
062 /**Parse an instance from a 7-bit XHTML-safe ASCII text byte stream.
063 * This attempts to be generous, and work around as many problems as possible,
064 * but may give up without grace on, for example,
065 * finding no title or body, or on encountering non-7-bit characters.
066 * <p>
067 * This does not close its input stream.
068 * <p>
069 * Should be able to parse input with UNIX (\r aka LF)
070 * or Windows (\r\n aka CRLF) line termination,
071 * or even a mixture.
072 * <p>
073 * The first line must start "@ " followed by the HTML-safe title.
074 * <p>
075 * Each image/exhibit to be inserted should be on a line starting "* "
076 * followed by the exhibit short name and
077 * then an optional space and HTML-safe comment.
078 * The exhibit short names are held as Name values
079 * and are only checked for syntax.
080 * <p>
081 * Each blank (whitespace-only) line is treated as a paragraph boundary.
082 *
083 * @param in 7-bit ASCII line-oriented input stream; non-null
084 *
085 * @throws IOException in case of input error
086 * @throws IllegalArgumentException in case of corrupt input too garbled to handle
087 */
088 public static TrailData parseFromByteStream(final InputStream in)
089 throws IOException
090 {
091 final BufferedReader br = new BufferedReader(
092 new InputStreamReader(in, CoreConsts.FILE_ENCODING_ASCII7));
093
094 // Extract title from first line (and trim leading/trailing whitespace).
095 final String rawTitle = br.readLine();
096 if((rawTitle == null) || !rawTitle.startsWith("@ "))
097 { throw new IllegalArgumentException("Missing title: must be first line, prefixed with '@ '"); }
098 final Compact7BitString title = Compact7BitString.convertToCompact7BitString(
099 rawTitle.substring(2).trim(), null);
100
101 // Name (short) of current/previous exhibit, initially null.
102 Name currentExhibitShortName = null;
103 Name previousExhibitShortName = null;
104 // Text currently being collected.
105 final StringBuilder text = new StringBuilder(256);
106
107 // Read body.
108 final List<Tuple.Pair<Name,Compact7BitString>> body = new ArrayList<Tuple.Pair<Name,Compact7BitString>>(32);
109 for( ; ; )
110 {
111 final String inputLine = br.readLine();
112 final boolean EOF = (inputLine == null);
113 final String trimmed = EOF ? "" : inputLine.trim();
114 final boolean blankLine = (!EOF) && trimmed.isEmpty();
115 final boolean emptyText = (text.length() == 0);
116 if(EOF || (blankLine && !emptyText) || inputLine.startsWith("* "))
117 {
118 // End of previous exhibit/text combo,
119 // and possibly the start of a new one.
120 if((currentExhibitShortName != null) || !emptyText)
121 {
122 // Push non-empty trailing item onto end of body.
123 body.add(new Tuple.Pair<Name, Compact7BitString>(currentExhibitShortName,
124 emptyText ? null : Compact7BitString.convertToCompact7BitString(text.toString(), null)));
125
126 // Clear text out and current name ready to collect text for next entry.
127 text.setLength(0);
128 currentExhibitShortName = null;
129 }
130 // Stop at EOF...
131 if(EOF) { break; }
132 // Do nothing more for a blank input line.
133 if(blankLine) { continue; }
134
135 // Extract exhibit and any comment.
136 // Treat line as three whitespace-separated fields thus:
137 // '*' shortName [optional-comment]
138 // where the comment MAY get used as a caption in future.
139 // The shortName may be replaced by -n- simply to change the number-in-sequence.
140 final String[] values = inputLine.split("[ \t]+");
141 // Skip a malformed exhibit line.
142 if(values.length < 2)
143 { continue; }
144 // If we manage to extract a valid short exhibit name then continue.
145 final String sen = values[1];
146 if(ExhibitName.validNameFinalComponentSyntax(sen))
147 {
148 currentExhibitShortName = previousExhibitShortName =
149 // We hope that there may be similarity with the previous name...
150 Name.create(sen, previousExhibitShortName);
151 continue;
152 }
153
154 // Try to parse -nnn- number-in-sequence value
155 // if we have a 'previous' exhibit name to hand.
156 // Only accept strictly-positive values of n.
157 if((previousExhibitShortName != null) &&
158 sen.startsWith("-") && sen.endsWith("-"))
159 {
160 try
161 {
162 final int n = Integer.parseInt(sen.substring(1, sen.length()-1), 10);
163 if(n > 0)
164 {
165 // Construct new short name using supplied sequence number...
166 final String mainWords = ExhibitName.getMainWordsComponentFromShortName(previousExhibitShortName, Collections.<String>emptySet()).toString();
167 final String author = ExhibitName.getAuthorComponent(previousExhibitShortName).toString();
168 final String extn = ExhibitName.getExtensionComponent(previousExhibitShortName).toString();
169 final String newShortName =
170 mainWords + ExhibitName.WORD_SEP +
171 n + ExhibitName.WORD_SEP +
172 author + '.' +
173 extn;
174 assert(ExhibitName.validNameFinalComponentSyntax(newShortName));
175 currentExhibitShortName = previousExhibitShortName =
176 // We hope that there may be similarity with the previous name...
177 Name.create(newShortName, previousExhibitShortName);
178 // Use new short name...
179 continue;
180 }
181 }
182 catch(final NumberFormatException e) { /* Silently ignore and mop up error. */ }
183 }
184
185 // Parse error: could not understand the input.
186 // Quietly ignore this unparsable exhibit line for robustness,
187 // but wipe out the old exhibit name as a rip-stop.
188 currentExhibitShortName = previousExhibitShortName = null;
189 continue;
190 }
191
192 // Blank lines do not contribute to the result text directly.
193 if(blankLine) { continue; }
194
195 // Treat this as a line of text for the exhibit.
196 // Implicitly add whitespace after any extant text.
197 // Append the whitespace-trimmed input line...
198 if(!emptyText) { text.append(' '); }
199 text.append(trimmed);
200 }
201
202 // We must see at least one exhibit or text para.
203 if(body.isEmpty())
204 { throw new EOFException("no body text/exhibits found"); }
205
206 return(new TrailData(title, body));
207 }
208
209 /**Create instance from pre-parsed data.
210 * Marked 'private' since only intended to be called from factory methods.
211 *
212 * @param title 7-bit XHTML-safe markup-free (except for UNICODE character entities);
213 * non-null, non-empty, no longer than MAX_TITLE_LEN characters/bytes
214 * @param body sequence of exhibit short names (and Name) with associated (7-bit XHTML-safe) texts
215 * (either but not both of each pair may be null, and neither of any pair can be empty);
216 * non-null, non-empty
217 */
218 private TrailData(final Compact7BitString title,
219 final List<Tuple.Pair<Name,Compact7BitString>> body)
220 {
221 if(title == null)
222 { throw new IllegalArgumentException("null title not permitted"); }
223 final int titleLen = title.toString().length();
224 if((titleLen < 1) || (titleLen > MAX_TITLE_LEN))
225 { throw new IllegalArgumentException("title length invalid"); }
226 if((body == null) || body.isEmpty())
227 { throw new IllegalArgumentException("null/empty body"); }
228
229 // Take defensive copy of body (to be able to enforce immutability, etc).
230 final List<Tuple.Pair<Name,Compact7BitString>> bodyCopy = new ArrayList<Tuple.Pair<Name,Compact7BitString>>(body);
231 // Check each member of the list for validity.
232 for(final Tuple.Pair<Name,Compact7BitString> entry : bodyCopy)
233 {
234 if((entry.first == null) && (entry.second == null))
235 { throw new IllegalArgumentException("no body entry can have the exhibit and text null"); }
236 if((entry.first != null) && !ExhibitName.validNameFinalComponentSyntax(entry.first))
237 { throw new IllegalArgumentException("invalid short exhibit name"); }
238 if((entry.second != null) && entry.second.isEmpty())
239 { throw new IllegalArgumentException("invalid empty body text entry"); }
240 }
241
242 this.title = title;
243 this.body = bodyCopy;
244 }
245
246 /**Maximum title length in raw bytes/characters; strictly positive.
247 * On the generous side to allow for bulky character entities in non-English titles.
248 */
249 public static final int MAX_TITLE_LEN = 128;
250
251 /**Title for trail (immutable); never null nor empty.
252 * Immutable 7-bit XHTML/HTML-safe title with no mark-up except UNICODE character entities.
253 */
254 public final Compact7BitString title;
255
256 /**Body of trail (immutable); never null nor empty.
257 * Either but not both of each pair may be null, and neither of any pair can be empty.
258 */
259 public final List<Tuple.Pair<Name,Compact7BitString>> body;
260
261 /**Read trail data from specified trail exhibit (full) name, or null if not available.
262 * Guaranteed not to throw any Exception except for null arguments; useful for JSPs.
263 */
264 public static final TrailData readTrailFromExhibit(final SimpleExhibitPipelineIF dataSource,
265 final Name.ExhibitFull trailExhibitName,
266 final SimpleLoggerIF log)
267 {
268 if((dataSource == null) || (trailExhibitName == null) || (log == null))
269 { throw new IllegalArgumentException(); }
270
271 try
272 {
273 // Return null immediately if the exhibit file type is wrong.
274 final ExhibitTypeParameters fileType = ExhibitMIME.getInputFileType(trailExhibitName);
275 if((fileType == null) || (fileType.type != ExhibitMIME.ET_TRML))
276 { return(null); }
277
278 final AllExhibitProperties aep = dataSource.getAllExhibitProperties(-1);
279 // Return null immediately if the exhibit does not exist.
280 final ExhibitStaticAttr esa = aep.aeid.getStaticAttr(trailExhibitName);
281 if(esa == null) { return(null); }
282
283 // Wrap the exhibit as a stream...
284 final AllExhibitProperties.ExhibitDataSource eds =
285 FileTools.wrapExhibitAsStream(dataSource);
286 // new AllExhibitProperties.ExhibitDataSource(){
287 // @Override
288 // public final void getRawFile(final ByteBuffer buf, final String exhibitName, final int position) throws IOException
289 // { dataSource.getRawFile(buf, exhibitName, position, false); }
290 // };
291
292 // Now attempt to parse the exhibit as trail data...
293 return(parseFromByteStream(eds.getInputStream(esa)));
294 }
295 catch(final Exception e)
296 {
297 if(IsDebug.isDebug) { e.printStackTrace(); }
298 log.log("readTrailFromExhibit() failed for "+trailExhibitName+": "+e.getMessage());
299 return(null); // Safe return value.
300 }
301 }
302 }