001    /*
002     * Licensed to the Apache Software Foundation (ASF) under one
003     * or more contributor license agreements. See the NOTICE file
004     * distributed with this work for additional information
005     * regarding copyright ownership. The ASF licenses this file
006     * to you under the Apache License, Version 2.0 (the  "License");
007     * you may not use this file except in compliance with the License.
008     * You may obtain a copy of the License at
009     *
010     *     http://www.apache.org/licenses/LICENSE-2.0
011     *
012     * Unless required by applicable law or agreed to in writing, software
013     * distributed under the License is distributed on an "AS IS" BASIS,
014     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015     * See the License for the specific language governing permissions and
016     * limitations under the License.
017     */
018    /*
019     * $Id: ToHTMLStream.java 1225444 2011-12-29 05:52:39Z mrglavas $
020     */
021    package org.apache.xml.serializer;
022    
023    import java.io.IOException;
024    import java.util.Properties;
025    
026    import javax.xml.transform.Result;
027    
028    import org.apache.xml.serializer.utils.MsgKey;
029    import org.apache.xml.serializer.utils.Utils;
030    import org.xml.sax.Attributes;
031    import org.xml.sax.SAXException;
032    
033    /**
034     * This serializer takes a series of SAX or
035     * SAX-like events and writes its output
036     * to the given stream.
037     * 
038     * This class is not a public API, it is public
039     * because it is used from another package.
040     * 
041     * @xsl.usage internal
042     */
043    public class ToHTMLStream extends ToStream 
044    {
045    
046        /** True while events from the DTD are being received; initially false. */
047        protected boolean m_inDTD = false;
048    
049        /** True if the current element is a block element; initially false.
050         *  NOTE (sb): a single flag cannot follow nesting -- this seems to need to be a stack. */
051        private boolean m_inBlockElem = false;
052    
053        /**
054         * Character map for HTML output: tells which characters need special treatment
055         * and provides character-to-entity-name lookup (HTML_ENTITIES_RESOURCE, Method.HTML).
056         */
057        private final CharInfo m_htmlcharInfo =
058    //        superseded: new CharInfo(CharInfo.HTML_ENTITIES_RESOURCE); the factory call below is used instead
059            CharInfo.getCharInfo(CharInfo.HTML_ENTITIES_RESOURCE, Method.HTML);
060    
061        /** Shared digital search trie for fast, case insensitive lookup of ElemDesc objects by element name. */
062        static final Trie m_elementFlags = new Trie();
063    
064        static {
065            initTagReference(m_elementFlags); // populate the shared table during class initialization
066        }
067        /**
068         * Fills the given trie with one ElemDesc per known HTML element name and
069         * then tags selected attributes of those elements as ElemDesc.ATTRURL or
070         * ElemDesc.ATTREMPTY (presumably URL-valued and value-less attributes,
071         * respectively -- confirm against ElemDesc).
072         *
073         * Elements are stored under upper-case names and fetched again under
074         * lower-case names, so the trie has to be case insensitive.
075         *
076         * @param m_elementFlags the trie to fill; inside this method the
077         *        parameter hides the static field of the same name.
078         */
079        static void initTagReference(Trie m_elementFlags) {
080    
081            // HTML 4.0 loose DTD
082            m_elementFlags.put("BASEFONT", new ElemDesc(ElemDesc.EMPTY));
083            m_elementFlags.put("FRAME", new ElemDesc(ElemDesc.EMPTY | ElemDesc.BLOCK));
084            m_elementFlags.put("FRAMESET", new ElemDesc(ElemDesc.BLOCK));
085            m_elementFlags.put("NOFRAMES", new ElemDesc(ElemDesc.BLOCK));
086            m_elementFlags.put("ISINDEX", new ElemDesc(ElemDesc.EMPTY | ElemDesc.BLOCK));
087            m_elementFlags.put("APPLET", new ElemDesc(ElemDesc.WHITESPACESENSITIVE));
088            m_elementFlags.put("CENTER", new ElemDesc(ElemDesc.BLOCK));
089            m_elementFlags.put("DIR", new ElemDesc(ElemDesc.BLOCK));
090            m_elementFlags.put("MENU", new ElemDesc(ElemDesc.BLOCK));
091    
092            // HTML 4.0 strict DTD
093            m_elementFlags.put("TT", new ElemDesc(ElemDesc.FONTSTYLE));
094            m_elementFlags.put("I", new ElemDesc(ElemDesc.FONTSTYLE));
095            m_elementFlags.put("B", new ElemDesc(ElemDesc.FONTSTYLE));
096            m_elementFlags.put("BIG", new ElemDesc(ElemDesc.FONTSTYLE));
097            m_elementFlags.put("SMALL", new ElemDesc(ElemDesc.FONTSTYLE));
098            m_elementFlags.put("EM", new ElemDesc(ElemDesc.PHRASE));
099            m_elementFlags.put("STRONG", new ElemDesc(ElemDesc.PHRASE));
100            m_elementFlags.put("DFN", new ElemDesc(ElemDesc.PHRASE));
101            m_elementFlags.put("CODE", new ElemDesc(ElemDesc.PHRASE));
102            m_elementFlags.put("SAMP", new ElemDesc(ElemDesc.PHRASE));
103            m_elementFlags.put("KBD", new ElemDesc(ElemDesc.PHRASE));
104            m_elementFlags.put("VAR", new ElemDesc(ElemDesc.PHRASE));
105            m_elementFlags.put("CITE", new ElemDesc(ElemDesc.PHRASE));
106            m_elementFlags.put("ABBR", new ElemDesc(ElemDesc.PHRASE));
107            m_elementFlags.put("ACRONYM", new ElemDesc(ElemDesc.PHRASE));
108            m_elementFlags.put("SUP", new ElemDesc(ElemDesc.SPECIAL | ElemDesc.ASPECIAL));
109            m_elementFlags.put("SUB", new ElemDesc(ElemDesc.SPECIAL | ElemDesc.ASPECIAL));
110            m_elementFlags.put("SPAN", new ElemDesc(ElemDesc.SPECIAL | ElemDesc.ASPECIAL));
111            m_elementFlags.put("BDO", new ElemDesc(ElemDesc.SPECIAL | ElemDesc.ASPECIAL));
112            m_elementFlags.put("BR",
113                new ElemDesc(ElemDesc.SPECIAL | ElemDesc.ASPECIAL | ElemDesc.EMPTY | ElemDesc.BLOCK));
114            m_elementFlags.put("BODY", new ElemDesc(ElemDesc.BLOCK));
115            m_elementFlags.put("ADDRESS",
116                new ElemDesc(ElemDesc.BLOCK | ElemDesc.BLOCKFORM | ElemDesc.BLOCKFORMFIELDSET));
117            m_elementFlags.put("DIV",
118                new ElemDesc(ElemDesc.BLOCK | ElemDesc.BLOCKFORM | ElemDesc.BLOCKFORMFIELDSET));
119            m_elementFlags.put("A", new ElemDesc(ElemDesc.SPECIAL));
120            m_elementFlags.put("MAP",
121                new ElemDesc(ElemDesc.SPECIAL | ElemDesc.ASPECIAL | ElemDesc.BLOCK));
122            m_elementFlags.put("AREA", new ElemDesc(ElemDesc.EMPTY | ElemDesc.BLOCK));
123            m_elementFlags.put("LINK",
124                new ElemDesc(ElemDesc.HEADMISC | ElemDesc.EMPTY | ElemDesc.BLOCK));
125            m_elementFlags.put("IMG",
126                new ElemDesc(ElemDesc.SPECIAL | ElemDesc.ASPECIAL
127                    | ElemDesc.EMPTY | ElemDesc.WHITESPACESENSITIVE));
128            m_elementFlags.put("OBJECT",
129                new ElemDesc(ElemDesc.SPECIAL | ElemDesc.ASPECIAL
130                    | ElemDesc.HEADMISC | ElemDesc.WHITESPACESENSITIVE));
131            m_elementFlags.put("PARAM", new ElemDesc(ElemDesc.EMPTY));
132            m_elementFlags.put("HR",
133                new ElemDesc(ElemDesc.BLOCK | ElemDesc.BLOCKFORM | ElemDesc.BLOCKFORMFIELDSET | ElemDesc.EMPTY));
134            m_elementFlags.put("P",
135                new ElemDesc(ElemDesc.BLOCK | ElemDesc.BLOCKFORM | ElemDesc.BLOCKFORMFIELDSET));
136            m_elementFlags.put("H1", new ElemDesc(ElemDesc.HEAD | ElemDesc.BLOCK));
137            m_elementFlags.put("H2", new ElemDesc(ElemDesc.HEAD | ElemDesc.BLOCK));
138            m_elementFlags.put("H3", new ElemDesc(ElemDesc.HEAD | ElemDesc.BLOCK));
139            m_elementFlags.put("H4", new ElemDesc(ElemDesc.HEAD | ElemDesc.BLOCK));
140            m_elementFlags.put("H5", new ElemDesc(ElemDesc.HEAD | ElemDesc.BLOCK));
141            m_elementFlags.put("H6", new ElemDesc(ElemDesc.HEAD | ElemDesc.BLOCK));
142            m_elementFlags.put("PRE", new ElemDesc(ElemDesc.PREFORMATTED | ElemDesc.BLOCK));
143            m_elementFlags.put("Q", new ElemDesc(ElemDesc.SPECIAL | ElemDesc.ASPECIAL));
144            m_elementFlags.put("BLOCKQUOTE",
145                new ElemDesc(ElemDesc.BLOCK | ElemDesc.BLOCKFORM | ElemDesc.BLOCKFORMFIELDSET));
146            m_elementFlags.put("INS", new ElemDesc(0));
147            m_elementFlags.put("DEL", new ElemDesc(0));
148            m_elementFlags.put("DL",
149                new ElemDesc(ElemDesc.BLOCK | ElemDesc.BLOCKFORM | ElemDesc.BLOCKFORMFIELDSET));
150            m_elementFlags.put("DT", new ElemDesc(ElemDesc.BLOCK));
151            m_elementFlags.put("DD", new ElemDesc(ElemDesc.BLOCK));
152            m_elementFlags.put("OL", new ElemDesc(ElemDesc.LIST | ElemDesc.BLOCK));
153            m_elementFlags.put("UL", new ElemDesc(ElemDesc.LIST | ElemDesc.BLOCK));
154            m_elementFlags.put("LI", new ElemDesc(ElemDesc.BLOCK));
155            m_elementFlags.put("FORM", new ElemDesc(ElemDesc.BLOCK));
156            m_elementFlags.put("LABEL", new ElemDesc(ElemDesc.FORMCTRL));
157            m_elementFlags.put("INPUT",
158                new ElemDesc(ElemDesc.FORMCTRL | ElemDesc.INLINELABEL | ElemDesc.EMPTY));
159            m_elementFlags.put("SELECT", new ElemDesc(ElemDesc.FORMCTRL | ElemDesc.INLINELABEL));
160            m_elementFlags.put("OPTGROUP", new ElemDesc(0));
161            m_elementFlags.put("OPTION", new ElemDesc(0));
162            m_elementFlags.put("TEXTAREA", new ElemDesc(ElemDesc.FORMCTRL | ElemDesc.INLINELABEL));
163            m_elementFlags.put("FIELDSET", new ElemDesc(ElemDesc.BLOCK | ElemDesc.BLOCKFORM));
164            m_elementFlags.put("LEGEND", new ElemDesc(0));
165            m_elementFlags.put("BUTTON", new ElemDesc(ElemDesc.FORMCTRL | ElemDesc.INLINELABEL));
166            m_elementFlags.put("TABLE",
167                new ElemDesc(ElemDesc.BLOCK | ElemDesc.BLOCKFORM | ElemDesc.BLOCKFORMFIELDSET));
168            m_elementFlags.put("CAPTION", new ElemDesc(ElemDesc.BLOCK));
169            m_elementFlags.put("THEAD", new ElemDesc(ElemDesc.BLOCK));
170            m_elementFlags.put("TFOOT", new ElemDesc(ElemDesc.BLOCK));
171            m_elementFlags.put("TBODY", new ElemDesc(ElemDesc.BLOCK));
172            m_elementFlags.put("COLGROUP", new ElemDesc(ElemDesc.BLOCK));
173            m_elementFlags.put("COL", new ElemDesc(ElemDesc.EMPTY | ElemDesc.BLOCK));
174            m_elementFlags.put("TR", new ElemDesc(ElemDesc.BLOCK));
175            m_elementFlags.put("TH", new ElemDesc(0));
176            m_elementFlags.put("TD", new ElemDesc(0));
177            m_elementFlags.put("HEAD", new ElemDesc(ElemDesc.BLOCK | ElemDesc.HEADELEM));
178            m_elementFlags.put("TITLE", new ElemDesc(ElemDesc.BLOCK));
179            m_elementFlags.put("BASE", new ElemDesc(ElemDesc.EMPTY | ElemDesc.BLOCK));
180            m_elementFlags.put("META",
181                new ElemDesc(ElemDesc.HEADMISC | ElemDesc.EMPTY | ElemDesc.BLOCK));
182            m_elementFlags.put("STYLE",
183                new ElemDesc(ElemDesc.HEADMISC | ElemDesc.RAW | ElemDesc.BLOCK));
184            m_elementFlags.put("SCRIPT",
185                new ElemDesc(ElemDesc.SPECIAL | ElemDesc.ASPECIAL | ElemDesc.HEADMISC | ElemDesc.RAW));
186            m_elementFlags.put("NOSCRIPT",
187                new ElemDesc(ElemDesc.BLOCK | ElemDesc.BLOCKFORM | ElemDesc.BLOCKFORMFIELDSET));
188            m_elementFlags.put("HTML", new ElemDesc(ElemDesc.BLOCK | ElemDesc.HTMLELEM));
189    
190            // Transitional Document Type Definition; from "John Ky" <hand@syd.speednet.com.au>
191            m_elementFlags.put("FONT", new ElemDesc(ElemDesc.FONTSTYLE));
192            // HTML 4 graphics section: the STRIKE/S and U element definitions
193            m_elementFlags.put("S", new ElemDesc(ElemDesc.FONTSTYLE));
194            m_elementFlags.put("STRIKE", new ElemDesc(ElemDesc.FONTSTYLE));
195            m_elementFlags.put("U", new ElemDesc(ElemDesc.FONTSTYLE));
196            // From "John Ky" <hand@syd.speednet.com.au>
197            m_elementFlags.put("NOBR", new ElemDesc(ElemDesc.FONTSTYLE));
198    
199            // HTML 4.0, section 16.5
200            m_elementFlags.put("IFRAME",
201                new ElemDesc(ElemDesc.BLOCK | ElemDesc.BLOCKFORM | ElemDesc.BLOCKFORMFIELDSET));
202    
203            // Netscape 4 extensions
204            m_elementFlags.put("LAYER",
205                new ElemDesc(ElemDesc.BLOCK | ElemDesc.BLOCKFORM | ElemDesc.BLOCKFORMFIELDSET));
206            m_elementFlags.put("ILAYER",
207                new ElemDesc(ElemDesc.BLOCK | ElemDesc.BLOCKFORM | ElemDesc.BLOCKFORMFIELDSET));
208    
209            // Attribute information, one group per element registered above.
210            ElemDesc desc;
211    
212            // <a>
213            desc = (ElemDesc) m_elementFlags.get("a");
214            desc.setAttr("HREF", ElemDesc.ATTRURL);
215            desc.setAttr("NAME", ElemDesc.ATTRURL);
216    
217            // <area>
218            desc = (ElemDesc) m_elementFlags.get("area");
219            desc.setAttr("HREF", ElemDesc.ATTRURL);
220            desc.setAttr("NOHREF", ElemDesc.ATTREMPTY);
221    
222            // <base>
223            desc = (ElemDesc) m_elementFlags.get("base");
224            desc.setAttr("HREF", ElemDesc.ATTRURL);
225    
226            // <button>
227            desc = (ElemDesc) m_elementFlags.get("button");
228            desc.setAttr("DISABLED", ElemDesc.ATTREMPTY);
229    
230            // <blockquote>
231            desc = (ElemDesc) m_elementFlags.get("blockquote");
232            desc.setAttr("CITE", ElemDesc.ATTRURL);
233    
234            // <del>
235            desc = (ElemDesc) m_elementFlags.get("del");
236            desc.setAttr("CITE", ElemDesc.ATTRURL);
237    
238            // <dir>
239            desc = (ElemDesc) m_elementFlags.get("dir");
240            desc.setAttr("COMPACT", ElemDesc.ATTREMPTY);
241    
242            // <div>
243            desc = (ElemDesc) m_elementFlags.get("div");
244            desc.setAttr("SRC", ElemDesc.ATTRURL); // Netscape 4 extension
245            desc.setAttr("NOWRAP", ElemDesc.ATTREMPTY); // Internet-Explorer extension
246    
247            // <dl>
248            desc = (ElemDesc) m_elementFlags.get("dl");
249            desc.setAttr("COMPACT", ElemDesc.ATTREMPTY);
250    
251            // <form>
252            desc = (ElemDesc) m_elementFlags.get("form");
253            desc.setAttr("ACTION", ElemDesc.ATTRURL);
254    
255            // <frame> -- attribution to: "Voytenko, Dimitry" <DVoytenko@SECTORBASE.COM>
256            desc = (ElemDesc) m_elementFlags.get("frame");
257            desc.setAttr("SRC", ElemDesc.ATTRURL);
258            desc.setAttr("LONGDESC", ElemDesc.ATTRURL);
259            desc.setAttr("NORESIZE", ElemDesc.ATTREMPTY);
260    
261            // <head>
262            desc = (ElemDesc) m_elementFlags.get("head");
263            desc.setAttr("PROFILE", ElemDesc.ATTRURL);
264    
265            // <hr>
266            desc = (ElemDesc) m_elementFlags.get("hr");
267            desc.setAttr("NOSHADE", ElemDesc.ATTREMPTY);
268    
269            // <iframe> -- HTML 4.0, section 16.5
270            desc = (ElemDesc) m_elementFlags.get("iframe");
271            desc.setAttr("SRC", ElemDesc.ATTRURL);
272            desc.setAttr("LONGDESC", ElemDesc.ATTRURL);
273    
274            // <ilayer> -- Netscape 4 extension
275            desc = (ElemDesc) m_elementFlags.get("ilayer");
276            desc.setAttr("SRC", ElemDesc.ATTRURL);
277    
278            // <img>
279            desc = (ElemDesc) m_elementFlags.get("img");
280            desc.setAttr("SRC", ElemDesc.ATTRURL);
281            desc.setAttr("LONGDESC", ElemDesc.ATTRURL);
282            desc.setAttr("USEMAP", ElemDesc.ATTRURL);
283            desc.setAttr("ISMAP", ElemDesc.ATTREMPTY);
284    
285            // <input>
286            desc = (ElemDesc) m_elementFlags.get("input");
287            desc.setAttr("SRC", ElemDesc.ATTRURL);
288            desc.setAttr("USEMAP", ElemDesc.ATTRURL);
289            desc.setAttr("CHECKED", ElemDesc.ATTREMPTY);
290            desc.setAttr("DISABLED", ElemDesc.ATTREMPTY);
291            desc.setAttr("ISMAP", ElemDesc.ATTREMPTY);
292            desc.setAttr("READONLY", ElemDesc.ATTREMPTY);
293    
294            // <ins>
295            desc = (ElemDesc) m_elementFlags.get("ins");
296            desc.setAttr("CITE", ElemDesc.ATTRURL);
297    
298            // <layer> -- Netscape 4 extension
299            desc = (ElemDesc) m_elementFlags.get("layer");
300            desc.setAttr("SRC", ElemDesc.ATTRURL);
301    
302            // <link>
303            desc = (ElemDesc) m_elementFlags.get("link");
304            desc.setAttr("HREF", ElemDesc.ATTRURL);
305    
306            // <menu>
307            desc = (ElemDesc) m_elementFlags.get("menu");
308            desc.setAttr("COMPACT", ElemDesc.ATTREMPTY);
309    
310            // <object>
311            desc = (ElemDesc) m_elementFlags.get("object");
312            desc.setAttr("CLASSID", ElemDesc.ATTRURL);
313            desc.setAttr("CODEBASE", ElemDesc.ATTRURL);
314            desc.setAttr("DATA", ElemDesc.ATTRURL);
315            desc.setAttr("ARCHIVE", ElemDesc.ATTRURL);
316            desc.setAttr("USEMAP", ElemDesc.ATTRURL);
317            desc.setAttr("DECLARE", ElemDesc.ATTREMPTY);
318    
319            // <ol>
320            desc = (ElemDesc) m_elementFlags.get("ol");
321            desc.setAttr("COMPACT", ElemDesc.ATTREMPTY);
322    
323            // <optgroup>
324            desc = (ElemDesc) m_elementFlags.get("optgroup");
325            desc.setAttr("DISABLED", ElemDesc.ATTREMPTY);
326    
327            // <option>
328            desc = (ElemDesc) m_elementFlags.get("option");
329            desc.setAttr("SELECTED", ElemDesc.ATTREMPTY);
330            desc.setAttr("DISABLED", ElemDesc.ATTREMPTY);
331    
332            // <q>
333            desc = (ElemDesc) m_elementFlags.get("q");
334            desc.setAttr("CITE", ElemDesc.ATTRURL);
335    
336            // <script>
337            desc = (ElemDesc) m_elementFlags.get("script");
338            desc.setAttr("SRC", ElemDesc.ATTRURL);
339            desc.setAttr("FOR", ElemDesc.ATTRURL);
340            desc.setAttr("DEFER", ElemDesc.ATTREMPTY);
341    
342            // <select>
343            desc = (ElemDesc) m_elementFlags.get("select");
344            desc.setAttr("DISABLED", ElemDesc.ATTREMPTY);
345            desc.setAttr("MULTIPLE", ElemDesc.ATTREMPTY);
346    
347            // <table>
348            desc = (ElemDesc) m_elementFlags.get("table");
349            desc.setAttr("NOWRAP", ElemDesc.ATTREMPTY); // Internet-Explorer extension
350    
351            // <td>
352            desc = (ElemDesc) m_elementFlags.get("td");
353            desc.setAttr("NOWRAP", ElemDesc.ATTREMPTY);
354    
355            // <textarea>
356            desc = (ElemDesc) m_elementFlags.get("textarea");
357            desc.setAttr("DISABLED", ElemDesc.ATTREMPTY);
358            desc.setAttr("READONLY", ElemDesc.ATTREMPTY);
359    
360            // <th>
361            desc = (ElemDesc) m_elementFlags.get("th");
362            desc.setAttr("NOWRAP", ElemDesc.ATTREMPTY);
363    
364            // <tr> -- nowrap here is both a Netscape and an Internet-Explorer extension
365            desc = (ElemDesc) m_elementFlags.get("tr");
366            desc.setAttr("NOWRAP", ElemDesc.ATTREMPTY);
367    
368            // <ul>
369            desc = (ElemDesc) m_elementFlags.get("ul");
370            desc.setAttr("COMPACT", ElemDesc.ATTREMPTY);
371        }
518    
519        /**
520         * Shared fallback descriptor, flagged only as BLOCK, returned by the element lookups when a name is not found.
521         */
522        static private final ElemDesc m_dummy = new ElemDesc(0 | ElemDesc.BLOCK);
523    
524        /** True if URLs should be specially escaped with the %xx form; on by default. */
525        private boolean m_specialEscapeURLs = true;
526    
527        /** True if the META tag should be omitted; off by default, so it is not omitted. */
528        private boolean m_omitMetaTag = false;
529    
530        /**
531         * Switches the special escaping of URLs on or off.
532         *
533         * @param bool {@code true} to escape URLs with the %xx form, {@code false} otherwise.
534         */
535        public void setSpecialEscapeURLs(boolean bool)
536        {
537            this.m_specialEscapeURLs = bool;
538        }
539    
540        /**
541         * Switches the omission of the META tag on or off.
542         *
543         * @param bool {@code true} to omit the META tag, {@code false} otherwise.
544         */
545        public void setOmitMetaTag(boolean bool)
546        {
547            this.m_omitMetaTag = bool;
548        }
549    
550        /**
551         * Applies the given output properties to this serializer. If the
552         * serializer already has an output format, it switches to the new
553         * one. Do not call this method while a document is being
554         * serialized.
555         * 
556         * It may be called any number of times before the serialization
557         * of a result-tree starts. In principle every serialization
558         * parameter may change, except method="html" (for any other
559         * method there should not be a ToHTMLStream object at all).
560         *
561         * @param format The output format or serialization parameters
562         * to use; it is dereferenced unconditionally, so it must not be null.
563         */
564        public void setOutputFormat(Properties format)
565        {
566            final String urlEscapingKey = OutputPropertiesFactory.S_USE_URL_ESCAPING;
567            final String omitMetaKey = OutputPropertiesFactory.S_OMIT_META_TAG;
568    
569            // A key that is absent from "format" leaves the matching field
570            // as it is, rather than explicitly forcing a value onto it.
571            if (null != format.getProperty(urlEscapingKey))
572            {
573                m_specialEscapeURLs =
574                    OutputPropertyUtils.getBooleanProperty(urlEscapingKey, format);
575            }
576            if (null != format.getProperty(omitMetaKey))
577            {
578                m_omitMetaTag =
579                    OutputPropertyUtils.getBooleanProperty(omitMetaKey, format);
580            }
581    
582            // Finally hand the properties on to the superclass.
583            super.setOutputFormat(format);
584        }
597    
598        /**
599         * Reports whether URLs receive the special %xx escaping.
600         *
601         * @return {@code true} when URLs are escaped with the %xx form.
602         */
603        private final boolean getSpecialEscapeURLs()
604        {
605            return this.m_specialEscapeURLs;
606        }
607    
608        /**
609         * Reports whether the META tag is left out of the output.
610         *
611         * @return {@code true} when the META tag is to be omitted.
612         */
613        private final boolean getOmitMetaTag()
614        {
615            return this.m_omitMetaTag;
616        }
617    
618        /**
619         * Looks up the descriptor registered for an element name.
620         *
621         * @param name element name, matched case insensitively; must not be null.
622         *
623         * @return the matching ElemDesc, or the shared dummy descriptor when
624         *         no description is registered under that name; never null.
625         */
626        public static final ElemDesc getElemDesc(String name)
627        {
628            // Historical note: a null name used to yield m_dummy. The check
629            // was dropped, so callers must now pass a non-null name.
630            final Object found = m_elementFlags.get(name);
631            return (null == found) ? m_dummy : (ElemDesc) found;
632        }
636        
637        
638        /**
639         * Per-instance Trie built as a copy of the shared static m_elementFlags table.
640         * The private copy is needed to be able to use the faster, but not
641         * thread-safe, lookup method Trie.get2(name).
642         */
643        private Trie m_htmlInfo = new Trie(m_elementFlags);
644        /**
645         * Calls to this method could be replaced with calls to
646         * getElemDesc(name), but this one should be faster.
647         */
648        private ElemDesc getElemDesc2(String name)
649        {
650            Object obj = m_htmlInfo.get2(name);
651            if (null != obj)
652                return (ElemDesc)obj;
653            return m_dummy;
654        }
655    
656        /**
657         * Creates an HTML serializer with its construction-time defaults:
658         * indenting switched on, the HTML character map installed and a
659         * new NamespaceMappings object for prefix handling.
660         */
661        public ToHTMLStream()
662        {
663            super();
664            // Namespace bookkeeping starts out with a fresh mapping object.
665            this.m_prefixMap = new NamespaceMappings();
666            this.m_charInfo = m_htmlcharInfo;
667            // No output properties have been applied at construction time,
668            // so the right default for indenting is chosen here anyway.
669            this.m_doIndent = true;
670        }
672    
673        /* Disabled field, kept for reference: the name of the current element. */
674    //    private String m_currentElementName = null;
675    
676        /**
677         * Handles the start of a document: runs the superclass processing,
678         * then sets up the state this HTML serializer starts out with.
679         *
680         * @throws org.xml.sax.SAXException Any SAX exception, possibly
681         *            wrapping another exception.
682         */
683        protected void startDocumentInternal() throws org.xml.sax.SAXException
684        {
685            super.startDocumentInternal();
686    
687            // A DOCTYPE declaration may still have to be written out.
688            this.m_needToOutputDocTypeDecl = true;
689            this.m_startNewLine = false;
690            this.m_needToCallStartDocument = false;
691            setOmitXMLDeclaration(true); // the XML declaration is omitted
692        }
693    
694        /**
695         * Writes the DOCTYPE declaration, provided it is still pending and
696         * a system or public identifier is available; otherwise nothing is
697         * written. Meant to be called only once: on normal return the
698         * pending flag is cleared, so a further call has no effect.
699         *
700         * @param name the name written right after the DOCTYPE keyword.
701         * @throws SAXException wrapping any IOException raised while writing.
702         */
703        private void outputDocTypeDecl(String name) throws SAXException {
704            if (m_needToOutputDocTypeDecl)
705            {
706                final String systemId = getDoctypeSystem();
707                final String publicId = getDoctypePublic();
708                final boolean hasPublicId = (publicId != null);
709    
710                if (hasPublicId || systemId != null)
711                {
712                    try
713                    {
714                        final java.io.Writer out = m_writer;
715                        out.write("<!DOCTYPE ");
716                        out.write(name);
717                        if (hasPublicId)
718                        {
719                            out.write(" PUBLIC \"");
720                            out.write(publicId);
721                            out.write('"');
722                        }
723                        if (systemId != null)
724                        {
725                            // SYSTEM is spelled out only when no public id precedes it.
726                            out.write(hasPublicId ? " \"" : " SYSTEM \"");
727                            out.write(systemId);
728                            out.write('"');
729                        }
730                        out.write('>');
731                        outputLineSep();
732                    }
733                    catch (IOException ioe)
734                    {
735                        throw new SAXException(ioe);
736                    }
737                }
738            }
739    
740            // Reached only on normal completion; an exception leaves the flag set.
741            m_needToOutputDocTypeDecl = false;
742        }
743    
744        /**
745         * Receive notification of the end of a document. 
746         *
747         * @throws org.xml.sax.SAXException Any SAX exception, possibly
748         *            wrapping another exception.
749         *
750         * @throws org.xml.sax.SAXException
751         */
752        public final void endDocument() throws org.xml.sax.SAXException
753        {
754            
755            flushPending();
756            if (m_doIndent && !m_isprevtext)
757            {
758                try
759                {
760                outputLineSep();
761                }
762                catch(IOException e)
763                {
764                    throw new SAXException(e);
765                }
766            }
767    
768            flushWriter();
769            if (m_tracer != null)
770                super.fireEndDoc();
771        }
772    
773        /**
774         *  Receive notification of the beginning of an element.
775         *
776         *
777         *  @param namespaceURI
778         *  @param localName
779         *  @param name The element type name.
780         *  @param atts The attributes attached to the element, if any.
781         *  @throws org.xml.sax.SAXException Any SAX exception, possibly
782         *             wrapping another exception.
783         *  @see #endElement
784         *  @see org.xml.sax.AttributeList
785         */
786        public void startElement(
787            String namespaceURI,
788            String localName,
789            String name,
790            Attributes atts)
791            throws org.xml.sax.SAXException
792        {
793    
794            ElemContext elemContext = m_elemContext;
795    
796            // clean up any pending things first
797            if (elemContext.m_startTagOpen)
798            {
799                closeStartTag();
800                elemContext.m_startTagOpen = false;
801            }
802            else if (m_cdataTagOpen)
803            {
804                closeCDATA();
805                m_cdataTagOpen = false;
806            }
807            else if (m_needToCallStartDocument)
808            {
809                startDocumentInternal();
810                m_needToCallStartDocument = false;
811            }
812            
813            if (m_needToOutputDocTypeDecl) {            
814                String n = name;
815                if (n == null || n.length() == 0) {
816                    // If the lexical QName is not given
817                    // use the localName in the DOCTYPE
818                    n = localName;
819                }
820                outputDocTypeDecl(n);
821            }
822    
823    
824            // if this element has a namespace then treat it like XML
825            if (null != namespaceURI && namespaceURI.length() > 0)
826            {
827                super.startElement(namespaceURI, localName, name, atts);
828    
829                return;
830            }
831            
832            try
833            {
834                // getElemDesc2(name) is faster than getElemDesc(name)
835                ElemDesc elemDesc = getElemDesc2(name);
836                int elemFlags = elemDesc.getFlags();
837    
838                // deal with indentation issues first
839                if (m_doIndent)
840                {
841    
842                    boolean isBlockElement = (elemFlags & ElemDesc.BLOCK) != 0;
843                    if (m_ispreserve)
844                        m_ispreserve = false;
845                    else if (
846                        (null != elemContext.m_elementName)
847                        && (!m_inBlockElem
848                            || isBlockElement) /* && !isWhiteSpaceSensitive */
849                        )
850                    {
851                        m_startNewLine = true;
852    
853                        indent();
854    
855                    }
856                    m_inBlockElem = !isBlockElement;
857                }
858    
859                // save any attributes for later processing
860                if (atts != null)
861                    addAttributes(atts);            
862    
863                m_isprevtext = false;
864                final java.io.Writer writer = m_writer;
865                writer.write('<');
866                writer.write(name);
867    
868    
869    
870                if (m_tracer != null)
871                    firePseudoAttributes();
872                
873                if ((elemFlags & ElemDesc.EMPTY) != 0)  
874                {
875                    // an optimization for elements which are expected
876                    // to be empty.
877                    m_elemContext = elemContext.push();
878                    /* XSLTC sometimes calls namespaceAfterStartElement()
879                     * so we need to remember the name
880                     */
881                    m_elemContext.m_elementName = name;
882                    m_elemContext.m_elementDesc = elemDesc;
883                    return;                
884                } 
885                else
886                {
887                    elemContext = elemContext.push(namespaceURI,localName,name);
888                    m_elemContext = elemContext;
889                    elemContext.m_elementDesc = elemDesc;
890                    elemContext.m_isRaw = (elemFlags & ElemDesc.RAW) != 0;
891                }
892                
893    
894                if ((elemFlags & ElemDesc.HEADELEM) != 0)
895                {
896                    // This is the <HEAD> element, do some special processing
897                    closeStartTag();
898                    elemContext.m_startTagOpen = false;
899                    if (!m_omitMetaTag)
900                    {
901                        if (m_doIndent)
902                            indent();
903                        writer.write(
904                            "<META http-equiv=\"Content-Type\" content=\"text/html; charset=");
905                        String encoding = getEncoding();
906                        String encode = Encodings.getMimeEncoding(encoding);
907                        writer.write(encode);
908                        writer.write("\">");
909                    }
910                }
911            }
912            catch (IOException e)
913            {
914                throw new SAXException(e);
915            }
916        }
917    
918        /**
919         *  Receive notification of the end of an element.
920         *
921         *
922         *  @param namespaceURI
923         *  @param localName
924         *  @param name The element type name
925         *  @throws org.xml.sax.SAXException Any SAX exception, possibly
926         *             wrapping another exception.
927         */
928        public final void endElement(
929            final String namespaceURI,
930            final String localName,
931            final String name)
932            throws org.xml.sax.SAXException
933        {
934            // deal with any pending issues
935            if (m_cdataTagOpen)
936                closeCDATA();
937    
938            // if the element has a namespace, treat it like XML, not HTML
939            if (null != namespaceURI && namespaceURI.length() > 0)
940            {
941                super.endElement(namespaceURI, localName, name);
942    
943                return;
944            }
945    
946            try
947            {
948    
949                ElemContext elemContext = m_elemContext;
950                final ElemDesc elemDesc = elemContext.m_elementDesc;
951                final int elemFlags = elemDesc.getFlags();
952                final boolean elemEmpty = (elemFlags & ElemDesc.EMPTY) != 0;
953    
954                // deal with any indentation issues
955                if (m_doIndent)
956                {
957                    final boolean isBlockElement = (elemFlags&ElemDesc.BLOCK) != 0;
958                    boolean shouldIndent = false;
959    
960                    if (m_ispreserve)
961                    {
962                        m_ispreserve = false;
963                    }
964                    else if (m_doIndent && (!m_inBlockElem || isBlockElement))
965                    {
966                        m_startNewLine = true;
967                        shouldIndent = true;
968                    }
969                    if (!elemContext.m_startTagOpen && shouldIndent)
970                        indent(elemContext.m_currentElemDepth - 1);
971                    m_inBlockElem = !isBlockElement;
972                }
973    
974                final java.io.Writer writer = m_writer;
975                if (!elemContext.m_startTagOpen)
976                {
977                    writer.write("</");
978                    writer.write(name);
979                    writer.write('>');
980                }
981                else
982                {
983                    // the start-tag open when this method was called,
984                    // so we need to process it now.
985                    
986                    if (m_tracer != null)
987                        super.fireStartElem(name);
988    
989                    // the starting tag was still open when we received this endElement() call
990                    // so we need to process any gathered attributes NOW, before they go away.
991                    int nAttrs = m_attributes.getLength();
992                    if (nAttrs > 0)
993                    {
994                        processAttributes(m_writer, nAttrs);
995                        // clear attributes object for re-use with next element
996                        m_attributes.clear();
997                    }
998                    if (!elemEmpty)
999                    {
1000                        // As per Dave/Paul recommendation 12/06/2000
1001                        // if (shouldIndent)
1002                        // writer.write('>');
1003                        //  indent(m_currentIndent);
1004    
1005                        writer.write("></");
1006                        writer.write(name);
1007                        writer.write('>');
1008                    }
1009                    else
1010                    {
1011                        writer.write('>');
1012                    }
1013                }
1014                
1015                // clean up because the element has ended
1016                if ((elemFlags & ElemDesc.WHITESPACESENSITIVE) != 0)
1017                    m_ispreserve = true;
1018                m_isprevtext = false;
1019    
1020                // fire off the end element event
1021                if (m_tracer != null)
1022                    super.fireEndElem(name);            
1023                               
1024                // OPTIMIZE-EMPTY                
1025                if (elemEmpty)
1026                {
1027                    // a quick exit if the HTML element had no children.
1028                    // This block of code can be removed if the corresponding block of code
1029                    // in startElement() also labeled with "OPTIMIZE-EMPTY" is also removed
1030                    m_elemContext = elemContext.m_prev;
1031                    return;
1032                }
1033    
1034                // some more clean because the element has ended. 
1035                if (!elemContext.m_startTagOpen)
1036                {
1037                    if (m_doIndent && !m_preserves.isEmpty())
1038                        m_preserves.pop();
1039                }
1040                m_elemContext = elemContext.m_prev;
1041    //            m_isRawStack.pop();
1042            }
1043            catch (IOException e)
1044            {
1045                throw new SAXException(e);
1046            }
1047        }
1048    
1049        /**
1050         * Process an attribute.
1051         * @param   writer The writer to write the processed output to.
1052         * @param   name   The name of the attribute.
1053         * @param   value   The value of the attribute.
1054         * @param   elemDesc The description of the HTML element 
1055         *           that has this attribute.
1056         *
1057         * @throws org.xml.sax.SAXException
1058         */
1059        protected void processAttribute(
1060            java.io.Writer writer,
1061            String name,
1062            String value,
1063            ElemDesc elemDesc)
1064            throws IOException
1065        {
1066            writer.write(' ');
1067    
1068            if (   ((value.length() == 0) || value.equalsIgnoreCase(name))
1069                && elemDesc != null 
1070                && elemDesc.isAttrFlagSet(name, ElemDesc.ATTREMPTY))
1071            {
1072                writer.write(name);
1073            }
1074            else
1075            {
1076                // %REVIEW% %OPT%
1077                // Two calls to single-char write may NOT
1078                // be more efficient than one to string-write...
1079                writer.write(name);
1080                writer.write("=\"");
1081                if (   elemDesc != null
1082                    && elemDesc.isAttrFlagSet(name, ElemDesc.ATTRURL))
1083                    writeAttrURI(writer, value, m_specialEscapeURLs);
1084                else
1085                    writeAttrString(writer, value, this.getEncoding());
1086                writer.write('"');
1087    
1088            }
1089        }
1090    
1091        /**
1092         * Tell if a character is an ASCII digit.
1093         */
1094        private boolean isASCIIDigit(char c)
1095        {
1096            return (c >= '0' && c <= '9');
1097        }
1098    
1099        /**
1100         * Make an integer into an HH hex value.
1101         * Does no checking on the size of the input, since this 
1102         * is only meant to be used locally by writeAttrURI.
1103         * 
1104         * @param i must be a value less than 255.
1105         * 
1106         * @return should be a two character string.
1107         */
1108        private static String makeHHString(int i)
1109        {
1110            String s = Integer.toHexString(i).toUpperCase();
1111            if (s.length() == 1)
1112            {
1113                s = "0" + s;
1114            }
1115            return s;
1116        }
1117    
1118        /**
1119        * Dmitri Ilyin: Makes sure if the String is HH encoded sign.
1120        * @param str must be 2 characters long
1121        *
1122        * @return true or false
1123        */
1124        private boolean isHHSign(String str)
1125        {
1126            boolean sign = true;
1127            try
1128            {
1129                char r = (char) Integer.parseInt(str, 16);
1130            }
1131            catch (NumberFormatException e)
1132            {
1133                sign = false;
1134            }
1135            return sign;
1136        }
1137    
1138        /**
1139         * Write the specified <var>string</var> after substituting non ASCII characters,
1140         * with <CODE>%HH</CODE>, where HH is the hex of the byte value.
1141         *
1142         * @param   string      String to convert to XML format.
1143         * @param doURLEscaping True if we should try to encode as 
1144         *                      per http://www.ietf.org/rfc/rfc2396.txt.
1145         *
1146         * @throws org.xml.sax.SAXException if a bad surrogate pair is detected.
1147         */
1148        public void writeAttrURI(
1149            final java.io.Writer writer, String string, boolean doURLEscaping)
1150            throws IOException
1151        {
1152            // http://www.ietf.org/rfc/rfc2396.txt says:
1153            // A URI is always in an "escaped" form, since escaping or unescaping a
1154            // completed URI might change its semantics.  Normally, the only time
1155            // escape encodings can safely be made is when the URI is being created
1156            // from its component parts; each component may have its own set of
1157            // characters that are reserved, so only the mechanism responsible for
1158            // generating or interpreting that component can determine whether or
1159            // not escaping a character will change its semantics. Likewise, a URI
1160            // must be separated into its components before the escaped characters
1161            // within those components can be safely decoded.
1162            //
1163            // ...So we do our best to do limited escaping of the URL, without 
1164            // causing damage.  If the URL is already properly escaped, in theory, this 
1165            // function should not change the string value.
1166    
1167            final int end = string.length();
1168            if (end > m_attrBuff.length)
1169            {
1170               m_attrBuff = new char[end*2 + 1];               
1171            }
1172            string.getChars(0,end, m_attrBuff, 0); 
1173            final char[] chars = m_attrBuff;
1174    
1175            int cleanStart = 0;
1176            int cleanLength = 0;
1177            
1178            
1179            char ch = 0;
1180            for (int i = 0; i < end; i++)
1181            {
1182                ch = chars[i];
1183    
1184                if ((ch < 32) || (ch > 126))
1185                {
1186                    if (cleanLength > 0)
1187                    {
1188                        writer.write(chars, cleanStart, cleanLength);
1189                        cleanLength = 0;
1190                    }
1191                    if (doURLEscaping)
1192                    {
1193                        // Encode UTF16 to UTF8.
1194                        // Reference is Unicode, A Primer, by Tony Graham.
1195                        // Page 92.
1196    
1197                        // Note that Kay doesn't escape 0x20...
1198                        //  if(ch == 0x20) // Not sure about this... -sb
1199                        //  {
1200                        //    writer.write(ch);
1201                        //  }
1202                        //  else 
1203                        if (ch <= 0x7F)
1204                        {
1205                            writer.write('%');
1206                            writer.write(makeHHString(ch));
1207                        }
1208                        else if (ch <= 0x7FF)
1209                        {
1210                            // Clear low 6 bits before rotate, put high 4 bits in low byte, 
1211                            // and set two high bits.
1212                            int high = (ch >> 6) | 0xC0;
1213                            int low = (ch & 0x3F) | 0x80;
1214                            // First 6 bits, + high bit
1215                            writer.write('%');
1216                            writer.write(makeHHString(high));
1217                            writer.write('%');
1218                            writer.write(makeHHString(low));
1219                        }
1220                        else if (Encodings.isHighUTF16Surrogate(ch)) // high surrogate
1221                        {
1222                            // I'm sure this can be done in 3 instructions, but I choose 
1223                            // to try and do it exactly like it is done in the book, at least 
1224                            // until we are sure this is totally clean.  I don't think performance 
1225                            // is a big issue with this particular function, though I could be 
1226                            // wrong.  Also, the stuff below clearly does more masking than 
1227                            // it needs to do.
1228    
1229                            // Clear high 6 bits.
1230                            int highSurrogate = ((int) ch) & 0x03FF;
1231    
1232                            // Middle 4 bits (wwww) + 1
1233                            // "Note that the value of wwww from the high surrogate bit pattern
1234                            // is incremented to make the uuuuu bit pattern in the scalar value 
1235                            // so the surrogate pair don't address the BMP."
1236                            int wwww = ((highSurrogate & 0x03C0) >> 6);
1237                            int uuuuu = wwww + 1;
1238    
1239                            // next 4 bits
1240                            int zzzz = (highSurrogate & 0x003C) >> 2;
1241    
1242                            // low 2 bits
1243                            int yyyyyy = ((highSurrogate & 0x0003) << 4) & 0x30;
1244    
1245                            // Get low surrogate character.
1246                            ch = chars[++i];
1247    
1248                            // Clear high 6 bits.
1249                            int lowSurrogate = ((int) ch) & 0x03FF;
1250    
1251                            // put the middle 4 bits into the bottom of yyyyyy (byte 3)
1252                            yyyyyy = yyyyyy | ((lowSurrogate & 0x03C0) >> 6);
1253    
1254                            // bottom 6 bits.
1255                            int xxxxxx = (lowSurrogate & 0x003F);
1256    
1257                            int byte1 = 0xF0 | (uuuuu >> 2); // top 3 bits of uuuuu
1258                            int byte2 =
1259                                0x80 | (((uuuuu & 0x03) << 4) & 0x30) | zzzz;
1260                            int byte3 = 0x80 | yyyyyy;
1261                            int byte4 = 0x80 | xxxxxx;
1262    
1263                            writer.write('%');
1264                            writer.write(makeHHString(byte1));
1265                            writer.write('%');
1266                            writer.write(makeHHString(byte2));
1267                            writer.write('%');
1268                            writer.write(makeHHString(byte3));
1269                            writer.write('%');
1270                            writer.write(makeHHString(byte4));
1271                        }
1272                        else
1273                        {
1274                            int high = (ch >> 12) | 0xE0; // top 4 bits
1275                            int middle = ((ch & 0x0FC0) >> 6) | 0x80;
1276                            // middle 6 bits
1277                            int low = (ch & 0x3F) | 0x80;
1278                            // First 6 bits, + high bit
1279                            writer.write('%');
1280                            writer.write(makeHHString(high));
1281                            writer.write('%');
1282                            writer.write(makeHHString(middle));
1283                            writer.write('%');
1284                            writer.write(makeHHString(low));
1285                        }
1286    
1287                    }
1288                    else if (escapingNotNeeded(ch))
1289                    {
1290                        writer.write(ch);
1291                    }
1292                    else
1293                    {
1294                        writer.write("&#");
1295                        writer.write(Integer.toString(ch));
1296                        writer.write(';');
1297                    }
1298                    // In this character range we have first written out any previously accumulated 
1299                    // "clean" characters, then processed the current more complicated character,
1300                    // which may have incremented "i".
1301                    // We now we reset the next possible clean character.
1302                    cleanStart = i + 1;
1303                }
1304                // Since http://www.ietf.org/rfc/rfc2396.txt refers to the URI grammar as
1305                // not allowing quotes in the URI proper syntax, nor in the fragment 
1306                // identifier, we believe that it's OK to double escape quotes.
1307                else if (ch == '"')
1308                {
1309                    // If the character is a '%' number number, try to avoid double-escaping.
1310                    // There is a question if this is legal behavior.
1311    
1312                    // Dmitri Ilyin: to check if '%' number number is invalid. It must be checked if %xx is a sign, that would be encoded
1313                    // The encoded signes are in Hex form. So %xx my be in form %3C that is "<" sign. I will try to change here a little.
1314    
1315                    //        if( ((i+2) < len) && isASCIIDigit(stringArray[i+1]) && isASCIIDigit(stringArray[i+2]) )
1316    
1317                    // We are no longer escaping '%'
1318    
1319                    if (cleanLength > 0)
1320                    {
1321                        writer.write(chars, cleanStart, cleanLength);
1322                        cleanLength = 0;
1323                    }   
1324                    
1325                    
1326                    // Mike Kay encodes this as &#34;, so he may know something I don't?
1327                    if (doURLEscaping)
1328                        writer.write("%22");
1329                    else
1330                        writer.write("&quot;"); // we have to escape this, I guess.
1331    
1332                    // We have written out any clean characters, then the escaped '%' and now we
1333                    // We now we reset the next possible clean character.
1334                    cleanStart = i + 1;    
1335                }
1336                else if (ch == '&')
1337                {
1338                    // HTML 4.01 reads, "Authors should use "&amp;" (ASCII decimal 38) 
1339                    // instead of "&" to avoid confusion with the beginning of a character 
1340                    // reference (entity reference open delimiter). 
1341                    if (cleanLength > 0)
1342                    {
1343                        writer.write(chars, cleanStart, cleanLength);
1344                        cleanLength = 0;
1345                    } 
1346                    writer.write("&amp;");
1347                    cleanStart = i + 1; 
1348                }
1349                else
1350                {
1351                    // no processing for this character, just count how
1352                    // many characters in a row that we have that need no processing
1353                    cleanLength++;
1354                }
1355            }
1356            
1357            // are there any clean characters at the end of the array
1358            // that we haven't processed yet?
1359            if (cleanLength > 1)
1360            {
1361                // if the whole string can be written out as-is do so
1362                // otherwise write out the clean chars at the end of the
1363                // array
1364                if (cleanStart == 0)
1365                    writer.write(string);
1366                else
1367                    writer.write(chars, cleanStart, cleanLength);
1368            }
1369            else if (cleanLength == 1)
1370            {
1371                // a little optimization for 1 clean character
1372                // (we could have let the previous if(...) handle them all)
1373                writer.write(ch);
1374            }
1375        }
1376    
1377        /**
1378         * Writes the specified <var>string</var> after substituting <VAR>specials</VAR>,
1379         * and UTF-16 surrogates for character references <CODE>&amp;#xnn</CODE>.
1380         *
1381         * @param   string      String to convert to XML format.
1382         * @param   encoding    CURRENTLY NOT IMPLEMENTED.
1383         *
1384         * @throws org.xml.sax.SAXException
1385         */
1386        public void writeAttrString(
1387            final java.io.Writer writer, String string, String encoding)
1388            throws IOException
1389        {
1390            final int end = string.length();
1391            if (end > m_attrBuff.length)
1392            {
1393                m_attrBuff = new char[end * 2 + 1];
1394            }
1395            string.getChars(0, end, m_attrBuff, 0);
1396            final char[] chars = m_attrBuff;
1397    
1398            
1399    
1400            int cleanStart = 0;
1401            int cleanLength = 0;
1402    
1403            char ch = 0;
1404            for (int i = 0; i < end; i++)
1405            {
1406                ch = chars[i];
1407    
1408                // System.out.println("SPECIALSSIZE: "+SPECIALSSIZE);
1409                // System.out.println("ch: "+(int)ch);
1410                // System.out.println("m_maxCharacter: "+(int)m_maxCharacter);
1411                // System.out.println("m_attrCharsMap[ch]: "+(int)m_attrCharsMap[ch]);
1412                if (escapingNotNeeded(ch) && (!m_charInfo.shouldMapAttrChar(ch)))
1413                {
1414                    cleanLength++;
1415                }
1416                else if ('<' == ch || '>' == ch)
1417                {
1418                    cleanLength++; // no escaping in this case, as specified in 15.2
1419                }
1420                else if (
1421                    ('&' == ch) && ((i + 1) < end) && ('{' == chars[i + 1]))
1422                {
1423                    cleanLength++; // no escaping in this case, as specified in 15.2
1424                }
1425                else
1426                {
1427                    if (cleanLength > 0)
1428                    {
1429                        writer.write(chars,cleanStart,cleanLength);
1430                        cleanLength = 0;
1431                    }
1432                    int pos = accumDefaultEntity(writer, ch, i, chars, end, false, true);
1433    
1434                    if (i != pos)
1435                    {
1436                        i = pos - 1;
1437                    }
1438                    else
1439                    {
1440                        if (Encodings.isHighUTF16Surrogate(ch))
1441                        {
1442     
1443                                writeUTF16Surrogate(ch, chars, i, end);
1444                                i++; // two input characters processed
1445                                     // this increments by one and the for()
1446                                     // loop itself increments by another one.
1447                        }
1448    
1449                        // The next is kind of a hack to keep from escaping in the case 
1450                        // of Shift_JIS and the like.
1451    
1452                        /*
1453                        else if ((ch < m_maxCharacter) && (m_maxCharacter == 0xFFFF)
1454                        && (ch != 160))
1455                        {
1456                        writer.write(ch);  // no escaping in this case
1457                        }
1458                        else
1459                        */
1460                        String outputStringForChar = m_charInfo.getOutputStringForChar(ch);
1461                        if (null != outputStringForChar)
1462                        {
1463                            writer.write(outputStringForChar);
1464                        }
1465                        else if (escapingNotNeeded(ch))
1466                        {
1467                            writer.write(ch); // no escaping in this case
1468                        }
1469                        else
1470                        {
1471                            writer.write("&#");
1472                            writer.write(Integer.toString(ch));
1473                            writer.write(';');
1474                        }
1475                    }
1476                    cleanStart = i + 1;
1477                }
1478            } // end of for()
1479            
1480            // are there any clean characters at the end of the array
1481            // that we haven't processed yet?
1482            if (cleanLength > 1)
1483            {
1484                // if the whole string can be written out as-is do so
1485                // otherwise write out the clean chars at the end of the
1486                // array
1487                if (cleanStart == 0)
1488                    writer.write(string);
1489                else
1490                    writer.write(chars, cleanStart, cleanLength);
1491            }
1492            else if (cleanLength == 1)
1493            {
1494                // a little optimization for 1 clean character
1495                // (we could have let the previous if(...) handle them all)
1496                writer.write(ch);
1497            }
1498        }
1499    
1500    
1501    
1502        /**
1503         * Receive notification of character data.
1504         *
1505         * <p>The Parser will call this method to report each chunk of
1506         * character data.  SAX parsers may return all contiguous character
1507         * data in a single chunk, or they may split it into several
1508         * chunks; however, all of the characters in any single event
1509         * must come from the same external entity, so that the Locator
1510         * provides useful information.</p>
1511         *
1512         * <p>The application must not attempt to read from the array
1513         * outside of the specified range.</p>
1514         *
1515         * <p>Note that some parsers will report whitespace using the
1516         * ignorableWhitespace() method rather than this one (validating
1517         * parsers must do so).</p>
1518         *
1519         * @param chars The characters from the XML document.
1520         * @param start The start position in the array.
1521         * @param length The number of characters to read from the array.
1522         * @throws org.xml.sax.SAXException Any SAX exception, possibly
1523         *            wrapping another exception.
1524         * @see #ignorableWhitespace
1525         * @see org.xml.sax.Locator
1526         *
1527         * @throws org.xml.sax.SAXException
1528         */
1529        public final void characters(char chars[], int start, int length)
1530            throws org.xml.sax.SAXException
1531        {
1532    
1533            if (m_elemContext.m_isRaw)
1534            { 
1535                try
1536                {
1537                    // Clean up some pending issues.
1538                    if (m_elemContext.m_startTagOpen)
1539                    {
1540                        closeStartTag();
1541                        m_elemContext.m_startTagOpen = false;
1542                    }
1543                    
1544                    m_ispreserve = true;
1545                    
1546                    writeNormalizedChars(chars, start, length, false, m_lineSepUse);
1547                    
1548                    // time to generate characters event
1549                    if (m_tracer != null)
1550                        super.fireCharEvent(chars, start, length);
1551                    
1552                    return;
1553                }
1554                catch (IOException ioe)
1555                {
1556                    throw new org.xml.sax.SAXException(
1557                        Utils.messages.createMessage(MsgKey.ER_OIERROR,null),ioe);
1558                }
1559            }
1560            else
1561            {
1562                super.characters(chars, start, length);
1563            }
1564        }
1565    
1566        /**
1567         *  Receive notification of cdata.
1568         *
1569         *  <p>The Parser will call this method to report each chunk of
1570         *  character data.  SAX parsers may return all contiguous character
1571         *  data in a single chunk, or they may split it into several
1572         *  chunks; however, all of the characters in any single event
1573         *  must come from the same external entity, so that the Locator
1574         *  provides useful information.</p>
1575         *
1576         *  <p>The application must not attempt to read from the array
1577         *  outside of the specified range.</p>
1578         *
1579         *  <p>Note that some parsers will report whitespace using the
1580         *  ignorableWhitespace() method rather than this one (validating
1581         *  parsers must do so).</p>
1582         *
1583         *  @param ch The characters from the XML document.
1584         *  @param start The start position in the array.
1585         *  @param length The number of characters to read from the array.
1586         *  @throws org.xml.sax.SAXException Any SAX exception, possibly
1587         *             wrapping another exception.
1588         *  @see #ignorableWhitespace
1589         *  @see org.xml.sax.Locator
1590         *
1591         * @throws org.xml.sax.SAXException
1592         */
1593        public final void cdata(char ch[], int start, int length)
1594            throws org.xml.sax.SAXException
1595        {
1596    
1597            if ((null != m_elemContext.m_elementName)
1598                && (m_elemContext.m_elementName.equalsIgnoreCase("SCRIPT")
1599                    || m_elemContext.m_elementName.equalsIgnoreCase("STYLE")))
1600            {
1601                try
1602                {
1603                    if (m_elemContext.m_startTagOpen)
1604                    {
1605                        closeStartTag();
1606                        m_elemContext.m_startTagOpen = false;
1607                    }
1608    
1609                    m_ispreserve = true;
1610    
1611                    if (shouldIndent())
1612                        indent();
1613    
1614                    // writer.write(ch, start, length);
1615                    writeNormalizedChars(ch, start, length, true, m_lineSepUse);
1616                }
1617                catch (IOException ioe)
1618                {
1619                    throw new org.xml.sax.SAXException(
1620                        Utils.messages.createMessage(
1621                            MsgKey.ER_OIERROR,
1622                            null),
1623                        ioe);
1624                    //"IO error", ioe);
1625                }
1626            }
1627            else
1628            {
1629                super.cdata(ch, start, length);
1630            }
1631        }
1632    
1633        /**
1634         *  Receive notification of a processing instruction.
1635         *
1636         *  @param target The processing instruction target.
1637         *  @param data The processing instruction data, or null if
1638         *         none was supplied.
1639         *  @throws org.xml.sax.SAXException Any SAX exception, possibly
1640         *             wrapping another exception.
1641         *
1642         * @throws org.xml.sax.SAXException
1643         */
1644        public void processingInstruction(String target, String data)
1645            throws org.xml.sax.SAXException
1646        {
1647    
1648            // Process any pending starDocument and startElement first.
1649            flushPending(); 
1650            
1651            // Use a fairly nasty hack to tell if the next node is supposed to be 
1652            // unescaped text.
1653            if (target.equals(Result.PI_DISABLE_OUTPUT_ESCAPING))
1654            {
1655                startNonEscaping();
1656            }
1657            else if (target.equals(Result.PI_ENABLE_OUTPUT_ESCAPING))
1658            {
1659                endNonEscaping();
1660            }
1661            else
1662            {
1663                try
1664                {
1665                    // clean up any pending things first
1666                    if (m_elemContext.m_startTagOpen)
1667                    {
1668                        closeStartTag();
1669                        m_elemContext.m_startTagOpen = false;
1670                    }
1671                    else if (m_cdataTagOpen)
1672                    {
1673                        closeCDATA();
1674                    }
1675                    else if (m_needToCallStartDocument)
1676                    {
1677                        startDocumentInternal();
1678                    }
1679                
1680    
1681                /*
1682                 * Perhaps processing instructions can be written out in HTML before
1683                 * the DOCTYPE, in which case this could be emitted with the
1684                 * startElement call, that knows the name of the document element
1685                 * doing it right.
1686                 */
1687                if (true == m_needToOutputDocTypeDecl)
1688                    outputDocTypeDecl("html"); // best guess for the upcoming element
1689    
1690     
1691                if (shouldIndent())
1692                    indent();
1693    
1694                final java.io.Writer writer = m_writer;
1695                //writer.write("<?" + target);
1696                writer.write("<?");
1697                writer.write(target);
1698    
1699                if (data.length() > 0 && !Character.isSpaceChar(data.charAt(0)))
1700                    writer.write(' '); 
1701    
1702                //writer.write(data + ">"); // different from XML
1703                writer.write(data); // different from XML
1704                writer.write('>'); // different from XML
1705    
1706                // Always output a newline char if not inside of an 
1707                // element. The whitespace is not significant in that
1708                // case.
1709                if (m_elemContext.m_currentElemDepth <= 0)
1710                    outputLineSep();
1711    
1712                m_startNewLine = true;
1713                }
1714                catch(IOException e)
1715                {
1716                    throw new SAXException(e);
1717                }
1718            }
1719                   
1720            // now generate the PI event
1721            if (m_tracer != null)
1722                super.fireEscapingEvent(target, data);
1723         }
1724    
1725        /**
1726         * Receive notivication of a entityReference.
1727         *
1728         * @param name non-null reference to entity name string.
1729         *
1730         * @throws org.xml.sax.SAXException
1731         */
1732        public final void entityReference(String name)
1733            throws org.xml.sax.SAXException
1734        {
1735            try
1736            {
1737    
1738            final java.io.Writer writer = m_writer;
1739            writer.write('&');
1740            writer.write(name);
1741            writer.write(';');
1742            
1743            } catch(IOException e)
1744            {
1745                throw new SAXException(e);
1746            }
1747        }
1748        /**
1749         * @see ExtendedContentHandler#endElement(String)
1750         */
1751        public final void endElement(String elemName) throws SAXException
1752        {
1753            endElement(null, null, elemName);
1754        }
1755    
1756        /**
1757         * Process the attributes, which means to write out the currently
1758         * collected attributes to the writer. The attributes are not
1759         * cleared by this method
1760         * 
1761         * @param writer the writer to write processed attributes to.
1762         * @param nAttrs the number of attributes in m_attributes 
1763         * to be processed
1764         *
1765         * @throws org.xml.sax.SAXException
1766         */
1767        public void processAttributes(java.io.Writer writer, int nAttrs)
1768            throws IOException,SAXException
1769        {
1770                /* 
1771                 * process the collected attributes
1772                 */
1773                for (int i = 0; i < nAttrs; i++)
1774                {
1775                    processAttribute(
1776                        writer,
1777                        m_attributes.getQName(i),
1778                        m_attributes.getValue(i),
1779                        m_elemContext.m_elementDesc);
1780                }
1781        }
1782    
1783        /**
1784         * For the enclosing elements starting tag write out out any attributes
1785         * followed by ">". At this point we also mark if this element is
1786         * a cdata-section-element.
1787         *
1788         *@throws org.xml.sax.SAXException
1789         */
1790        protected void closeStartTag() throws SAXException
1791        {
1792                try
1793                {
1794    
1795                // finish processing attributes, time to fire off the start element event
1796                if (m_tracer != null)
1797                    super.fireStartElem(m_elemContext.m_elementName);  
1798                
1799                int nAttrs = m_attributes.getLength();   
1800                if (nAttrs>0)
1801                {
1802                    processAttributes(m_writer, nAttrs);
1803                    // clear attributes object for re-use with next element
1804                    m_attributes.clear();
1805                }
1806    
1807                m_writer.write('>');
1808    
1809                /* At this point we have the prefix mappings now, so
1810                 * lets determine if the current element is specified in the cdata-
1811                 * section-elements list.
1812                 */
1813                if (m_CdataElems != null) // if there are any cdata sections
1814                    m_elemContext.m_isCdataSection = isCdataSection();
1815                if (m_doIndent)
1816                {
1817                    m_isprevtext = false;
1818                    m_preserves.push(m_ispreserve);
1819                }
1820                
1821                }
1822                catch(IOException e)
1823                {
1824                    throw new SAXException(e);
1825                }
1826        }
1827        
1828    
1829       
1830            /**
1831             * This method is used when a prefix/uri namespace mapping
1832             * is indicated after the element was started with a
1833             * startElement() and before and endElement().
1834             * startPrefixMapping(prefix,uri) would be used before the
1835             * startElement() call.
1836             * @param uri the URI of the namespace
1837             * @param prefix the prefix associated with the given URI.
1838             *
1839             * @see ExtendedContentHandler#namespaceAfterStartElement(String, String)
1840             */
1841            public void namespaceAfterStartElement(String prefix, String uri)
1842                throws SAXException
1843            {
1844                // hack for XSLTC with finding URI for default namespace
1845                if (m_elemContext.m_elementURI == null)
1846                {
1847                    String prefix1 = getPrefixPart(m_elemContext.m_elementName);
1848                    if (prefix1 == null && EMPTYSTRING.equals(prefix))
1849                    {
1850                        // the elements URI is not known yet, and it
1851                        // doesn't have a prefix, and we are currently
1852                        // setting the uri for prefix "", so we have
1853                        // the uri for the element... lets remember it
1854                        m_elemContext.m_elementURI = uri;
1855                    }
1856                }            
1857                startPrefixMapping(prefix,uri,false);
1858            }
1859    
1860        public void startDTD(String name, String publicId, String systemId)
1861            throws SAXException
1862        {
1863            m_inDTD = true;
1864            super.startDTD(name, publicId, systemId);
1865        }
1866    
1867        /**
1868         * Report the end of DTD declarations.
1869         * @throws org.xml.sax.SAXException The application may raise an exception.
1870         * @see #startDTD
1871         */
1872        public void endDTD() throws org.xml.sax.SAXException
1873        {
1874            m_inDTD = false;
1875            /* for ToHTMLStream the DOCTYPE is entirely output in the
1876             * startDocumentInternal() method, so don't do anything here
1877             */
1878        }
1879        /**
1880         * This method does nothing.
1881         */
1882        public void attributeDecl(
1883            String eName,
1884            String aName,
1885            String type,
1886            String valueDefault,
1887            String value)
1888            throws SAXException
1889        {
1890            // The internal DTD subset is not serialized by the ToHTMLStream serializer
1891        }
1892    
1893        /**
1894         * This method does nothing.
1895         */
1896        public void elementDecl(String name, String model) throws SAXException
1897        {
1898            // The internal DTD subset is not serialized by the ToHTMLStream serializer
1899        }
1900        /**
1901         * This method does nothing.
1902         */
1903        public void internalEntityDecl(String name, String value)
1904            throws SAXException
1905        {
1906            // The internal DTD subset is not serialized by the ToHTMLStream serializer
1907        }
1908        /**
1909         * This method does nothing.
1910         */
1911        public void externalEntityDecl(
1912            String name,
1913            String publicId,
1914            String systemId)
1915            throws SAXException
1916        {
1917            // The internal DTD subset is not serialized by the ToHTMLStream serializer
1918        }
1919    
1920        /**
1921         * This method is used to add an attribute to the currently open element. 
1922         * The caller has guaranted that this attribute is unique, which means that it
1923         * not been seen before and will not be seen again.
1924         * 
1925         * @param name the qualified name of the attribute
1926         * @param value the value of the attribute which can contain only
1927         * ASCII printable characters characters in the range 32 to 127 inclusive.
1928         * @param flags the bit values of this integer give optimization information.
1929         */
1930        public void addUniqueAttribute(String name, String value, int flags)
1931            throws SAXException
1932        {
1933            try
1934            {
1935                final java.io.Writer writer = m_writer;
1936                if ((flags & NO_BAD_CHARS) > 0 && m_htmlcharInfo.onlyQuotAmpLtGt)
1937                {
1938                    // "flags" has indicated that the characters
1939                    // '>'  '<'   '&'  and '"' are not in the value and
1940                    // m_htmlcharInfo has recorded that there are no other
1941                    // entities in the range 0 to 127 so we write out the
1942                    // value directly
1943                    writer.write(' ');
1944                    writer.write(name);
1945                    writer.write("=\"");
1946                    writer.write(value);
1947                    writer.write('"');
1948                }
1949                else if (
1950                    (flags & HTML_ATTREMPTY) > 0
1951                        && (value.length() == 0 || value.equalsIgnoreCase(name)))
1952                {
1953                    writer.write(' ');
1954                    writer.write(name);
1955                }
1956                else
1957                {
1958                    writer.write(' ');
1959                    writer.write(name);
1960                    writer.write("=\"");
1961                    if ((flags & HTML_ATTRURL) > 0)
1962                    {
1963                        writeAttrURI(writer, value, m_specialEscapeURLs);
1964                    }
1965                    else
1966                    {
1967                        writeAttrString(writer, value, this.getEncoding());
1968                    }
1969                    writer.write('"');
1970                }
1971            } catch (IOException e) {
1972                throw new SAXException(e);
1973            }
1974        }
1975    
1976        public void comment(char ch[], int start, int length)
1977                throws SAXException
1978        {
1979            // The internal DTD subset is not serialized by the ToHTMLStream serializer
1980            if (m_inDTD)
1981                return;
1982            
1983            // Clean up some pending issues, just in case
1984            // this call is coming right after a startElement()
1985            // or we are in the middle of writing out CDATA
1986            // or if a startDocument() call was not received
1987            if (m_elemContext.m_startTagOpen)
1988            {
1989                closeStartTag();
1990                m_elemContext.m_startTagOpen = false;
1991            }
1992            else if (m_cdataTagOpen)
1993            {
1994                closeCDATA();
1995            }
1996            else if (m_needToCallStartDocument)
1997            {
1998                startDocumentInternal();
1999            }
2000    
2001            /*
2002             * Perhaps comments can be written out in HTML before the DOCTYPE.
2003             * In this case we might delete this call to writeOutDOCTYPE, and
2004             * it would be handled within the startElement() call.
2005             */
2006            if (m_needToOutputDocTypeDecl)
2007                outputDocTypeDecl("html"); // best guess for the upcoming element
2008    
2009            super.comment(ch, start, length);
2010        }
2011        
2012        public boolean reset()
2013        {
2014            boolean ret = super.reset();
2015            if (!ret)
2016                return false;
2017            resetToHTMLStream();
2018            return true;        
2019        }
2020        
2021        private void resetToHTMLStream()
2022        {
2023            // m_htmlcharInfo remains unchanged
2024            // m_htmlInfo = null;  // Don't reset
2025            m_inBlockElem = false;
2026            m_inDTD = false;
2027            m_omitMetaTag = false;
2028            m_specialEscapeURLs = true;     
2029        }
2030        
2031        static class Trie
2032        {
2033            /**
2034             * A digital search trie for 7-bit ASCII text
2035             * The API is a subset of java.util.Hashtable
2036             * The key must be a 7-bit ASCII string
2037             * The value may be any Java Object
2038             * One can get an object stored in a trie from its key, 
2039             * but the search is either case sensitive or case 
2040             * insensitive to the characters in the key, and this
2041             * choice of sensitivity or insensitivity is made when
2042             * the Trie is created, before any objects are put in it.
2043             * 
         * This class is a copy of the one in org.apache.xml.utils.
         * It exists to cut the serializer's dependency on that package.
2046             *  
2047             * @xsl.usage internal
2048             */
2049    
2050            /** Size of the m_nextChar array.  */
2051            public static final int ALPHA_SIZE = 128;
2052    
2053            /** The root node of the tree.    */
2054            final Node m_Root;
2055    
2056            /** helper buffer to convert Strings to char arrays */
2057            private char[] m_charBuffer = new char[0];
2058    
2059            /** true if the search for an object is lower case only with the key */
2060            private final boolean m_lowerCaseOnly;
2061    
2062            /**
2063             * Construct the trie that has a case insensitive search.
2064             */
2065            public Trie()
2066            {
2067                m_Root = new Node();
2068                m_lowerCaseOnly = false;
2069            }
2070    
2071            /**
2072             * Construct the trie given the desired case sensitivity with the key.
2073             * @param lowerCaseOnly true if the search keys are to be loser case only,
2074             * not case insensitive.
2075             */
2076            public Trie(boolean lowerCaseOnly)
2077            {
2078                m_Root = new Node();
2079                m_lowerCaseOnly = lowerCaseOnly;
2080            }
2081    
2082            /**
2083             * Put an object into the trie for lookup.
2084             *
2085             * @param key must be a 7-bit ASCII string
2086             * @param value any java object.
2087             *
2088             * @return The old object that matched key, or null.
2089             */
2090            public Object put(String key, Object value)
2091            {
2092    
2093                final int len = key.length();
2094                if (len > m_charBuffer.length)
2095                {
2096                    // make the biggest buffer ever needed in get(String)
2097                    m_charBuffer = new char[len];
2098                }
2099    
2100                Node node = m_Root;
2101    
2102                for (int i = 0; i < len; i++)
2103                {
2104                    Node nextNode =
2105                        node.m_nextChar[Character.toLowerCase(key.charAt(i))];
2106    
2107                    if (nextNode != null)
2108                    {
2109                        node = nextNode;
2110                    }
2111                    else
2112                    {
2113                        for (; i < len; i++)
2114                        {
2115                            Node newNode = new Node();
2116                            if (m_lowerCaseOnly)
2117                            {
2118                                // put this value into the tree only with a lower case key 
2119                                node.m_nextChar[Character.toLowerCase(
2120                                    key.charAt(i))] =
2121                                    newNode;
2122                            }
2123                            else
2124                            {
2125                                // put this value into the tree with a case insensitive key
2126                                node.m_nextChar[Character.toUpperCase(
2127                                    key.charAt(i))] =
2128                                    newNode;
2129                                node.m_nextChar[Character.toLowerCase(
2130                                    key.charAt(i))] =
2131                                    newNode;
2132                            }
2133                            node = newNode;
2134                        }
2135                        break;
2136                    }
2137                }
2138    
2139                Object ret = node.m_Value;
2140    
2141                node.m_Value = value;
2142    
2143                return ret;
2144            }
2145    
2146            /**
2147             * Get an object that matches the key.
2148             *
2149             * @param key must be a 7-bit ASCII string
2150             *
2151             * @return The object that matches the key, or null.
2152             */
2153            public Object get(final String key)
2154            {
2155    
2156                final int len = key.length();
2157    
2158                /* If the name is too long, we won't find it, this also keeps us
2159                 * from overflowing m_charBuffer
2160                 */
2161                if (m_charBuffer.length < len)
2162                    return null;
2163    
2164                Node node = m_Root;
2165                switch (len) // optimize the look up based on the number of chars
2166                {
2167                    // case 0 looks silly, but the generated bytecode runs
2168                    // faster for lookup of elements of length 2 with this in
2169                    // and a fair bit faster.  Don't know why.
2170                    case 0 :
2171                        {
2172                            return null;
2173                        }
2174    
2175                    case 1 :
2176                        {
2177                            final char ch = key.charAt(0);
2178                            if (ch < ALPHA_SIZE)
2179                            {
2180                                node = node.m_nextChar[ch];
2181                                if (node != null)
2182                                    return node.m_Value;
2183                            }
2184                            return null;
2185                        }
2186                        //                comment out case 2 because the default is faster            
2187                        //                case 2 :
2188                        //                    {
2189                        //                        final char ch0 = key.charAt(0);
2190                        //                        final char ch1 = key.charAt(1);
2191                        //                        if (ch0 < ALPHA_SIZE && ch1 < ALPHA_SIZE)
2192                        //                        {
2193                        //                            node = node.m_nextChar[ch0];
2194                        //                            if (node != null)
2195                        //                            {
2196                        //                        
2197                        //                                if (ch1 < ALPHA_SIZE) 
2198                        //                                {
2199                        //                                    node = node.m_nextChar[ch1];
2200                        //                                    if (node != null)
2201                        //                                        return node.m_Value;
2202                        //                                }
2203                        //                            }
2204                        //                        }
2205                        //                        return null;
2206                        //                   }
2207                    default :
2208                        {
2209                            for (int i = 0; i < len; i++)
2210                            {
2211                                // A thread-safe way to loop over the characters
2212                                final char ch = key.charAt(i);
2213                                if (ALPHA_SIZE <= ch)
2214                                {
2215                                    // the key is not 7-bit ASCII so we won't find it here
2216                                    return null;
2217                                }
2218    
2219                                node = node.m_nextChar[ch];
2220                                if (node == null)
2221                                    return null;
2222                            }
2223    
2224                            return node.m_Value;
2225                        }
2226                }
2227            }
2228    
2229            /**
2230             * The node representation for the trie.
2231             * @xsl.usage internal
2232             */
2233            private static class Node
2234            {
2235    
2236                /**
2237                 * Constructor, creates a Node[ALPHA_SIZE].
2238                 */
2239                Node()
2240                {
2241                    m_nextChar = new Node[ALPHA_SIZE];
2242                    m_Value = null;
2243                }
2244    
2245                /** The next nodes.   */
2246                final Node m_nextChar[];
2247    
2248                /** The value.   */
2249                Object m_Value;
2250            }
2251            /**
2252             * Construct the trie from another Trie.
2253             * Both the existing Trie and this new one share the same table for
2254             * lookup, and it is assumed that the table is fully populated and
2255             * not changing anymore.
2256             * 
2257             * @param existingTrie the Trie that this one is a copy of.
2258             */
2259            public Trie(Trie existingTrie)
2260            {
2261                // copy some fields from the existing Trie into this one.
2262                m_Root = existingTrie.m_Root;
2263                m_lowerCaseOnly = existingTrie.m_lowerCaseOnly;
2264    
2265                // get a buffer just big enough to hold the longest key in the table.
2266                int max = existingTrie.getLongestKeyLength();
2267                m_charBuffer = new char[max];
2268            }
2269    
2270            /**
2271             * Get an object that matches the key.
2272             * This method is faster than get(), but is not thread-safe.
2273             *
2274             * @param key must be a 7-bit ASCII string
2275             *
2276             * @return The object that matches the key, or null.
2277             */
2278            public Object get2(final String key)
2279            {
2280    
2281                final int len = key.length();
2282    
2283                /* If the name is too long, we won't find it, this also keeps us
2284                 * from overflowing m_charBuffer
2285                 */
2286                if (m_charBuffer.length < len)
2287                    return null;
2288    
2289                Node node = m_Root;
2290                switch (len) // optimize the look up based on the number of chars
2291                {
2292                    // case 0 looks silly, but the generated bytecode runs
2293                    // faster for lookup of elements of length 2 with this in
2294                    // and a fair bit faster.  Don't know why.
2295                    case 0 :
2296                        {
2297                            return null;
2298                        }
2299    
2300                    case 1 :
2301                        {
2302                            final char ch = key.charAt(0);
2303                            if (ch < ALPHA_SIZE)
2304                            {
2305                                node = node.m_nextChar[ch];
2306                                if (node != null)
2307                                    return node.m_Value;
2308                            }
2309                            return null;
2310                        }
2311                    default :
2312                        {
2313                            /* Copy string into array. This is not thread-safe because
2314                             * it modifies the contents of m_charBuffer. If multiple
2315                             * threads were to use this Trie they all would be
2316                             * using this same array (not good). So this 
2317                             * method is not thread-safe, but it is faster because
2318                             * converting to a char[] and looping over elements of
2319                             * the array is faster than a String's charAt(i).
2320                             */
2321                            key.getChars(0, len, m_charBuffer, 0);
2322    
2323                            for (int i = 0; i < len; i++)
2324                            {
2325                                final char ch = m_charBuffer[i];
2326                                if (ALPHA_SIZE <= ch)
2327                                {
2328                                    // the key is not 7-bit ASCII so we won't find it here
2329                                    return null;
2330                                }
2331    
2332                                node = node.m_nextChar[ch];
2333                                if (node == null)
2334                                    return null;
2335                            }
2336    
2337                            return node.m_Value;
2338                        }
2339                }
2340            }
2341    
2342            /**
2343             * Get the length of the longest key used in the table. 
2344             */
2345            public int getLongestKeyLength()
2346            {
2347                return m_charBuffer.length;
2348            }
2349        }
2350    }