001 /* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the "License"); 007 * you may not use this file except in compliance with the License. 008 * You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018 /* 019 * $Id: SAX2RTFDTM.java 468653 2006-10-28 07:07:05Z minchau $ 020 */ 021 package org.apache.xml.dtm.ref.sax2dtm; 022 023 import javax.xml.transform.Source; 024 025 import org.apache.xml.dtm.DTM; 026 import org.apache.xml.dtm.DTMManager; 027 import org.apache.xml.dtm.DTMWSFilter; 028 import org.apache.xml.utils.IntStack; 029 import org.apache.xml.utils.IntVector; 030 import org.apache.xml.utils.StringVector; 031 import org.apache.xml.utils.XMLStringFactory; 032 033 import org.xml.sax.SAXException; 034 035 /** 036 * This is a subclass of SAX2DTM which has been modified to meet the needs of 037 * Result Tree Frameworks (RTFs). The differences are: 038 * 039 * 1) Multiple XML trees may be appended to the single DTM. This means 040 * that the root node of each document is _not_ node 0. Some code has 041 * had to be deoptimized to support this mode of operation, and an 042 * explicit mechanism for obtaining the Node Handle of the root node 043 * has been provided. 044 * 045 * 2) A stack of these documents is maintained, allowing us to "tail-prune" the 046 * most recently added trees off the end of the DTM as stylesheet elements 047 * (and thus variable contexts) are exited. 048 * 049 * PLEASE NOTE that this class may be _heavily_ dependent upon the 050 * internals of the SAX2DTM superclass, and must be maintained in 051 * parallel with that code. Arguably, they should be conditionals 052 * within a single class... but they have deen separated for 053 * performance reasons. (In fact, one could even argue about which is 054 * the superclass and which is the subclass; the current arrangement 055 * is as much about preserving stability of existing code during 056 * development as anything else.) 057 * 058 * %REVIEW% In fact, since the differences are so minor, I think it 059 * may be possible/practical to fold them back into the base 060 * SAX2DTM. Consider that as a future code-size optimization. 061 * */ 062 public class SAX2RTFDTM extends SAX2DTM 063 { 064 /** Set true to monitor SAX events and similar diagnostic info. */ 065 private static final boolean DEBUG = false; 066 067 /** Most recently started Document, or null if the DTM is empty. */ 068 private int m_currentDocumentNode=NULL; 069 070 /** Tail-pruning mark: Number of nodes in use */ 071 IntStack mark_size=new IntStack(); 072 /** Tail-pruning mark: Number of data items in use */ 073 IntStack mark_data_size=new IntStack(); 074 /** Tail-pruning mark: Number of size-of-data fields in use */ 075 IntStack mark_char_size=new IntStack(); 076 /** Tail-pruning mark: Number of dataOrQName slots in use */ 077 IntStack mark_doq_size=new IntStack(); 078 /** Tail-pruning mark: Number of namespace declaration sets in use 079 * %REVIEW% I don't think number of NS sets is ever different from number 080 * of NS elements. We can probabably reduce these to a single stack and save 081 * some storage. 082 * */ 083 IntStack mark_nsdeclset_size=new IntStack(); 084 /** Tail-pruning mark: Number of naespace declaration elements in use 085 * %REVIEW% I don't think number of NS sets is ever different from number 086 * of NS elements. We can probabably reduce these to a single stack and save 087 * some storage. 088 */ 089 IntStack mark_nsdeclelem_size=new IntStack(); 090 091 /** 092 * Tail-pruning mark: initial number of nodes in use 093 */ 094 int m_emptyNodeCount; 095 096 /** 097 * Tail-pruning mark: initial number of namespace declaration sets 098 */ 099 int m_emptyNSDeclSetCount; 100 101 /** 102 * Tail-pruning mark: initial number of namespace declaration elements 103 */ 104 int m_emptyNSDeclSetElemsCount; 105 106 /** 107 * Tail-pruning mark: initial number of data items in use 108 */ 109 int m_emptyDataCount; 110 111 /** 112 * Tail-pruning mark: initial number of characters in use 113 */ 114 int m_emptyCharsCount; 115 116 /** 117 * Tail-pruning mark: default initial number of dataOrQName slots in use 118 */ 119 int m_emptyDataQNCount; 120 121 public SAX2RTFDTM(DTMManager mgr, Source source, int dtmIdentity, 122 DTMWSFilter whiteSpaceFilter, 123 XMLStringFactory xstringfactory, 124 boolean doIndexing) 125 { 126 super(mgr, source, dtmIdentity, whiteSpaceFilter, 127 xstringfactory, doIndexing); 128 129 // NEVER track source locators for RTFs; they aren't meaningful. I think. 130 // (If we did track them, we'd need to tail-prune these too.) 131 //org.apache.xalan.processor.TransformerFactoryImpl.m_source_location; 132 m_useSourceLocationProperty=false; 133 m_sourceSystemId = (m_useSourceLocationProperty) ? new StringVector() 134 : null; 135 m_sourceLine = (m_useSourceLocationProperty) ? new IntVector() : null; 136 m_sourceColumn = (m_useSourceLocationProperty) ? new IntVector() : null; 137 138 // Record initial sizes of fields that are pushed and restored 139 // for RTF tail-pruning. More entries can be popped than pushed, so 140 // we need this to mark the primordial state of the DTM. 141 m_emptyNodeCount = m_size; 142 m_emptyNSDeclSetCount = (m_namespaceDeclSets == null) 143 ? 0 : m_namespaceDeclSets.size(); 144 m_emptyNSDeclSetElemsCount = (m_namespaceDeclSetElements == null) 145 ? 0 : m_namespaceDeclSetElements.size(); 146 m_emptyDataCount = m_data.size(); 147 m_emptyCharsCount = m_chars.size(); 148 m_emptyDataQNCount = m_dataOrQName.size(); 149 } 150 151 /** 152 * Given a DTM, find the owning document node. In the case of 153 * SAX2RTFDTM, which may contain multiple documents, this returns 154 * the <b>most recently started</b> document, or null if the DTM is 155 * empty or no document is currently under construction. 156 * 157 * %REVIEW% Should we continue to report the most recent after 158 * construction has ended? I think not, given that it may have been 159 * tail-pruned. 160 * 161 * @return int Node handle of Document node, or null if this DTM does not 162 * contain an "active" document. 163 * */ 164 public int getDocument() 165 { 166 return makeNodeHandle(m_currentDocumentNode); 167 } 168 169 /** 170 * Given a node handle, find the owning document node, using DTM semantics 171 * (Document owns itself) rather than DOM semantics (Document has no owner). 172 * 173 * (I'm counting on the fact that getOwnerDocument() is implemented on top 174 * of this call, in the superclass, to avoid having to rewrite that one. 175 * Be careful if that code changes!) 176 * 177 * @param nodeHandle the id of the node. 178 * @return int Node handle of owning document 179 */ 180 public int getDocumentRoot(int nodeHandle) 181 { 182 for (int id=makeNodeIdentity(nodeHandle); id!=NULL; id=_parent(id)) { 183 if (_type(id)==DTM.DOCUMENT_NODE) { 184 return makeNodeHandle(id); 185 } 186 } 187 188 return DTM.NULL; // Safety net; should never happen 189 } 190 191 /** 192 * Given a node identifier, find the owning document node. Unlike the DOM, 193 * this considers the owningDocument of a Document to be itself. Note that 194 * in shared DTMs this may not be zero. 195 * 196 * @param nodeIdentifier the id of the starting node. 197 * @return int Node identifier of the root of this DTM tree 198 */ 199 protected int _documentRoot(int nodeIdentifier) 200 { 201 if(nodeIdentifier==NULL) return NULL; 202 203 for (int parent=_parent(nodeIdentifier); 204 parent!=NULL; 205 nodeIdentifier=parent,parent=_parent(nodeIdentifier)) 206 ; 207 208 return nodeIdentifier; 209 } 210 211 /** 212 * Receive notification of the beginning of a new RTF document. 213 * 214 * %REVIEW% Y'know, this isn't all that much of a deoptimization. We 215 * might want to consider folding the start/endDocument changes back 216 * into the main SAX2DTM so we don't have to expose so many fields 217 * (even as Protected) and carry the additional code. 218 * 219 * @throws SAXException Any SAX exception, possibly 220 * wrapping another exception. 221 * @see org.xml.sax.ContentHandler#startDocument 222 * */ 223 public void startDocument() throws SAXException 224 { 225 // Re-initialize the tree append process 226 m_endDocumentOccured = false; 227 m_prefixMappings = new java.util.Vector(); 228 m_contextIndexes = new IntStack(); 229 m_parents = new IntStack(); 230 231 m_currentDocumentNode=m_size; 232 super.startDocument(); 233 } 234 235 /** 236 * Receive notification of the end of the document. 237 * 238 * %REVIEW% Y'know, this isn't all that much of a deoptimization. We 239 * might want to consider folding the start/endDocument changes back 240 * into the main SAX2DTM so we don't have to expose so many fields 241 * (even as Protected). 242 * 243 * @throws SAXException Any SAX exception, possibly 244 * wrapping another exception. 245 * @see org.xml.sax.ContentHandler#endDocument 246 * */ 247 public void endDocument() throws SAXException 248 { 249 charactersFlush(); 250 251 m_nextsib.setElementAt(NULL,m_currentDocumentNode); 252 253 if (m_firstch.elementAt(m_currentDocumentNode) == NOTPROCESSED) 254 m_firstch.setElementAt(NULL,m_currentDocumentNode); 255 256 if (DTM.NULL != m_previous) 257 m_nextsib.setElementAt(DTM.NULL,m_previous); 258 259 m_parents = null; 260 m_prefixMappings = null; 261 m_contextIndexes = null; 262 263 m_currentDocumentNode= NULL; // no longer open 264 m_endDocumentOccured = true; 265 } 266 267 268 /** "Tail-pruning" support for RTFs. 269 * 270 * This function pushes information about the current size of the 271 * DTM's data structures onto a stack, for use by popRewindMark() 272 * (which see). 273 * 274 * %REVIEW% I have no idea how to rewind m_elemIndexes. However, 275 * RTFs will not be indexed, so I can simply panic if that case 276 * arises. Hey, it works... 277 * */ 278 public void pushRewindMark() 279 { 280 if(m_indexing || m_elemIndexes!=null) 281 throw new java.lang.NullPointerException("Coding error; Don't try to mark/rewind an indexed DTM"); 282 283 // Values from DTMDefaultBase 284 // %REVIEW% Can the namespace stack sizes ever differ? If not, save space! 285 mark_size.push(m_size); 286 mark_nsdeclset_size.push((m_namespaceDeclSets==null) 287 ? 0 288 : m_namespaceDeclSets.size()); 289 mark_nsdeclelem_size.push((m_namespaceDeclSetElements==null) 290 ? 0 291 : m_namespaceDeclSetElements.size()); 292 293 // Values from SAX2DTM 294 mark_data_size.push(m_data.size()); 295 mark_char_size.push(m_chars.size()); 296 mark_doq_size.push(m_dataOrQName.size()); 297 } 298 299 /** "Tail-pruning" support for RTFs. 300 * 301 * This function pops the information previously saved by 302 * pushRewindMark (which see) and uses it to discard all nodes added 303 * to the DTM after that time. We expect that this will allow us to 304 * reuse storage more effectively. 305 * 306 * This is _not_ intended to be called while a document is still being 307 * constructed -- only between endDocument and the next startDocument 308 * 309 * %REVIEW% WARNING: This is the first use of some of the truncation 310 * methods. If Xalan blows up after this is called, that's a likely 311 * place to check. 312 * 313 * %REVIEW% Our original design for DTMs permitted them to share 314 * string pools. If there any risk that this might be happening, we 315 * can _not_ rewind and recover the string storage. One solution 316 * might to assert that DTMs used for RTFs Must Not take advantage 317 * of that feature, but this seems excessively fragile. Another, much 318 * less attractive, would be to just let them leak... Nah. 319 * 320 * @return true if and only if the pop completely emptied the 321 * RTF. That response is used when determining how to unspool 322 * RTF-started-while-RTF-open situations. 323 * */ 324 public boolean popRewindMark() 325 { 326 boolean top=mark_size.empty(); 327 328 m_size=top ? m_emptyNodeCount : mark_size.pop(); 329 m_exptype.setSize(m_size); 330 m_firstch.setSize(m_size); 331 m_nextsib.setSize(m_size); 332 m_prevsib.setSize(m_size); 333 m_parent.setSize(m_size); 334 335 m_elemIndexes=null; 336 337 int ds= top ? m_emptyNSDeclSetCount : mark_nsdeclset_size.pop(); 338 if (m_namespaceDeclSets!=null) { 339 m_namespaceDeclSets.setSize(ds); 340 } 341 342 int ds1= top ? m_emptyNSDeclSetElemsCount : mark_nsdeclelem_size.pop(); 343 if (m_namespaceDeclSetElements!=null) { 344 m_namespaceDeclSetElements.setSize(ds1); 345 } 346 347 // Values from SAX2DTM - m_data always has a reserved entry 348 m_data.setSize(top ? m_emptyDataCount : mark_data_size.pop()); 349 m_chars.setLength(top ? m_emptyCharsCount : mark_char_size.pop()); 350 m_dataOrQName.setSize(top ? m_emptyDataQNCount : mark_doq_size.pop()); 351 352 // Return true iff DTM now empty 353 return m_size==0; 354 } 355 356 /** @return true if a DTM tree is currently under construction. 357 * */ 358 public boolean isTreeIncomplete() 359 { 360 return !m_endDocumentOccured; 361 } 362 }