001 /* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the "License"); 007 * you may not use this file except in compliance with the License. 008 * You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018 /* 019 * $Id: ToTextStream.java 468654 2006-10-28 07:09:23Z minchau $ 020 */ 021 package org.apache.xml.serializer; 022 023 import java.io.IOException; 024 025 import org.apache.xml.serializer.utils.MsgKey; 026 import org.apache.xml.serializer.utils.Utils; 027 import org.xml.sax.Attributes; 028 import org.xml.sax.SAXException; 029 030 /** 031 * This class is not a public API. 032 * It is only public because it is used in other packages. 033 * This class converts SAX or SAX-like calls to a 034 * serialized document for xsl:output method of "text". 035 * @xsl.usage internal 036 */ 037 public class ToTextStream extends ToStream 038 { 039 040 041 /** 042 * Default constructor. 043 */ 044 public ToTextStream() 045 { 046 super(); 047 } 048 049 050 051 /** 052 * Receive notification of the beginning of a document. 053 * 054 * <p>The SAX parser will invoke this method only once, before any 055 * other methods in this interface or in DTDHandler (except for 056 * setDocumentLocator).</p> 057 * 058 * @throws org.xml.sax.SAXException Any SAX exception, possibly 059 * wrapping another exception. 060 * 061 * @throws org.xml.sax.SAXException 062 */ 063 protected void startDocumentInternal() throws org.xml.sax.SAXException 064 { 065 super.startDocumentInternal(); 066 067 m_needToCallStartDocument = false; 068 069 // No action for the moment. 070 } 071 072 /** 073 * Receive notification of the end of a document. 074 * 075 * <p>The SAX parser will invoke this method only once, and it will 076 * be the last method invoked during the parse. The parser shall 077 * not invoke this method until it has either abandoned parsing 078 * (because of an unrecoverable error) or reached the end of 079 * input.</p> 080 * 081 * @throws org.xml.sax.SAXException Any SAX exception, possibly 082 * wrapping another exception. 083 * 084 * @throws org.xml.sax.SAXException 085 */ 086 public void endDocument() throws org.xml.sax.SAXException 087 { 088 flushPending(); 089 flushWriter(); 090 if (m_tracer != null) 091 super.fireEndDoc(); 092 } 093 094 /** 095 * Receive notification of the beginning of an element. 096 * 097 * <p>The Parser will invoke this method at the beginning of every 098 * element in the XML document; there will be a corresponding 099 * endElement() event for every startElement() event (even when the 100 * element is empty). All of the element's content will be 101 * reported, in order, before the corresponding endElement() 102 * event.</p> 103 * 104 * <p>If the element name has a namespace prefix, the prefix will 105 * still be attached. Note that the attribute list provided will 106 * contain only attributes with explicit values (specified or 107 * defaulted): #IMPLIED attributes will be omitted.</p> 108 * 109 * 110 * @param namespaceURI The Namespace URI, or the empty string if the 111 * element has no Namespace URI or if Namespace 112 * processing is not being performed. 113 * @param localName The local name (without prefix), or the 114 * empty string if Namespace processing is not being 115 * performed. 116 * @param name The qualified name (with prefix), or the 117 * empty string if qualified names are not available. 118 * @param atts The attributes attached to the element, if any. 119 * @throws org.xml.sax.SAXException Any SAX exception, possibly 120 * wrapping another exception. 121 * @see #endElement 122 * @see org.xml.sax.AttributeList 123 * 124 * @throws org.xml.sax.SAXException 125 */ 126 public void startElement( 127 String namespaceURI, String localName, String name, Attributes atts) 128 throws org.xml.sax.SAXException 129 { 130 // time to fire off startElement event 131 if (m_tracer != null) { 132 super.fireStartElem(name); 133 this.firePseudoAttributes(); 134 } 135 return; 136 } 137 138 /** 139 * Receive notification of the end of an element. 140 * 141 * <p>The SAX parser will invoke this method at the end of every 142 * element in the XML document; there will be a corresponding 143 * startElement() event for every endElement() event (even when the 144 * element is empty).</p> 145 * 146 * <p>If the element name has a namespace prefix, the prefix will 147 * still be attached to the name.</p> 148 * 149 * 150 * @param namespaceURI The Namespace URI, or the empty string if the 151 * element has no Namespace URI or if Namespace 152 * processing is not being performed. 153 * @param localName The local name (without prefix), or the 154 * empty string if Namespace processing is not being 155 * performed. 156 * @param name The qualified name (with prefix), or the 157 * empty string if qualified names are not available. 158 * @throws org.xml.sax.SAXException Any SAX exception, possibly 159 * wrapping another exception. 160 * 161 * @throws org.xml.sax.SAXException 162 */ 163 public void endElement(String namespaceURI, String localName, String name) 164 throws org.xml.sax.SAXException 165 { 166 if (m_tracer != null) 167 super.fireEndElem(name); 168 } 169 170 /** 171 * Receive notification of character data. 172 * 173 * <p>The Parser will call this method to report each chunk of 174 * character data. SAX parsers may return all contiguous character 175 * data in a single chunk, or they may split it into several 176 * chunks; however, all of the characters in any single event 177 * must come from the same external entity, so that the Locator 178 * provides useful information.</p> 179 * 180 * <p>The application must not attempt to read from the array 181 * outside of the specified range.</p> 182 * 183 * <p>Note that some parsers will report whitespace using the 184 * ignorableWhitespace() method rather than this one (validating 185 * parsers must do so).</p> 186 * 187 * @param ch The characters from the XML document. 188 * @param start The start position in the array. 189 * @param length The number of characters to read from the array. 190 * @throws org.xml.sax.SAXException Any SAX exception, possibly 191 * wrapping another exception. 192 * @see #ignorableWhitespace 193 * @see org.xml.sax.Locator 194 */ 195 public void characters(char ch[], int start, int length) 196 throws org.xml.sax.SAXException 197 { 198 199 flushPending(); 200 201 try 202 { 203 if (inTemporaryOutputState()) { 204 /* leave characters un-processed as we are 205 * creating temporary output, the output generated by 206 * this serializer will be input to a final serializer 207 * later on and it will do the processing in final 208 * output state (not temporary output state). 209 * 210 * A "temporary" ToTextStream serializer is used to 211 * evaluate attribute value templates (for example), 212 * and the result of evaluating such a thing 213 * is fed into a final serializer later on. 214 */ 215 m_writer.write(ch, start, length); 216 } 217 else { 218 // In final output state we do process the characters! 219 writeNormalizedChars(ch, start, length, m_lineSepUse); 220 } 221 222 if (m_tracer != null) 223 super.fireCharEvent(ch, start, length); 224 } 225 catch(IOException ioe) 226 { 227 throw new SAXException(ioe); 228 } 229 } 230 231 /** 232 * If available, when the disable-output-escaping attribute is used, 233 * output raw text without escaping. 234 * 235 * @param ch The characters from the XML document. 236 * @param start The start position in the array. 237 * @param length The number of characters to read from the array. 238 * 239 * @throws org.xml.sax.SAXException Any SAX exception, possibly 240 * wrapping another exception. 241 */ 242 public void charactersRaw(char ch[], int start, int length) 243 throws org.xml.sax.SAXException 244 { 245 246 try 247 { 248 writeNormalizedChars(ch, start, length, m_lineSepUse); 249 } 250 catch(IOException ioe) 251 { 252 throw new SAXException(ioe); 253 } 254 } 255 256 /** 257 * Normalize the characters, but don't escape. Different from 258 * SerializerToXML#writeNormalizedChars because it does not attempt to do 259 * XML escaping at all. 260 * 261 * @param ch The characters from the XML document. 262 * @param start The start position in the array. 263 * @param length The number of characters to read from the array. 264 * @param useLineSep true if the operating systems 265 * end-of-line separator should be output rather than a new-line character. 266 * 267 * @throws IOException 268 * @throws org.xml.sax.SAXException 269 */ 270 void writeNormalizedChars( 271 final char ch[], 272 final int start, 273 final int length, 274 final boolean useLineSep) 275 throws IOException, org.xml.sax.SAXException 276 { 277 final String encoding = getEncoding(); 278 final java.io.Writer writer = m_writer; 279 final int end = start + length; 280 281 /* copy a few "constants" before the loop for performance */ 282 final char S_LINEFEED = CharInfo.S_LINEFEED; 283 284 // This for() loop always increments i by one at the end 285 // of the loop. Additional increments of i adjust for when 286 // two input characters (a high/low UTF16 surrogate pair) 287 // are processed. 288 for (int i = start; i < end; i++) { 289 final char c = ch[i]; 290 291 if (S_LINEFEED == c && useLineSep) { 292 writer.write(m_lineSep, 0, m_lineSepLen); 293 // one input char processed 294 } else if (m_encodingInfo.isInEncoding(c)) { 295 writer.write(c); 296 // one input char processed 297 } else if (Encodings.isHighUTF16Surrogate(c)) { 298 final int codePoint = writeUTF16Surrogate(c, ch, i, end); 299 if (codePoint != 0) { 300 // I think we can just emit the message, 301 // not crash and burn. 302 final String integralValue = Integer.toString(codePoint); 303 final String msg = Utils.messages.createMessage( 304 MsgKey.ER_ILLEGAL_CHARACTER, 305 new Object[] { integralValue, encoding }); 306 307 //Older behavior was to throw the message, 308 //but newer gentler behavior is to write a message to System.err 309 //throw new SAXException(msg); 310 System.err.println(msg); 311 312 } 313 i++; // two input chars processed 314 } else { 315 // Don't know what to do with this char, it is 316 // not in the encoding and not a high char in 317 // a surrogate pair, so write out as an entity ref 318 if (encoding != null) { 319 /* The output encoding is known, 320 * so somthing is wrong. 321 */ 322 323 // not in the encoding, so write out a character reference 324 writer.write('&'); 325 writer.write('#'); 326 writer.write(Integer.toString(c)); 327 writer.write(';'); 328 329 // I think we can just emit the message, 330 // not crash and burn. 331 final String integralValue = Integer.toString(c); 332 final String msg = Utils.messages.createMessage( 333 MsgKey.ER_ILLEGAL_CHARACTER, 334 new Object[] { integralValue, encoding }); 335 336 //Older behavior was to throw the message, 337 //but newer gentler behavior is to write a message to System.err 338 //throw new SAXException(msg); 339 System.err.println(msg); 340 } else { 341 /* The output encoding is not known, 342 * so just write it out as-is. 343 */ 344 writer.write(c); 345 } 346 347 // one input char was processed 348 } 349 } 350 } 351 352 /** 353 * Receive notification of cdata. 354 * 355 * <p>The Parser will call this method to report each chunk of 356 * character data. SAX parsers may return all contiguous character 357 * data in a single chunk, or they may split it into several 358 * chunks; however, all of the characters in any single event 359 * must come from the same external entity, so that the Locator 360 * provides useful information.</p> 361 * 362 * <p>The application must not attempt to read from the array 363 * outside of the specified range.</p> 364 * 365 * <p>Note that some parsers will report whitespace using the 366 * ignorableWhitespace() method rather than this one (validating 367 * parsers must do so).</p> 368 * 369 * @param ch The characters from the XML document. 370 * @param start The start position in the array. 371 * @param length The number of characters to read from the array. 372 * @throws org.xml.sax.SAXException Any SAX exception, possibly 373 * wrapping another exception. 374 * @see #ignorableWhitespace 375 * @see org.xml.sax.Locator 376 */ 377 public void cdata(char ch[], int start, int length) 378 throws org.xml.sax.SAXException 379 { 380 try 381 { 382 writeNormalizedChars(ch, start, length, m_lineSepUse); 383 if (m_tracer != null) 384 super.fireCDATAEvent(ch, start, length); 385 } 386 catch(IOException ioe) 387 { 388 throw new SAXException(ioe); 389 } 390 } 391 392 /** 393 * Receive notification of ignorable whitespace in element content. 394 * 395 * <p>Validating Parsers must use this method to report each chunk 396 * of ignorable whitespace (see the W3C XML 1.0 recommendation, 397 * section 2.10): non-validating parsers may also use this method 398 * if they are capable of parsing and using content models.</p> 399 * 400 * <p>SAX parsers may return all contiguous whitespace in a single 401 * chunk, or they may split it into several chunks; however, all of 402 * the characters in any single event must come from the same 403 * external entity, so that the Locator provides useful 404 * information.</p> 405 * 406 * <p>The application must not attempt to read from the array 407 * outside of the specified range.</p> 408 * 409 * @param ch The characters from the XML document. 410 * @param start The start position in the array. 411 * @param length The number of characters to read from the array. 412 * @throws org.xml.sax.SAXException Any SAX exception, possibly 413 * wrapping another exception. 414 * @see #characters 415 * 416 * @throws org.xml.sax.SAXException 417 */ 418 public void ignorableWhitespace(char ch[], int start, int length) 419 throws org.xml.sax.SAXException 420 { 421 422 try 423 { 424 writeNormalizedChars(ch, start, length, m_lineSepUse); 425 } 426 catch(IOException ioe) 427 { 428 throw new SAXException(ioe); 429 } 430 } 431 432 /** 433 * Receive notification of a processing instruction. 434 * 435 * <p>The Parser will invoke this method once for each processing 436 * instruction found: note that processing instructions may occur 437 * before or after the main document element.</p> 438 * 439 * <p>A SAX parser should never report an XML declaration (XML 1.0, 440 * section 2.8) or a text declaration (XML 1.0, section 4.3.1) 441 * using this method.</p> 442 * 443 * @param target The processing instruction target. 444 * @param data The processing instruction data, or null if 445 * none was supplied. 446 * @throws org.xml.sax.SAXException Any SAX exception, possibly 447 * wrapping another exception. 448 * 449 * @throws org.xml.sax.SAXException 450 */ 451 public void processingInstruction(String target, String data) 452 throws org.xml.sax.SAXException 453 { 454 // flush anything pending first 455 flushPending(); 456 457 if (m_tracer != null) 458 super.fireEscapingEvent(target, data); 459 } 460 461 /** 462 * Called when a Comment is to be constructed. 463 * Note that Xalan will normally invoke the other version of this method. 464 * %REVIEW% In fact, is this one ever needed, or was it a mistake? 465 * 466 * @param data The comment data. 467 * @throws org.xml.sax.SAXException Any SAX exception, possibly 468 * wrapping another exception. 469 */ 470 public void comment(String data) throws org.xml.sax.SAXException 471 { 472 final int length = data.length(); 473 if (length > m_charsBuff.length) 474 { 475 m_charsBuff = new char[length*2 + 1]; 476 } 477 data.getChars(0, length, m_charsBuff, 0); 478 comment(m_charsBuff, 0, length); 479 } 480 481 /** 482 * Report an XML comment anywhere in the document. 483 * 484 * This callback will be used for comments inside or outside the 485 * document element, including comments in the external DTD 486 * subset (if read). 487 * 488 * @param ch An array holding the characters in the comment. 489 * @param start The starting position in the array. 490 * @param length The number of characters to use from the array. 491 * @throws org.xml.sax.SAXException The application may raise an exception. 492 */ 493 public void comment(char ch[], int start, int length) 494 throws org.xml.sax.SAXException 495 { 496 497 flushPending(); 498 if (m_tracer != null) 499 super.fireCommentEvent(ch, start, length); 500 } 501 502 /** 503 * Receive notivication of a entityReference. 504 * 505 * @param name non-null reference to the name of the entity. 506 * 507 * @throws org.xml.sax.SAXException 508 */ 509 public void entityReference(String name) throws org.xml.sax.SAXException 510 { 511 if (m_tracer != null) 512 super.fireEntityReference(name); 513 } 514 515 /** 516 * @see ExtendedContentHandler#addAttribute(String, String, String, String, String) 517 */ 518 public void addAttribute( 519 String uri, 520 String localName, 521 String rawName, 522 String type, 523 String value, 524 boolean XSLAttribute) 525 { 526 // do nothing, just forget all about the attribute 527 } 528 529 /** 530 * @see org.xml.sax.ext.LexicalHandler#endCDATA() 531 */ 532 public void endCDATA() throws SAXException 533 { 534 // do nothing 535 } 536 537 /** 538 * @see ExtendedContentHandler#endElement(String) 539 */ 540 public void endElement(String elemName) throws SAXException 541 { 542 if (m_tracer != null) 543 super.fireEndElem(elemName); 544 } 545 546 /** 547 * From XSLTC 548 */ 549 public void startElement( 550 String elementNamespaceURI, 551 String elementLocalName, 552 String elementName) 553 throws SAXException 554 { 555 if (m_needToCallStartDocument) 556 startDocumentInternal(); 557 // time to fire off startlement event. 558 if (m_tracer != null) { 559 super.fireStartElem(elementName); 560 this.firePseudoAttributes(); 561 } 562 563 return; 564 } 565 566 567 /** 568 * From XSLTC 569 */ 570 public void characters(String characters) 571 throws SAXException 572 { 573 final int length = characters.length(); 574 if (length > m_charsBuff.length) 575 { 576 m_charsBuff = new char[length*2 + 1]; 577 } 578 characters.getChars(0, length, m_charsBuff, 0); 579 characters(m_charsBuff, 0, length); 580 } 581 582 583 /** 584 * From XSLTC 585 */ 586 public void addAttribute(String name, String value) 587 { 588 // do nothing, forget about the attribute 589 } 590 591 /** 592 * Add a unique attribute 593 */ 594 public void addUniqueAttribute(String qName, String value, int flags) 595 throws SAXException 596 { 597 // do nothing, forget about the attribute 598 } 599 600 public boolean startPrefixMapping( 601 String prefix, 602 String uri, 603 boolean shouldFlush) 604 throws SAXException 605 { 606 // no namespace support for HTML 607 return false; 608 } 609 610 611 public void startPrefixMapping(String prefix, String uri) 612 throws org.xml.sax.SAXException 613 { 614 // no namespace support for HTML 615 } 616 617 618 public void namespaceAfterStartElement( 619 final String prefix, 620 final String uri) 621 throws SAXException 622 { 623 // no namespace support for HTML 624 } 625 626 public void flushPending() throws org.xml.sax.SAXException 627 { 628 if (m_needToCallStartDocument) 629 { 630 startDocumentInternal(); 631 m_needToCallStartDocument = false; 632 } 633 } 634 }