001 /* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the "License"); 007 * you may not use this file except in compliance with the License. 008 * You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018 /* 019 * $Id: IncrementalSAXSource_Xerces.java 468653 2006-10-28 07:07:05Z minchau $ 020 */ 021 022 package org.apache.xml.dtm.ref; 023 024 import java.io.IOException; 025 import java.lang.reflect.Constructor; 026 import java.lang.reflect.Method; 027 028 import org.apache.xerces.parsers.SAXParser; 029 import org.apache.xml.res.XMLErrorResources; 030 import org.apache.xml.res.XMLMessages; 031 032 import org.xml.sax.InputSource; 033 import org.xml.sax.SAXException; 034 import org.xml.sax.XMLReader; 035 036 037 /** <p>IncrementalSAXSource_Xerces takes advantage of the fact that Xerces1 038 * incremental mode is already a coroutine of sorts, and just wraps our 039 * IncrementalSAXSource API around it.</p> 040 * 041 * <p>Usage example: See main().</p> 042 * 043 * <p>Status: Passes simple main() unit-test. NEEDS JAVADOC.</p> 044 * */ 045 public class IncrementalSAXSource_Xerces 046 implements IncrementalSAXSource 047 { 048 // 049 // Reflection. To allow this to compile with both Xerces1 and Xerces2, which 050 // require very different methods and objects, we need to avoid static 051 // references to those APIs. So until Xerces2 is pervasive and we're willing 052 // to make it a prerequisite, we will rely upon relection. 053 // 054 Method fParseSomeSetup=null; // Xerces1 method 055 Method fParseSome=null; // Xerces1 method 056 Object fPullParserConfig=null; // Xerces2 pull control object 057 Method fConfigSetInput=null; // Xerces2 method 058 Method fConfigParse=null; // Xerces2 method 059 Method fSetInputSource=null; // Xerces2 pull control method 060 Constructor fConfigInputSourceCtor=null; // Xerces2 initialization method 061 Method fConfigSetByteStream=null; // Xerces2 initialization method 062 Method fConfigSetCharStream=null; // Xerces2 initialization method 063 Method fConfigSetEncoding=null; // Xerces2 initialization method 064 Method fReset=null; // Both Xerces1 and Xerces2, but diff. signatures 065 066 // 067 // Data 068 // 069 SAXParser fIncrementalParser; 070 private boolean fParseInProgress=false; 071 072 // 073 // Constructors 074 // 075 076 /** Create a IncrementalSAXSource_Xerces, and create a SAXParser 077 * to go with it. Xerces2 incremental parsing is only supported if 078 * this constructor is used, due to limitations in the Xerces2 API (as of 079 * Beta 3). If you don't like that restriction, tell the Xerces folks that 080 * there should be a simpler way to request incremental SAX parsing. 081 * */ 082 public IncrementalSAXSource_Xerces() 083 throws NoSuchMethodException 084 { 085 try 086 { 087 // Xerces-2 incremental parsing support (as of Beta 3) 088 // ContentHandlers still get set on fIncrementalParser (to get 089 // conversion from XNI events to SAX events), but 090 // _control_ for incremental parsing must be exercised via the config. 091 // 092 // At this time there's no way to read the existing config, only 093 // to assert a new one... and only when creating a brand-new parser. 094 // 095 // Reflection is used to allow us to continue to compile against 096 // Xerces1. If/when we can abandon the older versions of the parser, 097 // this will simplify significantly. 098 099 // If we can't get the magic constructor, no need to look further. 100 Class xniConfigClass=ObjectFactory.findProviderClass( 101 "org.apache.xerces.xni.parser.XMLParserConfiguration", 102 ObjectFactory.findClassLoader(), true); 103 Class[] args1={xniConfigClass}; 104 Constructor ctor=SAXParser.class.getConstructor(args1); 105 106 // Build the parser configuration object. StandardParserConfiguration 107 // happens to implement XMLPullParserConfiguration, which is the API 108 // we're going to want to use. 109 Class xniStdConfigClass=ObjectFactory.findProviderClass( 110 "org.apache.xerces.parsers.StandardParserConfiguration", 111 ObjectFactory.findClassLoader(), true); 112 fPullParserConfig=xniStdConfigClass.newInstance(); 113 Object[] args2={fPullParserConfig}; 114 fIncrementalParser = (SAXParser)ctor.newInstance(args2); 115 116 // Preload all the needed the configuration methods... I want to know they're 117 // all here before we commit to trying to use them, just in case the 118 // API changes again. 119 Class fXniInputSourceClass=ObjectFactory.findProviderClass( 120 "org.apache.xerces.xni.parser.XMLInputSource", 121 ObjectFactory.findClassLoader(), true); 122 Class[] args3={fXniInputSourceClass}; 123 fConfigSetInput=xniStdConfigClass.getMethod("setInputSource",args3); 124 125 Class[] args4={String.class,String.class,String.class}; 126 fConfigInputSourceCtor=fXniInputSourceClass.getConstructor(args4); 127 Class[] args5={java.io.InputStream.class}; 128 fConfigSetByteStream=fXniInputSourceClass.getMethod("setByteStream",args5); 129 Class[] args6={java.io.Reader.class}; 130 fConfigSetCharStream=fXniInputSourceClass.getMethod("setCharacterStream",args6); 131 Class[] args7={String.class}; 132 fConfigSetEncoding=fXniInputSourceClass.getMethod("setEncoding",args7); 133 134 Class[] argsb={Boolean.TYPE}; 135 fConfigParse=xniStdConfigClass.getMethod("parse",argsb); 136 Class[] noargs=new Class[0]; 137 fReset=fIncrementalParser.getClass().getMethod("reset",noargs); 138 } 139 catch(Exception e) 140 { 141 // Fallback if this fails (implemented in createIncrementalSAXSource) is 142 // to attempt Xerces-1 incremental setup. Can't do tail-call in 143 // constructor, so create new, copy Xerces-1 initialization, 144 // then throw it away... Ugh. 145 IncrementalSAXSource_Xerces dummy=new IncrementalSAXSource_Xerces(new SAXParser()); 146 this.fParseSomeSetup=dummy.fParseSomeSetup; 147 this.fParseSome=dummy.fParseSome; 148 this.fIncrementalParser=dummy.fIncrementalParser; 149 } 150 } 151 152 /** Create a IncrementalSAXSource_Xerces wrapped around 153 * an existing SAXParser. Currently this works only for recent 154 * releases of Xerces-1. Xerces-2 incremental is currently possible 155 * only if we are allowed to create the parser instance, due to 156 * limitations in the API exposed by Xerces-2 Beta 3; see the 157 * no-args constructor for that code. 158 * 159 * @exception if the SAXParser class doesn't support the Xerces 160 * incremental parse operations. In that case, caller should 161 * fall back upon the IncrementalSAXSource_Filter approach. 162 * */ 163 public IncrementalSAXSource_Xerces(SAXParser parser) 164 throws NoSuchMethodException 165 { 166 // Reflection is used to allow us to compile against 167 // Xerces2. If/when we can abandon the older versions of the parser, 168 // this constructor will simply have to fail until/unless the 169 // Xerces2 incremental support is made available on previously 170 // constructed SAXParser instances. 171 fIncrementalParser=parser; 172 Class me=parser.getClass(); 173 Class[] parms={InputSource.class}; 174 fParseSomeSetup=me.getMethod("parseSomeSetup",parms); 175 parms=new Class[0]; 176 fParseSome=me.getMethod("parseSome",parms); 177 // Fallback if this fails (implemented in createIncrementalSAXSource) is 178 // to use IncrementalSAXSource_Filter rather than Xerces-specific code. 179 } 180 181 // 182 // Factories 183 // 184 static public IncrementalSAXSource createIncrementalSAXSource() 185 { 186 try 187 { 188 return new IncrementalSAXSource_Xerces(); 189 } 190 catch(NoSuchMethodException e) 191 { 192 // Xerces version mismatch; neither Xerces1 nor Xerces2 succeeded. 193 // Fall back on filtering solution. 194 IncrementalSAXSource_Filter iss=new IncrementalSAXSource_Filter(); 195 iss.setXMLReader(new SAXParser()); 196 return iss; 197 } 198 } 199 200 static public IncrementalSAXSource 201 createIncrementalSAXSource(SAXParser parser) { 202 try 203 { 204 return new IncrementalSAXSource_Xerces(parser); 205 } 206 catch(NoSuchMethodException e) 207 { 208 // Xerces version mismatch; neither Xerces1 nor Xerces2 succeeded. 209 // Fall back on filtering solution. 210 IncrementalSAXSource_Filter iss=new IncrementalSAXSource_Filter(); 211 iss.setXMLReader(parser); 212 return iss; 213 } 214 } 215 216 // 217 // Public methods 218 // 219 220 // Register handler directly with the incremental parser 221 public void setContentHandler(org.xml.sax.ContentHandler handler) 222 { 223 // Typecast required in Xerces2; SAXParser doesn't inheret XMLReader 224 // %OPT% Cast at asignment? 225 ((XMLReader)fIncrementalParser).setContentHandler(handler); 226 } 227 228 // Register handler directly with the incremental parser 229 public void setLexicalHandler(org.xml.sax.ext.LexicalHandler handler) 230 { 231 // Not supported by all SAX2 parsers but should work in Xerces: 232 try 233 { 234 // Typecast required in Xerces2; SAXParser doesn't inheret XMLReader 235 // %OPT% Cast at asignment? 236 ((XMLReader)fIncrementalParser).setProperty("http://xml.org/sax/properties/lexical-handler", 237 handler); 238 } 239 catch(org.xml.sax.SAXNotRecognizedException e) 240 { 241 // Nothing we can do about it 242 } 243 catch(org.xml.sax.SAXNotSupportedException e) 244 { 245 // Nothing we can do about it 246 } 247 } 248 249 // Register handler directly with the incremental parser 250 public void setDTDHandler(org.xml.sax.DTDHandler handler) 251 { 252 // Typecast required in Xerces2; SAXParser doesn't inheret XMLReader 253 // %OPT% Cast at asignment? 254 ((XMLReader)fIncrementalParser).setDTDHandler(handler); 255 } 256 257 //================================================================ 258 /** startParse() is a simple API which tells the IncrementalSAXSource 259 * to begin reading a document. 260 * 261 * @throws SAXException is parse thread is already in progress 262 * or parsing can not be started. 263 * */ 264 public void startParse(InputSource source) throws SAXException 265 { 266 if (fIncrementalParser==null) 267 throw new SAXException(XMLMessages.createXMLMessage(XMLErrorResources.ER_STARTPARSE_NEEDS_SAXPARSER, null)); //"startParse needs a non-null SAXParser."); 268 if (fParseInProgress) 269 throw new SAXException(XMLMessages.createXMLMessage(XMLErrorResources.ER_STARTPARSE_WHILE_PARSING, null)); //"startParse may not be called while parsing."); 270 271 boolean ok=false; 272 273 try 274 { 275 ok = parseSomeSetup(source); 276 } 277 catch(Exception ex) 278 { 279 throw new SAXException(ex); 280 } 281 282 if(!ok) 283 throw new SAXException(XMLMessages.createXMLMessage(XMLErrorResources.ER_COULD_NOT_INIT_PARSER, null)); //"could not initialize parser with"); 284 } 285 286 287 /** deliverMoreNodes() is a simple API which tells the coroutine 288 * parser that we need more nodes. This is intended to be called 289 * from one of our partner routines, and serves to encapsulate the 290 * details of how incremental parsing has been achieved. 291 * 292 * @param parsemore If true, tells the incremental parser to generate 293 * another chunk of output. If false, tells the parser that we're 294 * satisfied and it can terminate parsing of this document. 295 * @return Boolean.TRUE if the CoroutineParser believes more data may be available 296 * for further parsing. Boolean.FALSE if parsing ran to completion. 297 * Exception if the parser objected for some reason. 298 * */ 299 public Object deliverMoreNodes (boolean parsemore) 300 { 301 if(!parsemore) 302 { 303 fParseInProgress=false; 304 return Boolean.FALSE; 305 } 306 307 Object arg; 308 try { 309 boolean keepgoing = parseSome(); 310 arg = keepgoing ? Boolean.TRUE : Boolean.FALSE; 311 } catch (SAXException ex) { 312 arg = ex; 313 } catch (IOException ex) { 314 arg = ex; 315 } catch (Exception ex) { 316 arg = new SAXException(ex); 317 } 318 return arg; 319 } 320 321 // Private methods -- conveniences to hide the reflection details 322 private boolean parseSomeSetup(InputSource source) 323 throws SAXException, IOException, IllegalAccessException, 324 java.lang.reflect.InvocationTargetException, 325 java.lang.InstantiationException 326 { 327 if(fConfigSetInput!=null) 328 { 329 // Obtain input from SAX inputSource object, construct XNI version of 330 // that object. Logic adapted from Xerces2. 331 Object[] parms1={source.getPublicId(),source.getSystemId(),null}; 332 Object xmlsource=fConfigInputSourceCtor.newInstance(parms1); 333 Object[] parmsa={source.getByteStream()}; 334 fConfigSetByteStream.invoke(xmlsource,parmsa); 335 parmsa[0]=source.getCharacterStream(); 336 fConfigSetCharStream.invoke(xmlsource,parmsa); 337 parmsa[0]=source.getEncoding(); 338 fConfigSetEncoding.invoke(xmlsource,parmsa); 339 340 // Bugzilla5272 patch suggested by Sandy Gao. 341 // Has to be reflection to run with Xerces2 342 // after compilation against Xerces1. or vice 343 // versa, due to return type mismatches. 344 Object[] noparms=new Object[0]; 345 fReset.invoke(fIncrementalParser,noparms); 346 347 parmsa[0]=xmlsource; 348 fConfigSetInput.invoke(fPullParserConfig,parmsa); 349 350 // %REVIEW% Do first pull. Should we instead just return true? 351 return parseSome(); 352 } 353 else 354 { 355 Object[] parm={source}; 356 Object ret=fParseSomeSetup.invoke(fIncrementalParser,parm); 357 return ((Boolean)ret).booleanValue(); 358 } 359 } 360 // Would null work??? 361 private static final Object[] noparms=new Object[0]; 362 private static final Object[] parmsfalse={Boolean.FALSE}; 363 private boolean parseSome() 364 throws SAXException, IOException, IllegalAccessException, 365 java.lang.reflect.InvocationTargetException 366 { 367 // Take next parsing step, return false iff parsing complete: 368 if(fConfigSetInput!=null) 369 { 370 Object ret=(Boolean)(fConfigParse.invoke(fPullParserConfig,parmsfalse)); 371 return ((Boolean)ret).booleanValue(); 372 } 373 else 374 { 375 Object ret=fParseSome.invoke(fIncrementalParser,noparms); 376 return ((Boolean)ret).booleanValue(); 377 } 378 } 379 380 381 //================================================================ 382 /** Simple unit test. Attempt coroutine parsing of document indicated 383 * by first argument (as a URI), report progress. 384 */ 385 public static void main(String args[]) 386 { 387 System.out.println("Starting..."); 388 389 CoroutineManager co = new CoroutineManager(); 390 int appCoroutineID = co.co_joinCoroutineSet(-1); 391 if (appCoroutineID == -1) 392 { 393 System.out.println("ERROR: Couldn't allocate coroutine number.\n"); 394 return; 395 } 396 IncrementalSAXSource parser= 397 createIncrementalSAXSource(); 398 399 // Use a serializer as our sample output 400 org.apache.xml.serialize.XMLSerializer trace; 401 trace=new org.apache.xml.serialize.XMLSerializer(System.out,null); 402 parser.setContentHandler(trace); 403 parser.setLexicalHandler(trace); 404 405 // Tell coroutine to begin parsing, run while parsing is in progress 406 407 for(int arg=0;arg<args.length;++arg) 408 { 409 try 410 { 411 InputSource source = new InputSource(args[arg]); 412 Object result=null; 413 boolean more=true; 414 parser.startParse(source); 415 for(result = parser.deliverMoreNodes(more); 416 result==Boolean.TRUE; 417 result = parser.deliverMoreNodes(more)) 418 { 419 System.out.println("\nSome parsing successful, trying more.\n"); 420 421 // Special test: Terminate parsing early. 422 if(arg+1<args.length && "!".equals(args[arg+1])) 423 { 424 ++arg; 425 more=false; 426 } 427 428 } 429 430 if (result instanceof Boolean && ((Boolean)result)==Boolean.FALSE) 431 { 432 System.out.println("\nParser ended (EOF or on request).\n"); 433 } 434 else if (result == null) { 435 System.out.println("\nUNEXPECTED: Parser says shut down prematurely.\n"); 436 } 437 else if (result instanceof Exception) { 438 throw new org.apache.xml.utils.WrappedRuntimeException((Exception)result); 439 // System.out.println("\nParser threw exception:"); 440 // ((Exception)result).printStackTrace(); 441 } 442 443 } 444 445 catch(SAXException e) 446 { 447 e.printStackTrace(); 448 } 449 } 450 451 } 452 453 454 } // class IncrementalSAXSource_Xerces