001 /* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the "License"); 007 * you may not use this file except in compliance with the License. 008 * You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018 /* 019 * $Id: DocumentCache.java 1225369 2011-12-28 22:54:01Z mrglavas $ 020 */ 021 022 package org.apache.xalan.xsltc.dom; 023 024 import java.io.File; 025 import java.io.PrintWriter; 026 import java.net.URL; 027 import java.net.URLConnection; 028 import java.net.URLDecoder; 029 import java.util.Date; 030 import java.util.Hashtable; 031 032 import javax.xml.parsers.ParserConfigurationException; 033 import javax.xml.parsers.SAXParser; 034 import javax.xml.parsers.SAXParserFactory; 035 import javax.xml.transform.TransformerException; 036 import javax.xml.transform.sax.SAXSource; 037 038 import org.apache.xalan.xsltc.DOM; 039 import org.apache.xalan.xsltc.DOMCache; 040 import org.apache.xalan.xsltc.DOMEnhancedForDTM; 041 import org.apache.xalan.xsltc.Translet; 042 import org.apache.xalan.xsltc.runtime.AbstractTranslet; 043 import org.apache.xalan.xsltc.runtime.BasisLibrary; 044 import org.apache.xalan.xsltc.runtime.Constants; 045 import org.apache.xml.utils.SystemIDResolver; 046 047 import org.xml.sax.InputSource; 048 import org.xml.sax.SAXException; 049 import org.xml.sax.XMLReader; 050 051 /** 052 * @author Morten Jorgensen 053 */ 054 public final class DocumentCache implements DOMCache { 055 056 private int _size; 057 private Hashtable _references; 058 private String[] _URIs; 059 private int _count; 060 private int _current; 061 private SAXParser _parser; 062 private XMLReader _reader; 063 private XSLTCDTMManager _dtmManager; 064 065 private static final int REFRESH_INTERVAL = 1000; 066 067 /* 068 * Inner class containing a DOMImpl object and DTD handler 069 */ 070 public final class CachedDocument { 071 072 // Statistics data 073 private long _firstReferenced; 074 private long _lastReferenced; 075 private long _accessCount; 076 private long _lastModified; 077 private long _lastChecked; 078 private long _buildTime; 079 080 // DOM and DTD handler references 081 private DOMEnhancedForDTM _dom = null; 082 083 /** 084 * Constructor - load document and initialise statistics 085 */ 086 public CachedDocument(String uri) { 087 // Initialise statistics variables 088 final long stamp = System.currentTimeMillis(); 089 _firstReferenced = stamp; 090 _lastReferenced = stamp; 091 _accessCount = 0; 092 loadDocument(uri); 093 094 _buildTime = System.currentTimeMillis() - stamp; 095 } 096 097 /** 098 * Loads the document and updates build-time (latency) statistics 099 */ 100 public void loadDocument(String uri) { 101 102 try { 103 final long stamp = System.currentTimeMillis(); 104 _dom = (DOMEnhancedForDTM)_dtmManager.getDTM( 105 new SAXSource(_reader, new InputSource(uri)), 106 false, null, true, false); 107 _dom.setDocumentURI(uri); 108 109 // The build time can be used for statistics for a better 110 // priority algorithm (currently round robin). 111 final long thisTime = System.currentTimeMillis() - stamp; 112 if (_buildTime > 0) 113 _buildTime = (_buildTime + thisTime) >>> 1; 114 else 115 _buildTime = thisTime; 116 } 117 catch (Exception e) { 118 _dom = null; 119 } 120 } 121 122 public DOM getDocument() { return(_dom); } 123 124 public long getFirstReferenced() { return(_firstReferenced); } 125 126 public long getLastReferenced() { return(_lastReferenced); } 127 128 public long getAccessCount() { return(_accessCount); } 129 130 public void incAccessCount() { _accessCount++; } 131 132 public long getLastModified() { return(_lastModified); } 133 134 public void setLastModified(long t){ _lastModified = t; } 135 136 public long getLatency() { return(_buildTime); } 137 138 public long getLastChecked() { return(_lastChecked); } 139 140 public void setLastChecked(long t) { _lastChecked = t; } 141 142 public long getEstimatedSize() { 143 if (_dom != null) 144 return(_dom.getSize() << 5); // ??? 145 else 146 return(0); 147 } 148 149 } 150 151 /** 152 * DocumentCache constructor 153 */ 154 public DocumentCache(int size) throws SAXException { 155 this(size, null); 156 try { 157 _dtmManager = (XSLTCDTMManager)XSLTCDTMManager.getDTMManagerClass() 158 .newInstance(); 159 } catch (Exception e) { 160 throw new SAXException(e); 161 } 162 } 163 164 /** 165 * DocumentCache constructor 166 */ 167 public DocumentCache(int size, XSLTCDTMManager dtmManager) throws SAXException { 168 _dtmManager = dtmManager; 169 _count = 0; 170 _current = 0; 171 _size = size; 172 _references = new Hashtable(_size+2); 173 _URIs = new String[_size]; 174 175 try { 176 // Create a SAX parser and get the XMLReader object it uses 177 final SAXParserFactory factory = SAXParserFactory.newInstance(); 178 try { 179 factory.setFeature(Constants.NAMESPACE_FEATURE,true); 180 } 181 catch (Exception e) { 182 factory.setNamespaceAware(true); 183 } 184 _parser = factory.newSAXParser(); 185 _reader = _parser.getXMLReader(); 186 } 187 catch (ParserConfigurationException e) { 188 BasisLibrary.runTimeError(BasisLibrary.NAMESPACES_SUPPORT_ERR); 189 } 190 } 191 192 /** 193 * Returns the time-stamp for a document's last update 194 */ 195 private final long getLastModified(String uri) { 196 try { 197 URL url = new URL(uri); 198 URLConnection connection = url.openConnection(); 199 long timestamp = connection.getLastModified(); 200 // Check for a "file:" URI (courtesy of Brian Ewins) 201 if (timestamp == 0){ // get 0 for local URI 202 if ("file".equals(url.getProtocol())){ 203 File localfile = new File(URLDecoder.decode(url.getFile())); 204 timestamp = localfile.lastModified(); 205 } 206 } 207 return(timestamp); 208 } 209 // Brutal handling of all exceptions 210 catch (Exception e) { 211 return(System.currentTimeMillis()); 212 } 213 } 214 215 /** 216 * 217 */ 218 private CachedDocument lookupDocument(String uri) { 219 return((CachedDocument)_references.get(uri)); 220 } 221 222 /** 223 * 224 */ 225 private synchronized void insertDocument(String uri, CachedDocument doc) { 226 if (_count < _size) { 227 // Insert out URI in circular buffer 228 _URIs[_count++] = uri; 229 _current = 0; 230 } 231 else { 232 // Remove oldest URI from reference Hashtable 233 _references.remove(_URIs[_current]); 234 // Insert our URI in circular buffer 235 _URIs[_current] = uri; 236 if (++_current >= _size) _current = 0; 237 } 238 _references.put(uri, doc); 239 } 240 241 /** 242 * 243 */ 244 private synchronized void replaceDocument(String uri, CachedDocument doc) { 245 CachedDocument old = (CachedDocument)_references.get(uri); 246 if (doc == null) 247 insertDocument(uri, doc); 248 else 249 _references.put(uri, doc); 250 } 251 252 /** 253 * Returns a document either by finding it in the cache or 254 * downloading it and putting it in the cache. 255 */ 256 public DOM retrieveDocument(String baseURI, String href, Translet trs) { 257 CachedDocument doc; 258 259 String uri = href; 260 if (baseURI != null && baseURI.length() != 0) { 261 try { 262 uri = SystemIDResolver.getAbsoluteURI(uri, baseURI); 263 } catch (TransformerException te) { 264 // ignore 265 } 266 } 267 268 // Try to get the document from the cache first 269 if ((doc = lookupDocument(uri)) == null) { 270 doc = new CachedDocument(uri); 271 if (doc == null) return null; // better error handling needed!!! 272 doc.setLastModified(getLastModified(uri)); 273 insertDocument(uri, doc); 274 } 275 // If the document is in the cache we must check if it is still valid 276 else { 277 long now = System.currentTimeMillis(); 278 long chk = doc.getLastChecked(); 279 doc.setLastChecked(now); 280 // Has the modification time for this file been checked lately? 281 if (now > (chk + REFRESH_INTERVAL)) { 282 doc.setLastChecked(now); 283 long last = getLastModified(uri); 284 // Reload document if it has been modified since last download 285 if (last > doc.getLastModified()) { 286 doc = new CachedDocument(uri); 287 if (doc == null) return null; 288 doc.setLastModified(getLastModified(uri)); 289 replaceDocument(uri, doc); 290 } 291 } 292 293 } 294 295 // Get the references to the actual DOM and DTD handler 296 final DOM dom = doc.getDocument(); 297 298 // The dom reference may be null if the URL pointed to a 299 // non-existing document 300 if (dom == null) return null; 301 302 doc.incAccessCount(); // For statistics 303 304 final AbstractTranslet translet = (AbstractTranslet)trs; 305 306 // Give the translet an early opportunity to extract any 307 // information from the DOM object that it would like. 308 translet.prepassDocument(dom); 309 310 return(doc.getDocument()); 311 } 312 313 /** 314 * Outputs the cache statistics 315 */ 316 public void getStatistics(PrintWriter out) { 317 out.println("<h2>DOM cache statistics</h2><center><table border=\"2\">"+ 318 "<tr><td><b>Document URI</b></td>"+ 319 "<td><center><b>Build time</b></center></td>"+ 320 "<td><center><b>Access count</b></center></td>"+ 321 "<td><center><b>Last accessed</b></center></td>"+ 322 "<td><center><b>Last modified</b></center></td></tr>"); 323 324 for (int i=0; i<_count; i++) { 325 CachedDocument doc = (CachedDocument)_references.get(_URIs[i]); 326 out.print("<tr><td><a href=\""+_URIs[i]+"\">"+ 327 "<font size=-1>"+_URIs[i]+"</font></a></td>"); 328 out.print("<td><center>"+doc.getLatency()+"ms</center></td>"); 329 out.print("<td><center>"+doc.getAccessCount()+"</center></td>"); 330 out.print("<td><center>"+(new Date(doc.getLastReferenced()))+ 331 "</center></td>"); 332 out.print("<td><center>"+(new Date(doc.getLastModified()))+ 333 "</center></td>"); 334 out.println("</tr>"); 335 } 336 337 out.println("</table></center>"); 338 } 339 }