001 /* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the "License"); 007 * you may not use this file except in compliance with the License. 008 * You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018 /* 019 * $Id: SuballocatedIntVector.java 468655 2006-10-28 07:12:06Z minchau $ 020 */ 021 package org.apache.xml.utils; 022 023 /** 024 * A very simple table that stores a list of int. Very similar API to our 025 * IntVector class (same API); different internal storage. 026 * 027 * This version uses an array-of-arrays solution. Read/write access is thus 028 * a bit slower than the simple IntVector, and basic storage is a trifle 029 * higher due to the top-level array -- but appending is O(1) fast rather 030 * than O(N**2) slow, which will swamp those costs in situations where 031 * long vectors are being built up. 032 * 033 * Known issues: 034 * 035 * Some methods are private because they haven't yet been tested properly. 036 * 037 * Retrieval performance is critical, since this is used at the core 038 * of the DTM model. (Append performance is almost as important.) 039 * That's pushing me toward just letting reads from unset indices 040 * throw exceptions or return stale data; safer behavior would have 041 * performance costs. 042 * */ 043 public class SuballocatedIntVector 044 { 045 /** Size of blocks to allocate */ 046 protected int m_blocksize; 047 048 /** Bitwise addressing (much faster than div/remainder */ 049 protected int m_SHIFT, m_MASK; 050 051 /** The default number of blocks to (over)allocate by */ 052 protected static final int NUMBLOCKS_DEFAULT = 32; 053 054 /** The number of blocks to (over)allocate by */ 055 protected int m_numblocks = NUMBLOCKS_DEFAULT; 056 057 /** Array of arrays of ints */ 058 protected int m_map[][]; 059 060 /** Number of ints in array */ 061 protected int m_firstFree = 0; 062 063 /** "Shortcut" handle to m_map[0]. Surprisingly helpful for short vectors. */ 064 protected int m_map0[]; 065 066 /** "Shortcut" handle to most recently added row of m_map. 067 * Very helpful during construction. 068 * @xsl.usage internal 069 */ 070 protected int m_buildCache[]; 071 protected int m_buildCacheStartIndex; 072 073 074 /** 075 * Default constructor. Note that the default 076 * block size is currently 2K, which may be overkill for 077 * small lists and undershootng for large ones. 078 */ 079 public SuballocatedIntVector() 080 { 081 this(2048); 082 } 083 084 /** 085 * Construct a IntVector, using the given block size and number 086 * of blocks. For efficiency, we will round the requested size 087 * off to a power of two. 088 * 089 * @param blocksize Size of block to allocate 090 * @param numblocks Number of blocks to allocate 091 * */ 092 public SuballocatedIntVector(int blocksize, int numblocks) 093 { 094 //m_blocksize = blocksize; 095 for(m_SHIFT=0;0!=(blocksize>>>=1);++m_SHIFT) 096 ; 097 m_blocksize=1<<m_SHIFT; 098 m_MASK=m_blocksize-1; 099 m_numblocks = numblocks; 100 101 m_map0=new int[m_blocksize]; 102 m_map = new int[numblocks][]; 103 m_map[0]=m_map0; 104 m_buildCache = m_map0; 105 m_buildCacheStartIndex = 0; 106 } 107 108 /** Construct a IntVector, using the given block size and 109 * the default number of blocks (32). 110 * 111 * @param blocksize Size of block to allocate 112 * */ 113 public SuballocatedIntVector(int blocksize) 114 { 115 this(blocksize, NUMBLOCKS_DEFAULT); 116 } 117 118 /** 119 * Get the length of the list. 120 * 121 * @return length of the list 122 */ 123 public int size() 124 { 125 return m_firstFree; 126 } 127 128 /** 129 * Set the length of the list. This will only work to truncate the list, and 130 * even then it has not been heavily tested and may not be trustworthy. 131 * 132 * @return length of the list 133 */ 134 public void setSize(int sz) 135 { 136 if(m_firstFree>sz) // Whups; had that backward! 137 m_firstFree = sz; 138 } 139 140 /** 141 * Append a int onto the vector. 142 * 143 * @param value Int to add to the list 144 */ 145 public void addElement(int value) 146 { 147 int indexRelativeToCache = m_firstFree - m_buildCacheStartIndex; 148 149 // Is the new index an index into the cache row of m_map? 150 if(indexRelativeToCache >= 0 && indexRelativeToCache < m_blocksize) { 151 m_buildCache[indexRelativeToCache]=value; 152 ++m_firstFree; 153 } else { 154 // Growing the outer array should be rare. We initialize to a 155 // total of m_blocksize squared elements, which at the default 156 // size is 4M integers... and we grow by at least that much each 157 // time. However, attempts to microoptimize for this (assume 158 // long enough and catch exceptions) yield no noticable 159 // improvement. 160 161 int index=m_firstFree>>>m_SHIFT; 162 int offset=m_firstFree&m_MASK; 163 164 if(index>=m_map.length) 165 { 166 int newsize=index+m_numblocks; 167 int[][] newMap=new int[newsize][]; 168 System.arraycopy(m_map, 0, newMap, 0, m_map.length); 169 m_map=newMap; 170 } 171 int[] block=m_map[index]; 172 if(null==block) 173 block=m_map[index]=new int[m_blocksize]; 174 block[offset]=value; 175 176 // Cache the current row of m_map. Next m_blocksize-1 177 // values added will go to this row. 178 m_buildCache = block; 179 m_buildCacheStartIndex = m_firstFree-offset; 180 181 ++m_firstFree; 182 } 183 } 184 185 /** 186 * Append several int values onto the vector. 187 * 188 * @param value Int to add to the list 189 */ 190 private void addElements(int value, int numberOfElements) 191 { 192 if(m_firstFree+numberOfElements<m_blocksize) 193 for (int i = 0; i < numberOfElements; i++) 194 { 195 m_map0[m_firstFree++]=value; 196 } 197 else 198 { 199 int index=m_firstFree>>>m_SHIFT; 200 int offset=m_firstFree&m_MASK; 201 m_firstFree+=numberOfElements; 202 while( numberOfElements>0) 203 { 204 if(index>=m_map.length) 205 { 206 int newsize=index+m_numblocks; 207 int[][] newMap=new int[newsize][]; 208 System.arraycopy(m_map, 0, newMap, 0, m_map.length); 209 m_map=newMap; 210 } 211 int[] block=m_map[index]; 212 if(null==block) 213 block=m_map[index]=new int[m_blocksize]; 214 int copied=(m_blocksize-offset < numberOfElements) 215 ? m_blocksize-offset : numberOfElements; 216 numberOfElements-=copied; 217 while(copied-- > 0) 218 block[offset++]=value; 219 220 ++index;offset=0; 221 } 222 } 223 } 224 225 /** 226 * Append several slots onto the vector, but do not set the values. 227 * Note: "Not Set" means the value is unspecified. 228 * 229 * @param numberOfElements Int to add to the list 230 */ 231 private void addElements(int numberOfElements) 232 { 233 int newlen=m_firstFree+numberOfElements; 234 if(newlen>m_blocksize) 235 { 236 int index=m_firstFree>>>m_SHIFT; 237 int newindex=(m_firstFree+numberOfElements)>>>m_SHIFT; 238 for(int i=index+1;i<=newindex;++i) 239 m_map[i]=new int[m_blocksize]; 240 } 241 m_firstFree=newlen; 242 } 243 244 /** 245 * Inserts the specified node in this vector at the specified index. 246 * Each component in this vector with an index greater or equal to 247 * the specified index is shifted upward to have an index one greater 248 * than the value it had previously. 249 * 250 * Insertion may be an EXPENSIVE operation! 251 * 252 * @param value Int to insert 253 * @param at Index of where to insert 254 */ 255 private void insertElementAt(int value, int at) 256 { 257 if(at==m_firstFree) 258 addElement(value); 259 else if (at>m_firstFree) 260 { 261 int index=at>>>m_SHIFT; 262 if(index>=m_map.length) 263 { 264 int newsize=index+m_numblocks; 265 int[][] newMap=new int[newsize][]; 266 System.arraycopy(m_map, 0, newMap, 0, m_map.length); 267 m_map=newMap; 268 } 269 int[] block=m_map[index]; 270 if(null==block) 271 block=m_map[index]=new int[m_blocksize]; 272 int offset=at&m_MASK; 273 block[offset]=value; 274 m_firstFree=offset+1; 275 } 276 else 277 { 278 int index=at>>>m_SHIFT; 279 int maxindex=m_firstFree>>>m_SHIFT; // %REVIEW% (m_firstFree+1?) 280 ++m_firstFree; 281 int offset=at&m_MASK; 282 int push; 283 284 // ***** Easier to work down from top? 285 while(index<=maxindex) 286 { 287 int copylen=m_blocksize-offset-1; 288 int[] block=m_map[index]; 289 if(null==block) 290 { 291 push=0; 292 block=m_map[index]=new int[m_blocksize]; 293 } 294 else 295 { 296 push=block[m_blocksize-1]; 297 System.arraycopy(block, offset , block, offset+1, copylen); 298 } 299 block[offset]=value; 300 value=push; 301 offset=0; 302 ++index; 303 } 304 } 305 } 306 307 /** 308 * Wipe it out. Currently defined as equivalent to setSize(0). 309 */ 310 public void removeAllElements() 311 { 312 m_firstFree = 0; 313 m_buildCache = m_map0; 314 m_buildCacheStartIndex = 0; 315 } 316 317 /** 318 * Removes the first occurrence of the argument from this vector. 319 * If the object is found in this vector, each component in the vector 320 * with an index greater or equal to the object's index is shifted 321 * downward to have an index one smaller than the value it had 322 * previously. 323 * 324 * @param s Int to remove from array 325 * 326 * @return True if the int was removed, false if it was not found 327 */ 328 private boolean removeElement(int s) 329 { 330 int at=indexOf(s,0); 331 if(at<0) 332 return false; 333 removeElementAt(at); 334 return true; 335 } 336 337 /** 338 * Deletes the component at the specified index. Each component in 339 * this vector with an index greater or equal to the specified 340 * index is shifted downward to have an index one smaller than 341 * the value it had previously. 342 * 343 * @param i index of where to remove and int 344 */ 345 private void removeElementAt(int at) 346 { 347 // No point in removing elements that "don't exist"... 348 if(at<m_firstFree) 349 { 350 int index=at>>>m_SHIFT; 351 int maxindex=m_firstFree>>>m_SHIFT; 352 int offset=at&m_MASK; 353 354 while(index<=maxindex) 355 { 356 int copylen=m_blocksize-offset-1; 357 int[] block=m_map[index]; 358 if(null==block) 359 block=m_map[index]=new int[m_blocksize]; 360 else 361 System.arraycopy(block, offset+1, block, offset, copylen); 362 if(index<maxindex) 363 { 364 int[] next=m_map[index+1]; 365 if(next!=null) 366 block[m_blocksize-1]=(next!=null) ? next[0] : 0; 367 } 368 else 369 block[m_blocksize-1]=0; 370 offset=0; 371 ++index; 372 } 373 } 374 --m_firstFree; 375 } 376 377 /** 378 * Sets the component at the specified index of this vector to be the 379 * specified object. The previous component at that position is discarded. 380 * 381 * The index must be a value greater than or equal to 0 and less 382 * than the current size of the vector. 383 * 384 * @param value object to set 385 * @param at Index of where to set the object 386 */ 387 public void setElementAt(int value, int at) 388 { 389 if(at<m_blocksize) 390 m_map0[at]=value; 391 else 392 { 393 int index=at>>>m_SHIFT; 394 int offset=at&m_MASK; 395 396 if(index>=m_map.length) 397 { 398 int newsize=index+m_numblocks; 399 int[][] newMap=new int[newsize][]; 400 System.arraycopy(m_map, 0, newMap, 0, m_map.length); 401 m_map=newMap; 402 } 403 404 int[] block=m_map[index]; 405 if(null==block) 406 block=m_map[index]=new int[m_blocksize]; 407 block[offset]=value; 408 } 409 410 if(at>=m_firstFree) 411 m_firstFree=at+1; 412 } 413 414 415 /** 416 * Get the nth element. This is often at the innermost loop of an 417 * application, so performance is critical. 418 * 419 * @param i index of value to get 420 * 421 * @return value at given index. If that value wasn't previously set, 422 * the result is undefined for performance reasons. It may throw an 423 * exception (see below), may return zero, or (if setSize has previously 424 * been used) may return stale data. 425 * 426 * @throws ArrayIndexOutOfBoundsException if the index was _clearly_ 427 * unreasonable (negative, or past the highest block). 428 * 429 * @throws NullPointerException if the index points to a block that could 430 * have existed (based on the highest index used) but has never had anything 431 * set into it. 432 * %REVIEW% Could add a catch to create the block in that case, or return 0. 433 * Try/Catch is _supposed_ to be nearly free when not thrown to. Do we 434 * believe that? Should we have a separate safeElementAt? 435 */ 436 public int elementAt(int i) 437 { 438 // This is actually a significant optimization! 439 if(i<m_blocksize) 440 return m_map0[i]; 441 442 return m_map[i>>>m_SHIFT][i&m_MASK]; 443 } 444 445 /** 446 * Tell if the table contains the given node. 447 * 448 * @param s object to look for 449 * 450 * @return true if the object is in the list 451 */ 452 private boolean contains(int s) 453 { 454 return (indexOf(s,0) >= 0); 455 } 456 457 /** 458 * Searches for the first occurence of the given argument, 459 * beginning the search at index, and testing for equality 460 * using the equals method. 461 * 462 * @param elem object to look for 463 * @param index Index of where to begin search 464 * @return the index of the first occurrence of the object 465 * argument in this vector at position index or later in the 466 * vector; returns -1 if the object is not found. 467 */ 468 public int indexOf(int elem, int index) 469 { 470 if(index>=m_firstFree) 471 return -1; 472 473 int bindex=index>>>m_SHIFT; 474 int boffset=index&m_MASK; 475 int maxindex=m_firstFree>>>m_SHIFT; 476 int[] block; 477 478 for(;bindex<maxindex;++bindex) 479 { 480 block=m_map[bindex]; 481 if(block!=null) 482 for(int offset=boffset;offset<m_blocksize;++offset) 483 if(block[offset]==elem) 484 return offset+bindex*m_blocksize; 485 boffset=0; // after first 486 } 487 // Last block may need to stop before end 488 int maxoffset=m_firstFree&m_MASK; 489 block=m_map[maxindex]; 490 for(int offset=boffset;offset<maxoffset;++offset) 491 if(block[offset]==elem) 492 return offset+maxindex*m_blocksize; 493 494 return -1; 495 } 496 497 /** 498 * Searches for the first occurence of the given argument, 499 * beginning the search at index, and testing for equality 500 * using the equals method. 501 * 502 * @param elem object to look for 503 * @return the index of the first occurrence of the object 504 * argument in this vector at position index or later in the 505 * vector; returns -1 if the object is not found. 506 */ 507 public int indexOf(int elem) 508 { 509 return indexOf(elem,0); 510 } 511 512 /** 513 * Searches for the first occurence of the given argument, 514 * beginning the search at index, and testing for equality 515 * using the equals method. 516 * 517 * @param elem Object to look for 518 * @return the index of the first occurrence of the object 519 * argument in this vector at position index or later in the 520 * vector; returns -1 if the object is not found. 521 */ 522 private int lastIndexOf(int elem) 523 { 524 int boffset=m_firstFree&m_MASK; 525 for(int index=m_firstFree>>>m_SHIFT; 526 index>=0; 527 --index) 528 { 529 int[] block=m_map[index]; 530 if(block!=null) 531 for(int offset=boffset; offset>=0; --offset) 532 if(block[offset]==elem) 533 return offset+index*m_blocksize; 534 boffset=0; // after first 535 } 536 return -1; 537 } 538 539 /** 540 * Return the internal m_map0 array 541 * @return the m_map0 array 542 */ 543 public final int[] getMap0() 544 { 545 return m_map0; 546 } 547 548 /** 549 * Return the m_map double array 550 * @return the internal map of array of arrays 551 */ 552 public final int[][] getMap() 553 { 554 return m_map; 555 } 556 557 }