001 /* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the "License"); 007 * you may not use this file except in compliance with the License. 008 * You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018 /* 019 * $Id: ExsltStrings.java 1225758 2011-12-30 05:44:27Z mrglavas $ 020 */ 021 package org.apache.xalan.lib; 022 023 import java.util.StringTokenizer; 024 025 import javax.xml.parsers.DocumentBuilderFactory; 026 import javax.xml.parsers.ParserConfigurationException; 027 028 import org.apache.xpath.NodeSet; 029 import org.w3c.dom.Document; 030 import org.w3c.dom.Element; 031 import org.w3c.dom.Node; 032 import org.w3c.dom.NodeList; 033 import org.w3c.dom.Text; 034 035 /** 036 * This class contains EXSLT strings extension functions. 037 * 038 * It is accessed by specifying a namespace URI as follows: 039 * <pre> 040 * xmlns:str="http://exslt.org/strings" 041 * </pre> 042 * The documentation for each function has been copied from the relevant 043 * EXSLT Implementer page. 044 * 045 * @see <a href="http://www.exslt.org/">EXSLT</a> 046 047 * @xsl.usage general 048 */ 049 public class ExsltStrings extends ExsltBase 050 { 051 /** 052 * The str:align function aligns a string within another string. 053 * <p> 054 * The first argument gives the target string to be aligned. The second argument gives 055 * the padding string within which it is to be aligned. 056 * <p> 057 * If the target string is shorter than the padding string then a range of characters 058 * in the padding string are repaced with those in the target string. Which characters 059 * are replaced depends on the value of the third argument, which gives the type of 060 * alignment. It can be one of 'left', 'right' or 'center'. If no third argument is 061 * given or if it is not one of these values, then it defaults to left alignment. 062 * <p> 063 * With left alignment, the range of characters replaced by the target string begins 064 * with the first character in the padding string. With right alignment, the range of 065 * characters replaced by the target string ends with the last character in the padding 066 * string. With center alignment, the range of characters replaced by the target string 067 * is in the middle of the padding string, such that either the number of unreplaced 068 * characters on either side of the range is the same or there is one less on the left 069 * than there is on the right. 070 * <p> 071 * If the target string is longer than the padding string, then it is truncated to be 072 * the same length as the padding string and returned. 073 * 074 * @param targetStr The target string 075 * @param paddingStr The padding string 076 * @param type The type of alignment 077 * 078 * @return The string after alignment 079 */ 080 public static String align(String targetStr, String paddingStr, String type) 081 { 082 if (targetStr.length() >= paddingStr.length()) 083 return targetStr.substring(0, paddingStr.length()); 084 085 if (type.equals("right")) 086 { 087 return paddingStr.substring(0, paddingStr.length() - targetStr.length()) + targetStr; 088 } 089 else if (type.equals("center")) 090 { 091 int startIndex = (paddingStr.length() - targetStr.length()) / 2; 092 return paddingStr.substring(0, startIndex) + targetStr + paddingStr.substring(startIndex + targetStr.length()); 093 } 094 // Default is left 095 else 096 { 097 return targetStr + paddingStr.substring(targetStr.length()); 098 } 099 } 100 101 /** 102 * See above 103 */ 104 public static String align(String targetStr, String paddingStr) 105 { 106 return align(targetStr, paddingStr, "left"); 107 } 108 109 /** 110 * The str:concat function takes a node set and returns the concatenation of the 111 * string values of the nodes in that node set. If the node set is empty, it returns 112 * an empty string. 113 * 114 * @param nl A node set 115 * @return The concatenation of the string values of the nodes in that node set 116 */ 117 public static String concat(NodeList nl) 118 { 119 StringBuffer sb = new StringBuffer(); 120 for (int i = 0; i < nl.getLength(); i++) 121 { 122 Node node = nl.item(i); 123 String value = toString(node); 124 125 if (value != null && value.length() > 0) 126 sb.append(value); 127 } 128 129 return sb.toString(); 130 } 131 132 /** 133 * The str:padding function creates a padding string of a certain length. 134 * The first argument gives the length of the padding string to be created. 135 * The second argument gives a string to be used to create the padding. This 136 * string is repeated as many times as is necessary to create a string of the 137 * length specified by the first argument; if the string is more than a character 138 * long, it may have to be truncated to produce the required length. If no second 139 * argument is specified, it defaults to a space (' '). If the second argument is 140 * an empty string, str:padding returns an empty string. 141 * 142 * @param length The length of the padding string to be created 143 * @param pattern The string to be used as pattern 144 * 145 * @return A padding string of the given length 146 */ 147 public static String padding(double length, String pattern) 148 { 149 if (pattern == null || pattern.length() == 0) 150 return ""; 151 152 StringBuffer sb = new StringBuffer(); 153 int len = (int)length; 154 int numAdded = 0; 155 int index = 0; 156 while (numAdded < len) 157 { 158 if (index == pattern.length()) 159 index = 0; 160 161 sb.append(pattern.charAt(index)); 162 index++; 163 numAdded++; 164 } 165 166 return sb.toString(); 167 } 168 169 /** 170 * See above 171 */ 172 public static String padding(double length) 173 { 174 return padding(length, " "); 175 } 176 177 /** 178 * The str:split function splits up a string and returns a node set of token 179 * elements, each containing one token from the string. 180 * <p> 181 * The first argument is the string to be split. The second argument is a pattern 182 * string. The string given by the first argument is split at any occurrence of 183 * this pattern. For example: 184 * <pre> 185 * str:split('a, simple, list', ', ') gives the node set consisting of: 186 * 187 * <token>a</token> 188 * <token>simple</token> 189 * <token>list</token> 190 * </pre> 191 * If the second argument is omitted, the default is the string ' ' (i.e. a space). 192 * 193 * @param str The string to be split 194 * @param pattern The pattern 195 * 196 * @return A node set of split tokens 197 */ 198 public static NodeList split(String str, String pattern) 199 { 200 201 202 NodeSet resultSet = new NodeSet(); 203 resultSet.setShouldCacheNodes(true); 204 205 boolean done = false; 206 int fromIndex = 0; 207 int matchIndex = 0; 208 String token = null; 209 210 while (!done && fromIndex < str.length()) 211 { 212 matchIndex = str.indexOf(pattern, fromIndex); 213 if (matchIndex >= 0) 214 { 215 token = str.substring(fromIndex, matchIndex); 216 fromIndex = matchIndex + pattern.length(); 217 } 218 else 219 { 220 done = true; 221 token = str.substring(fromIndex); 222 } 223 224 Document doc = DocumentHolder.m_doc; 225 synchronized (doc) 226 { 227 Element element = doc.createElement("token"); 228 Text text = doc.createTextNode(token); 229 element.appendChild(text); 230 resultSet.addNode(element); 231 } 232 } 233 234 return resultSet; 235 } 236 237 /** 238 * See above 239 */ 240 public static NodeList split(String str) 241 { 242 return split(str, " "); 243 } 244 245 /** 246 * The str:tokenize function splits up a string and returns a node set of token 247 * elements, each containing one token from the string. 248 * <p> 249 * The first argument is the string to be tokenized. The second argument is a 250 * string consisting of a number of characters. Each character in this string is 251 * taken as a delimiting character. The string given by the first argument is split 252 * at any occurrence of any of these characters. For example: 253 * <pre> 254 * str:tokenize('2001-06-03T11:40:23', '-T:') gives the node set consisting of: 255 * 256 * <token>2001</token> 257 * <token>06</token> 258 * <token>03</token> 259 * <token>11</token> 260 * <token>40</token> 261 * <token>23</token> 262 * </pre> 263 * If the second argument is omitted, the default is the string '	

 ' 264 * (i.e. whitespace characters). 265 * <p> 266 * If the second argument is an empty string, the function returns a set of token 267 * elements, each of which holds a single character. 268 * <p> 269 * Note: This one is different from the tokenize extension function in the Xalan 270 * namespace. The one in Xalan returns a set of Text nodes, while this one wraps 271 * the Text nodes inside the token Element nodes. 272 * 273 * @param toTokenize The string to be tokenized 274 * @param delims The delimiter string 275 * 276 * @return A node set of split token elements 277 */ 278 public static NodeList tokenize(String toTokenize, String delims) 279 { 280 281 282 NodeSet resultSet = new NodeSet(); 283 284 if (delims != null && delims.length() > 0) 285 { 286 StringTokenizer lTokenizer = new StringTokenizer(toTokenize, delims); 287 288 Document doc = DocumentHolder.m_doc; 289 synchronized (doc) 290 { 291 while (lTokenizer.hasMoreTokens()) 292 { 293 Element element = doc.createElement("token"); 294 element.appendChild(doc.createTextNode(lTokenizer.nextToken())); 295 resultSet.addNode(element); 296 } 297 } 298 } 299 // If the delimiter is an empty string, create one token Element for 300 // every single character. 301 else 302 { 303 304 Document doc = DocumentHolder.m_doc; 305 synchronized (doc) 306 { 307 for (int i = 0; i < toTokenize.length(); i++) 308 { 309 Element element = doc.createElement("token"); 310 element.appendChild(doc.createTextNode(toTokenize.substring(i, i+1))); 311 resultSet.addNode(element); 312 } 313 } 314 } 315 316 return resultSet; 317 } 318 319 /** 320 * See above 321 */ 322 public static NodeList tokenize(String toTokenize) 323 { 324 return tokenize(toTokenize, " \t\n\r"); 325 } 326 /** 327 * This class is not loaded until first referenced (see Java Language 328 * Specification by Gosling/Joy/Steele, section 12.4.1) 329 * 330 * The static members are created when this class is first referenced, as a 331 * lazy initialization not needing checking against null or any 332 * synchronization. 333 * 334 */ 335 private static class DocumentHolder 336 { 337 // Reuse the Document object to reduce memory usage. 338 private static final Document m_doc; 339 static { 340 try 341 { 342 m_doc =DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument(); 343 } 344 345 catch(ParserConfigurationException pce) 346 { 347 throw new org.apache.xml.utils.WrappedRuntimeException(pce); 348 } 349 350 } 351 } 352 353 }