001 /* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the "License"); 007 * you may not use this file except in compliance with the License. 008 * You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018 /* 019 * $Id: XMLChar.java 1225426 2011-12-29 04:13:08Z mrglavas $ 020 */ 021 022 package org.apache.xml.utils; 023 024 /** 025 * This class defines the basic XML character properties. The data 026 * in this class can be used to verify that a character is a valid 027 * XML character or if the character is a space, name start, or name 028 * character. 029 * <p> 030 * A series of convenience methods are supplied to ease the burden 031 * of the developer. Because inlining the checks can improve per 032 * character performance, the tables of character properties are 033 * public. Using the character as an index into the <code>CHARS</code> 034 * array and applying the appropriate mask flag (e.g. 035 * <code>MASK_VALID</code>), yields the same results as calling the 036 * convenience methods. There is one exception: check the comments 037 * for the <code>isValid</code> method for details. 038 * 039 * @author Glenn Marcy, IBM 040 * @author Andy Clark, IBM 041 * @author Eric Ye, IBM 042 * @author Arnaud Le Hors, IBM 043 * @author Rahul Srivastava, Sun Microsystems Inc. 
/* End of the header comment that opens on the preceding line; the class Javadoc follows. */
/**
 * Defines the basic XML 1.0 character properties. The data in this class
 * can be used to verify that a character is a valid XML character, or that
 * it is a space, name start, name, NCName, Pubid or content character.
 * <p>
 * The per-character flag table is private to this class; the
 * <code>MASK_*</code> constants are public and name the individual flag
 * bits. All lookups go through the static convenience methods below. The
 * table only covers the 16-bit range 0x0000-0xFFFF: <code>isValid</code>
 * and <code>isContent</code> additionally accept the supplemental range
 * 0x10000-0x10FFFF, every other property test returns false outside the
 * 16-bit range.
 *
 * @author Glenn Marcy, IBM
 * @author Andy Clark, IBM
 * @author Eric Ye, IBM
 * @author Arnaud Le Hors, IBM
 * @author Rahul Srivastava, Sun Microsystems Inc.
 *
 * @version $Id: XMLChar.java 1225426 2011-12-29 04:13:08Z mrglavas $
 */
public class XMLChar {

    //
    // Constants
    //

    /**
     * Character flags, indexed by 16-bit character value. Each entry is a
     * bitwise OR of the <code>MASK_*</code> constants.
     */
    private static final byte[] CHARS = new byte[1 << 16];

    /** Valid character mask. */
    public static final int MASK_VALID = 0x01;

    /** Space character mask. */
    public static final int MASK_SPACE = 0x02;

    /** Name start character mask. */
    public static final int MASK_NAME_START = 0x04;

    /** Name character mask. */
    public static final int MASK_NAME = 0x08;

    /** Pubid character mask. */
    public static final int MASK_PUBID = 0x10;

    /**
     * Content character mask. Special characters are those that can
     * be considered the start of markup, such as '&lt;' and '&amp;'.
     * The various newline characters are considered special as well.
     * All other valid XML characters can be considered content.
     * <p>
     * This is an optimization for the inner loop of character scanning.
     */
    public static final int MASK_CONTENT = 0x20;

    /** NCName start character mask. */
    public static final int MASK_NCNAME_START = 0x40;

    /** NCName character mask. */
    public static final int MASK_NCNAME = 0x80;

    //
    // Static initialization
    //

    static {

        //
        // [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] |
        //              [#xE000-#xFFFD] | [#x10000-#x10FFFF]
        //

        int[] charRange = {
            0x0009, 0x000A, 0x000D, 0x000D, 0x0020, 0xD7FF, 0xE000, 0xFFFD,
        };

        //
        // [3] S ::= (#x20 | #x9 | #xD | #xA)+
        //

        int[] spaceChar = {
            0x0020, 0x0009, 0x000D, 0x000A,
        };

        //
        // [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
        //                  CombiningChar | Extender
        //

        int[] nameChar = {
            0x002D, 0x002E, // '-' and '.'
        };

        //
        // [5] Name ::= (Letter | '_' | ':') (NameChar)*
        //

        int[] nameStartChar = {
            0x003A, 0x005F, // ':' and '_'
        };

        //
        // [13] PubidChar ::= #x20 | 0xD | 0xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%]
        //

        int[] pubidChar = {
            0x000A, 0x000D, 0x0020, 0x0021, 0x0023, 0x0024, 0x0025, 0x003D,
            0x005F
        };

        int[] pubidRange = {
            0x0027, 0x003B, 0x003F, 0x005A, 0x0061, 0x007A
        };

        //
        // [84] Letter ::= BaseChar | Ideographic
        //

        int[] letterRange = {
            // BaseChar
            0x0041, 0x005A, 0x0061, 0x007A, 0x00C0, 0x00D6, 0x00D8, 0x00F6,
            0x00F8, 0x0131, 0x0134, 0x013E, 0x0141, 0x0148, 0x014A, 0x017E,
            0x0180, 0x01C3, 0x01CD, 0x01F0, 0x01F4, 0x01F5, 0x01FA, 0x0217,
            0x0250, 0x02A8, 0x02BB, 0x02C1, 0x0388, 0x038A, 0x038E, 0x03A1,
            0x03A3, 0x03CE, 0x03D0, 0x03D6, 0x03E2, 0x03F3, 0x0401, 0x040C,
            0x040E, 0x044F, 0x0451, 0x045C, 0x045E, 0x0481, 0x0490, 0x04C4,
            0x04C7, 0x04C8, 0x04CB, 0x04CC, 0x04D0, 0x04EB, 0x04EE, 0x04F5,
            0x04F8, 0x04F9, 0x0531, 0x0556, 0x0561, 0x0586, 0x05D0, 0x05EA,
            0x05F0, 0x05F2, 0x0621, 0x063A, 0x0641, 0x064A, 0x0671, 0x06B7,
            0x06BA, 0x06BE, 0x06C0, 0x06CE, 0x06D0, 0x06D3, 0x06E5, 0x06E6,
            0x0905, 0x0939, 0x0958, 0x0961, 0x0985, 0x098C, 0x098F, 0x0990,
            0x0993, 0x09A8, 0x09AA, 0x09B0, 0x09B6, 0x09B9, 0x09DC, 0x09DD,
            0x09DF, 0x09E1, 0x09F0, 0x09F1, 0x0A05, 0x0A0A, 0x0A0F, 0x0A10,
            0x0A13, 0x0A28, 0x0A2A, 0x0A30, 0x0A32, 0x0A33, 0x0A35, 0x0A36,
            0x0A38, 0x0A39, 0x0A59, 0x0A5C, 0x0A72, 0x0A74, 0x0A85, 0x0A8B,
            0x0A8F, 0x0A91, 0x0A93, 0x0AA8, 0x0AAA, 0x0AB0, 0x0AB2, 0x0AB3,
            0x0AB5, 0x0AB9, 0x0B05, 0x0B0C, 0x0B0F, 0x0B10, 0x0B13, 0x0B28,
            0x0B2A, 0x0B30, 0x0B32, 0x0B33, 0x0B36, 0x0B39, 0x0B5C, 0x0B5D,
            0x0B5F, 0x0B61, 0x0B85, 0x0B8A, 0x0B8E, 0x0B90, 0x0B92, 0x0B95,
            0x0B99, 0x0B9A, 0x0B9E, 0x0B9F, 0x0BA3, 0x0BA4, 0x0BA8, 0x0BAA,
            0x0BAE, 0x0BB5, 0x0BB7, 0x0BB9, 0x0C05, 0x0C0C, 0x0C0E, 0x0C10,
            0x0C12, 0x0C28, 0x0C2A, 0x0C33, 0x0C35, 0x0C39, 0x0C60, 0x0C61,
            0x0C85, 0x0C8C, 0x0C8E, 0x0C90, 0x0C92, 0x0CA8, 0x0CAA, 0x0CB3,
            0x0CB5, 0x0CB9, 0x0CE0, 0x0CE1, 0x0D05, 0x0D0C, 0x0D0E, 0x0D10,
            0x0D12, 0x0D28, 0x0D2A, 0x0D39, 0x0D60, 0x0D61, 0x0E01, 0x0E2E,
            0x0E32, 0x0E33, 0x0E40, 0x0E45, 0x0E81, 0x0E82, 0x0E87, 0x0E88,
            0x0E94, 0x0E97, 0x0E99, 0x0E9F, 0x0EA1, 0x0EA3, 0x0EAA, 0x0EAB,
            0x0EAD, 0x0EAE, 0x0EB2, 0x0EB3, 0x0EC0, 0x0EC4, 0x0F40, 0x0F47,
            0x0F49, 0x0F69, 0x10A0, 0x10C5, 0x10D0, 0x10F6, 0x1102, 0x1103,
            0x1105, 0x1107, 0x110B, 0x110C, 0x110E, 0x1112, 0x1154, 0x1155,
            0x115F, 0x1161, 0x116D, 0x116E, 0x1172, 0x1173, 0x11AE, 0x11AF,
            0x11B7, 0x11B8, 0x11BC, 0x11C2, 0x1E00, 0x1E9B, 0x1EA0, 0x1EF9,
            0x1F00, 0x1F15, 0x1F18, 0x1F1D, 0x1F20, 0x1F45, 0x1F48, 0x1F4D,
            0x1F50, 0x1F57, 0x1F5F, 0x1F7D, 0x1F80, 0x1FB4, 0x1FB6, 0x1FBC,
            0x1FC2, 0x1FC4, 0x1FC6, 0x1FCC, 0x1FD0, 0x1FD3, 0x1FD6, 0x1FDB,
            0x1FE0, 0x1FEC, 0x1FF2, 0x1FF4, 0x1FF6, 0x1FFC, 0x212A, 0x212B,
            0x2180, 0x2182, 0x3041, 0x3094, 0x30A1, 0x30FA, 0x3105, 0x312C,
            0xAC00, 0xD7A3,
            // Ideographic
            0x3021, 0x3029, 0x4E00, 0x9FA5,
        };
        int[] letterChar = {
            // BaseChar
            0x0386, 0x038C, 0x03DA, 0x03DC, 0x03DE, 0x03E0, 0x0559, 0x06D5,
            0x093D, 0x09B2, 0x0A5E, 0x0A8D, 0x0ABD, 0x0AE0, 0x0B3D, 0x0B9C,
            0x0CDE, 0x0E30, 0x0E84, 0x0E8A, 0x0E8D, 0x0EA5, 0x0EA7, 0x0EB0,
            0x0EBD, 0x1100, 0x1109, 0x113C, 0x113E, 0x1140, 0x114C, 0x114E,
            0x1150, 0x1159, 0x1163, 0x1165, 0x1167, 0x1169, 0x1175, 0x119E,
            0x11A8, 0x11AB, 0x11BA, 0x11EB, 0x11F0, 0x11F9, 0x1F59, 0x1F5B,
            0x1F5D, 0x1FBE, 0x2126, 0x212E,
            // Ideographic
            0x3007,
        };

        //
        // [87] CombiningChar ::= ...
        //

        int[] combiningCharRange = {
            0x0300, 0x0345, 0x0360, 0x0361, 0x0483, 0x0486, 0x0591, 0x05A1,
            0x05A3, 0x05B9, 0x05BB, 0x05BD, 0x05C1, 0x05C2, 0x064B, 0x0652,
            0x06D6, 0x06DC, 0x06DD, 0x06DF, 0x06E0, 0x06E4, 0x06E7, 0x06E8,
            0x06EA, 0x06ED, 0x0901, 0x0903, 0x093E, 0x094C, 0x0951, 0x0954,
            0x0962, 0x0963, 0x0981, 0x0983, 0x09C0, 0x09C4, 0x09C7, 0x09C8,
            0x09CB, 0x09CD, 0x09E2, 0x09E3, 0x0A40, 0x0A42, 0x0A47, 0x0A48,
            0x0A4B, 0x0A4D, 0x0A70, 0x0A71, 0x0A81, 0x0A83, 0x0ABE, 0x0AC5,
            0x0AC7, 0x0AC9, 0x0ACB, 0x0ACD, 0x0B01, 0x0B03, 0x0B3E, 0x0B43,
            0x0B47, 0x0B48, 0x0B4B, 0x0B4D, 0x0B56, 0x0B57, 0x0B82, 0x0B83,
            0x0BBE, 0x0BC2, 0x0BC6, 0x0BC8, 0x0BCA, 0x0BCD, 0x0C01, 0x0C03,
            0x0C3E, 0x0C44, 0x0C46, 0x0C48, 0x0C4A, 0x0C4D, 0x0C55, 0x0C56,
            0x0C82, 0x0C83, 0x0CBE, 0x0CC4, 0x0CC6, 0x0CC8, 0x0CCA, 0x0CCD,
            0x0CD5, 0x0CD6, 0x0D02, 0x0D03, 0x0D3E, 0x0D43, 0x0D46, 0x0D48,
            0x0D4A, 0x0D4D, 0x0E34, 0x0E3A, 0x0E47, 0x0E4E, 0x0EB4, 0x0EB9,
            0x0EBB, 0x0EBC, 0x0EC8, 0x0ECD, 0x0F18, 0x0F19, 0x0F71, 0x0F84,
            0x0F86, 0x0F8B, 0x0F90, 0x0F95, 0x0F99, 0x0FAD, 0x0FB1, 0x0FB7,
            0x20D0, 0x20DC, 0x302A, 0x302F,
        };

        int[] combiningCharChar = {
            0x05BF, 0x05C4, 0x0670, 0x093C, 0x094D, 0x09BC, 0x09BE, 0x09BF,
            0x09D7, 0x0A02, 0x0A3C, 0x0A3E, 0x0A3F, 0x0ABC, 0x0B3C, 0x0BD7,
            0x0D57, 0x0E31, 0x0EB1, 0x0F35, 0x0F37, 0x0F39, 0x0F3E, 0x0F3F,
            0x0F97, 0x0FB9, 0x20E1, 0x3099, 0x309A,
        };

        //
        // [88] Digit ::= ...
        //

        int[] digitRange = {
            0x0030, 0x0039, 0x0660, 0x0669, 0x06F0, 0x06F9, 0x0966, 0x096F,
            0x09E6, 0x09EF, 0x0A66, 0x0A6F, 0x0AE6, 0x0AEF, 0x0B66, 0x0B6F,
            0x0BE7, 0x0BEF, 0x0C66, 0x0C6F, 0x0CE6, 0x0CEF, 0x0D66, 0x0D6F,
            0x0E50, 0x0E59, 0x0ED0, 0x0ED9, 0x0F20, 0x0F29,
        };

        //
        // [89] Extender ::= ...
        //

        int[] extenderRange = {
            0x3031, 0x3035, 0x309D, 0x309E, 0x30FC, 0x30FE,
        };

        int[] extenderChar = {
            0x00B7, 0x02D0, 0x02D1, 0x0387, 0x0640, 0x0E46, 0x0EC6, 0x3005,
        };

        //
        // SpecialChar ::= '<', '&', '\n', '\r', ']'
        //

        int[] specialChar = {
            '<', '&', '\n', '\r', ']',
        };

        //
        // Initialize
        //

        // Every name start character is also a name character, and (until
        // ':' is removed below) an NCName start / NCName character.
        final int nameStartFlags =
            MASK_NAME_START | MASK_NAME | MASK_NCNAME_START | MASK_NCNAME;
        final int nameFlags = MASK_NAME | MASK_NCNAME;

        // set valid characters; all of them start out as content
        setRanges(charRange, MASK_VALID | MASK_CONTENT);

        // remove special characters from content
        clearChars(specialChar, MASK_CONTENT);

        // set space characters
        setChars(spaceChar, MASK_SPACE);

        // set name start characters
        setChars(nameStartChar, nameStartFlags);
        setRanges(letterRange, nameStartFlags);
        setChars(letterChar, nameStartFlags);

        // set name characters
        setChars(nameChar, nameFlags);
        setRanges(digitRange, nameFlags);
        setRanges(combiningCharRange, nameFlags);
        setChars(combiningCharChar, nameFlags);
        setRanges(extenderRange, nameFlags);
        setChars(extenderChar, nameFlags);

        // remove ':' from allowable MASK_NCNAME_START and MASK_NCNAME chars;
        // this must come after all name flags have been set
        CHARS[':'] &= ~(MASK_NCNAME_START | MASK_NCNAME);

        // set Pubid characters
        setChars(pubidChar, MASK_PUBID);
        setRanges(pubidRange, MASK_PUBID);

    } // <clinit>()

    //
    // Private helpers
    //

    /** ORs <code>mask</code> into the flags of every character listed in <code>chars</code>. */
    private static void setChars(int[] chars, int mask) {
        for (int i = 0; i < chars.length; i++) {
            CHARS[chars[i]] |= mask;
        }
    }

    /**
     * ORs <code>mask</code> into the flags of every character in the
     * inclusive ranges given as consecutive (first, last) pairs.
     */
    private static void setRanges(int[] ranges, int mask) {
        for (int i = 0; i < ranges.length; i += 2) {
            for (int c = ranges[i]; c <= ranges[i + 1]; c++) {
                CHARS[c] |= mask;
            }
        }
    }

    /** Clears the bits of <code>mask</code> from the flags of every character listed in <code>chars</code>. */
    private static void clearChars(int[] chars, int mask) {
        for (int i = 0; i < chars.length; i++) {
            CHARS[chars[i]] &= ~mask;
        }
    }

    /**
     * Returns true if <code>c</code> lies inside the flag table and has at
     * least one bit of <code>mask</code> set. Values outside 0x0000-0xFFFF,
     * including negative ones, yield false instead of an out-of-bounds access.
     */
    private static boolean hasFlag(int c, int mask) {
        return c >= 0 && c < 0x10000 && (CHARS[c] & mask) != 0;
    }

    /**
     * Returns true if <code>s</code> is non-null and non-empty, its first
     * character carries <code>firstMask</code> and every following character
     * carries <code>restMask</code>.
     */
    private static boolean matchesMasks(String s, int firstMask, int restMask) {
        if (s == null || s.length() == 0) {
            return false;
        }
        if (!hasFlag(s.charAt(0), firstMask)) {
            return false;
        }
        for (int i = 1; i < s.length(); i++) {
            if (!hasFlag(s.charAt(i), restMask)) {
                return false;
            }
        }
        return true;
    }

    /**
     * Returns true if <code>c</code> is an ASCII letter, an ASCII digit,
     * '.', '_' or '-', the characters accepted inside an encoding name.
     */
    private static boolean isEncodingNameChar(char c) {
        return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z')
            || (c >= '0' && c <= '9') || c == '.' || c == '_' || c == '-';
    }

    //
    // Public static methods
    //

    /**
     * Returns true if the specified character is a supplemental character,
     * i.e. lies in the range 0x10000 to 0x10FFFF.
     *
     * @param c The character to check.
     */
    public static boolean isSupplemental(int c) {
        return (c >= 0x10000 && c <= 0x10FFFF);
    }

    /**
     * Returns the supplemental character corresponding to the given
     * surrogates. The arguments are not checked to be surrogates.
     *
     * @param h The high surrogate.
     * @param l The low surrogate.
     */
    public static int supplemental(char h, char l) {
        return (h - 0xD800) * 0x400 + (l - 0xDC00) + 0x10000;
    }

    /**
     * Returns the high surrogate of a supplemental character.
     *
     * @param c The supplemental character to "split".
     */
    public static char highSurrogate(int c) {
        return (char) (((c - 0x00010000) >> 10) + 0xD800);
    }

    /**
     * Returns the low surrogate of a supplemental character.
     *
     * @param c The supplemental character to "split".
     */
    public static char lowSurrogate(int c) {
        return (char) (((c - 0x00010000) & 0x3FF) + 0xDC00);
    }

    /**
     * Returns whether the given character is a high surrogate
     * (0xD800 to 0xDBFF).
     *
     * @param c The character to check.
     */
    public static boolean isHighSurrogate(int c) {
        return (0xD800 <= c && c <= 0xDBFF);
    }

    /**
     * Returns whether the given character is a low surrogate
     * (0xDC00 to 0xDFFF).
     *
     * @param c The character to check.
     */
    public static boolean isLowSurrogate(int c) {
        return (0xDC00 <= c && c <= 0xDFFF);
    }

    /**
     * Returns true if the specified character is valid. Besides the
     * 16-bit flag table this method also accepts the supplemental
     * character range from 0x10000 to 0x10FFFF. Negative values and
     * values above 0x10FFFF are not valid.
     *
     * @param c The character to check.
     */
    public static boolean isValid(int c) {
        return hasFlag(c, MASK_VALID) || isSupplemental(c);
    } // isValid(int):boolean

    /**
     * Returns true if the specified character is invalid.
     *
     * @param c The character to check.
     */
    public static boolean isInvalid(int c) {
        return !isValid(c);
    } // isInvalid(int):boolean

    /**
     * Returns true if the specified character can be considered content.
     * Supplemental characters (0x10000 to 0x10FFFF) are always content.
     *
     * @param c The character to check.
     */
    public static boolean isContent(int c) {
        return hasFlag(c, MASK_CONTENT) || isSupplemental(c);
    } // isContent(int):boolean

    /**
     * Returns true if the specified character can be considered markup.
     * Markup characters include '&lt;', '&amp;', and '%'.
     *
     * @param c The character to check.
     */
    public static boolean isMarkup(int c) {
        return c == '<' || c == '&' || c == '%';
    } // isMarkup(int):boolean

    /**
     * Returns true if the specified character is a space character
     * as defined by production [3] in the XML 1.0 specification.
     *
     * @param c The character to check.
     */
    public static boolean isSpace(int c) {
        return hasFlag(c, MASK_SPACE);
    } // isSpace(int):boolean

    /**
     * Returns true if the specified character is a valid name start
     * character as defined by production [5] in the XML 1.0
     * specification.
     *
     * @param c The character to check.
     */
    public static boolean isNameStart(int c) {
        return hasFlag(c, MASK_NAME_START);
    } // isNameStart(int):boolean

    /**
     * Returns true if the specified character is a valid name
     * character as defined by production [4] in the XML 1.0
     * specification.
     *
     * @param c The character to check.
     */
    public static boolean isName(int c) {
        return hasFlag(c, MASK_NAME);
    } // isName(int):boolean

    /**
     * Returns true if the specified character is a valid NCName start
     * character as defined by production [4] in Namespaces in XML
     * recommendation.
     *
     * @param c The character to check.
     */
    public static boolean isNCNameStart(int c) {
        return hasFlag(c, MASK_NCNAME_START);
    } // isNCNameStart(int):boolean

    /**
     * Returns true if the specified character is a valid NCName
     * character as defined by production [5] in Namespaces in XML
     * recommendation.
     *
     * @param c The character to check.
     */
    public static boolean isNCName(int c) {
        return hasFlag(c, MASK_NCNAME);
    } // isNCName(int):boolean

    /**
     * Returns true if the specified character is a valid Pubid
     * character as defined by production [13] in the XML 1.0
     * specification.
     *
     * @param c The character to check.
     */
    public static boolean isPubid(int c) {
        return hasFlag(c, MASK_PUBID);
    } // isPubid(int):boolean

    /*
     * [5] Name ::= (Letter | '_' | ':') (NameChar)*
     */
    /**
     * Check to see if a string is a valid Name according to [5]
     * in the XML 1.0 Recommendation.
     *
     * @param name string to check
     * @return true if name is a valid Name; false for a null or empty string
     */
    public static boolean isValidName(String name) {
        return matchesMasks(name, MASK_NAME_START, MASK_NAME);
    } // isValidName(String):boolean

    /*
     * from the namespace rec
     * [4] NCName ::= (Letter | '_') (NCNameChar)*
     */
    /**
     * Check to see if a string is a valid NCName according to [4]
     * from the XML Namespaces 1.0 Recommendation.
     *
     * @param ncName string to check
     * @return true if name is a valid NCName; false for a null or empty string
     */
    public static boolean isValidNCName(String ncName) {
        return matchesMasks(ncName, MASK_NCNAME_START, MASK_NCNAME);
    } // isValidNCName(String):boolean

    /*
     * [7] Nmtoken ::= (NameChar)+
     */
    /**
     * Check to see if a string is a valid Nmtoken according to [7]
     * in the XML 1.0 Recommendation.
     *
     * @param nmtoken string to check
     * @return true if nmtoken is a valid Nmtoken; false for a null or empty string
     */
    public static boolean isValidNmtoken(String nmtoken) {
        return matchesMasks(nmtoken, MASK_NAME, MASK_NAME);
    } // isValidNmtoken(String):boolean

    // encodings

    /**
     * Returns true if the encoding name is a valid IANA encoding.
     * This method does not verify that there is a decoder available
     * for this encoding, only that the characters are valid for an
     * IANA encoding name: an ASCII letter followed by ASCII letters,
     * digits, '.', '_' or '-'.
     *
     * @param ianaEncoding The IANA encoding name.
     * @return false for a null or empty name
     */
    public static boolean isValidIANAEncoding(String ianaEncoding) {
        if (ianaEncoding == null || ianaEncoding.length() == 0) {
            return false;
        }
        char first = ianaEncoding.charAt(0);
        boolean startsWithLetter =
            (first >= 'A' && first <= 'Z') || (first >= 'a' && first <= 'z');
        if (!startsWithLetter) {
            return false;
        }
        for (int i = 1; i < ianaEncoding.length(); i++) {
            if (!isEncodingNameChar(ianaEncoding.charAt(i))) {
                return false;
            }
        }
        return true;
    } // isValidIANAEncoding(String):boolean

    /**
     * Returns true if the encoding name is a valid Java encoding.
     * This method does not verify that there is a decoder available
     * for this encoding, only that the characters are valid for a
     * Java encoding name: ASCII letters, digits, '.', '_' or '-'.
     * Unlike an IANA name, a Java name may start with any of these
     * characters (for example "8859_1").
     *
     * @param javaEncoding The Java encoding name.
     * @return false for a null or empty name
     */
    public static boolean isValidJavaEncoding(String javaEncoding) {
        if (javaEncoding == null || javaEncoding.length() == 0) {
            return false;
        }
        // Start at index 0: the first character must be checked too.
        for (int i = 0; i < javaEncoding.length(); i++) {
            if (!isEncodingNameChar(javaEncoding.charAt(i))) {
                return false;
            }
        }
        return true;
    } // isValidJavaEncoding(String):boolean

    /**
     * Simple check to determine if a qname is legal. A string without a
     * colon must be a valid NCName; a string with a colon must consist of
     * a valid NCName prefix, the colon, and a valid NCName local part.
     *
     * @param str string to check
     * @return true if <code>str</code> is legal; false if it is illegal,
     *         null or empty
     */
    public static boolean isValidQName(String str) {
        if (str == null) {
            return false;
        }

        final int colon = str.indexOf(':');

        // A leading or trailing colon leaves an empty prefix or local part.
        // For the empty string both sides are -1, so it is rejected here too.
        if (colon == 0 || colon == str.length() - 1) {
            return false;
        }

        if (colon < 0) {
            return isValidNCName(str);
        }

        // NCName excludes ':', so any further colon in the local part fails.
        final String prefix = str.substring(0, colon);
        final String localPart = str.substring(colon + 1);
        return isValidNCName(prefix) && isValidNCName(localPart);
    } // isValidQName(String):boolean

} // class XMLChar