001 /* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the "License"); 007 * you may not use this file except in compliance with the License. 008 * You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018 019 package org.apache.xml.serializer.utils; 020 021 import java.util.Arrays; 022 023 /** 024 * THIS IS A COPY OF THE XERCES-2J CLASS org.apache.xerces.utls.XMLChar 025 * 026 * This class defines the basic properties of characters in XML 1.1. The data 027 * in this class can be used to verify that a character is a valid 028 * XML 1.1 character or if the character is a space, name start, or name 029 * character. 030 * <p> 031 * A series of convenience methods are supplied to ease the burden 032 * of the developer. Using the character as an index into the <code>XML11CHARS</code> 033 * array and applying the appropriate mask flag (e.g. 034 * <code>MASK_VALID</code>), yields the same results as calling the 035 * convenience methods. There is one exception: check the comments 036 * for the <code>isValid</code> method for details. 037 * 038 * @author Glenn Marcy, IBM 039 * @author Andy Clark, IBM 040 * @author Arnaud Le Hors, IBM 041 * @author Neil Graham, IBM 042 * @author Michael Glavassevich, IBM 043 * 044 * @version $Id: XML11Char.java 1225426 2011-12-29 04:13:08Z mrglavas $ 045 */ 046 public class XML11Char { 047 048 // 049 // Constants 050 // 051 052 /** Character flags for XML 1.1. */ 053 private static final byte XML11CHARS [] = new byte [1 << 16]; 054 055 /** XML 1.1 Valid character mask. */ 056 public static final int MASK_XML11_VALID = 0x01; 057 058 /** XML 1.1 Space character mask. */ 059 public static final int MASK_XML11_SPACE = 0x02; 060 061 /** XML 1.1 Name start character mask. */ 062 public static final int MASK_XML11_NAME_START = 0x04; 063 064 /** XML 1.1 Name character mask. */ 065 public static final int MASK_XML11_NAME = 0x08; 066 067 /** XML 1.1 control character mask */ 068 public static final int MASK_XML11_CONTROL = 0x10; 069 070 /** XML 1.1 content for external entities (valid - "special" chars - control chars) */ 071 public static final int MASK_XML11_CONTENT = 0x20; 072 073 /** XML namespaces 1.1 NCNameStart */ 074 public static final int MASK_XML11_NCNAME_START = 0x40; 075 076 /** XML namespaces 1.1 NCName */ 077 public static final int MASK_XML11_NCNAME = 0x80; 078 079 /** XML 1.1 content for internal entities (valid - "special" chars) */ 080 public static final int MASK_XML11_CONTENT_INTERNAL = MASK_XML11_CONTROL | MASK_XML11_CONTENT; 081 082 // 083 // Static initialization 084 // 085 086 static { 087 088 // Initializing the Character Flag Array 089 // Code generated by: XML11CharGenerator. 090 091 Arrays.fill(XML11CHARS, 1, 9, (byte) 17 ); // Fill 8 of value (byte) 17 092 XML11CHARS[9] = 35; 093 XML11CHARS[10] = 3; 094 Arrays.fill(XML11CHARS, 11, 13, (byte) 17 ); // Fill 2 of value (byte) 17 095 XML11CHARS[13] = 3; 096 Arrays.fill(XML11CHARS, 14, 32, (byte) 17 ); // Fill 18 of value (byte) 17 097 XML11CHARS[32] = 35; 098 Arrays.fill(XML11CHARS, 33, 38, (byte) 33 ); // Fill 5 of value (byte) 33 099 XML11CHARS[38] = 1; 100 Arrays.fill(XML11CHARS, 39, 45, (byte) 33 ); // Fill 6 of value (byte) 33 101 Arrays.fill(XML11CHARS, 45, 47, (byte) -87 ); // Fill 2 of value (byte) -87 102 XML11CHARS[47] = 33; 103 Arrays.fill(XML11CHARS, 48, 58, (byte) -87 ); // Fill 10 of value (byte) -87 104 XML11CHARS[58] = 45; 105 XML11CHARS[59] = 33; 106 XML11CHARS[60] = 1; 107 Arrays.fill(XML11CHARS, 61, 65, (byte) 33 ); // Fill 4 of value (byte) 33 108 Arrays.fill(XML11CHARS, 65, 91, (byte) -19 ); // Fill 26 of value (byte) -19 109 Arrays.fill(XML11CHARS, 91, 93, (byte) 33 ); // Fill 2 of value (byte) 33 110 XML11CHARS[93] = 1; 111 XML11CHARS[94] = 33; 112 XML11CHARS[95] = -19; 113 XML11CHARS[96] = 33; 114 Arrays.fill(XML11CHARS, 97, 123, (byte) -19 ); // Fill 26 of value (byte) -19 115 Arrays.fill(XML11CHARS, 123, 127, (byte) 33 ); // Fill 4 of value (byte) 33 116 Arrays.fill(XML11CHARS, 127, 133, (byte) 17 ); // Fill 6 of value (byte) 17 117 XML11CHARS[133] = 35; 118 Arrays.fill(XML11CHARS, 134, 160, (byte) 17 ); // Fill 26 of value (byte) 17 119 Arrays.fill(XML11CHARS, 160, 183, (byte) 33 ); // Fill 23 of value (byte) 33 120 XML11CHARS[183] = -87; 121 Arrays.fill(XML11CHARS, 184, 192, (byte) 33 ); // Fill 8 of value (byte) 33 122 Arrays.fill(XML11CHARS, 192, 215, (byte) -19 ); // Fill 23 of value (byte) -19 123 XML11CHARS[215] = 33; 124 Arrays.fill(XML11CHARS, 216, 247, (byte) -19 ); // Fill 31 of value (byte) -19 125 XML11CHARS[247] = 33; 126 Arrays.fill(XML11CHARS, 248, 768, (byte) -19 ); // Fill 520 of value (byte) -19 127 Arrays.fill(XML11CHARS, 768, 880, (byte) -87 ); // Fill 112 of value (byte) -87 128 Arrays.fill(XML11CHARS, 880, 894, (byte) -19 ); // Fill 14 of value (byte) -19 129 XML11CHARS[894] = 33; 130 Arrays.fill(XML11CHARS, 895, 8192, (byte) -19 ); // Fill 7297 of value (byte) -19 131 Arrays.fill(XML11CHARS, 8192, 8204, (byte) 33 ); // Fill 12 of value (byte) 33 132 Arrays.fill(XML11CHARS, 8204, 8206, (byte) -19 ); // Fill 2 of value (byte) -19 133 Arrays.fill(XML11CHARS, 8206, 8232, (byte) 33 ); // Fill 26 of value (byte) 33 134 XML11CHARS[8232] = 35; 135 Arrays.fill(XML11CHARS, 8233, 8255, (byte) 33 ); // Fill 22 of value (byte) 33 136 Arrays.fill(XML11CHARS, 8255, 8257, (byte) -87 ); // Fill 2 of value (byte) -87 137 Arrays.fill(XML11CHARS, 8257, 8304, (byte) 33 ); // Fill 47 of value (byte) 33 138 Arrays.fill(XML11CHARS, 8304, 8592, (byte) -19 ); // Fill 288 of value (byte) -19 139 Arrays.fill(XML11CHARS, 8592, 11264, (byte) 33 ); // Fill 2672 of value (byte) 33 140 Arrays.fill(XML11CHARS, 11264, 12272, (byte) -19 ); // Fill 1008 of value (byte) -19 141 Arrays.fill(XML11CHARS, 12272, 12289, (byte) 33 ); // Fill 17 of value (byte) 33 142 Arrays.fill(XML11CHARS, 12289, 55296, (byte) -19 ); // Fill 43007 of value (byte) -19 143 Arrays.fill(XML11CHARS, 57344, 63744, (byte) 33 ); // Fill 6400 of value (byte) 33 144 Arrays.fill(XML11CHARS, 63744, 64976, (byte) -19 ); // Fill 1232 of value (byte) -19 145 Arrays.fill(XML11CHARS, 64976, 65008, (byte) 33 ); // Fill 32 of value (byte) 33 146 Arrays.fill(XML11CHARS, 65008, 65534, (byte) -19 ); // Fill 526 of value (byte) -19 147 148 } // <clinit>() 149 150 // 151 // Public static methods 152 // 153 154 /** 155 * Returns true if the specified character is a space character 156 * as amdended in the XML 1.1 specification. 157 * 158 * @param c The character to check. 159 */ 160 public static boolean isXML11Space(int c) { 161 return (c < 0x10000 && (XML11CHARS[c] & MASK_XML11_SPACE) != 0); 162 } // isXML11Space(int):boolean 163 164 /** 165 * Returns true if the specified character is valid. This method 166 * also checks the surrogate character range from 0x10000 to 0x10FFFF. 167 * <p> 168 * If the program chooses to apply the mask directly to the 169 * <code>XML11CHARS</code> array, then they are responsible for checking 170 * the surrogate character range. 171 * 172 * @param c The character to check. 173 */ 174 public static boolean isXML11Valid(int c) { 175 return (c < 0x10000 && (XML11CHARS[c] & MASK_XML11_VALID) != 0) 176 || (0x10000 <= c && c <= 0x10FFFF); 177 } // isXML11Valid(int):boolean 178 179 /** 180 * Returns true if the specified character is invalid. 181 * 182 * @param c The character to check. 183 */ 184 public static boolean isXML11Invalid(int c) { 185 return !isXML11Valid(c); 186 } // isXML11Invalid(int):boolean 187 188 /** 189 * Returns true if the specified character is valid and permitted outside 190 * of a character reference. 191 * That is, this method will return false for the same set as 192 * isXML11Valid, except it also reports false for "control characters". 193 * 194 * @param c The character to check. 195 */ 196 public static boolean isXML11ValidLiteral(int c) { 197 return ((c < 0x10000 && ((XML11CHARS[c] & MASK_XML11_VALID) != 0 && (XML11CHARS[c] & MASK_XML11_CONTROL) == 0)) 198 || (0x10000 <= c && c <= 0x10FFFF)); 199 } // isXML11ValidLiteral(int):boolean 200 201 /** 202 * Returns true if the specified character can be considered 203 * content in an external parsed entity. 204 * 205 * @param c The character to check. 206 */ 207 public static boolean isXML11Content(int c) { 208 return (c < 0x10000 && (XML11CHARS[c] & MASK_XML11_CONTENT) != 0) || 209 (0x10000 <= c && c <= 0x10FFFF); 210 } // isXML11Content(int):boolean 211 212 /** 213 * Returns true if the specified character can be considered 214 * content in an internal parsed entity. 215 * 216 * @param c The character to check. 217 */ 218 public static boolean isXML11InternalEntityContent(int c) { 219 return (c < 0x10000 && (XML11CHARS[c] & MASK_XML11_CONTENT_INTERNAL) != 0) || 220 (0x10000 <= c && c <= 0x10FFFF); 221 } // isXML11InternalEntityContent(int):boolean 222 223 /** 224 * Returns true if the specified character is a valid name start 225 * character as defined by production [4] in the XML 1.1 226 * specification. 227 * 228 * @param c The character to check. 229 */ 230 public static boolean isXML11NameStart(int c) { 231 return (c < 0x10000 && (XML11CHARS[c] & MASK_XML11_NAME_START) != 0) 232 || (0x10000 <= c && c < 0xF0000); 233 } // isXML11NameStart(int):boolean 234 235 /** 236 * Returns true if the specified character is a valid name 237 * character as defined by production [4a] in the XML 1.1 238 * specification. 239 * 240 * @param c The character to check. 241 */ 242 public static boolean isXML11Name(int c) { 243 return (c < 0x10000 && (XML11CHARS[c] & MASK_XML11_NAME) != 0) 244 || (c >= 0x10000 && c < 0xF0000); 245 } // isXML11Name(int):boolean 246 247 /** 248 * Returns true if the specified character is a valid NCName start 249 * character as defined by production [4] in Namespaces in XML 250 * 1.1 recommendation. 251 * 252 * @param c The character to check. 253 */ 254 public static boolean isXML11NCNameStart(int c) { 255 return (c < 0x10000 && (XML11CHARS[c] & MASK_XML11_NCNAME_START) != 0) 256 || (0x10000 <= c && c < 0xF0000); 257 } // isXML11NCNameStart(int):boolean 258 259 /** 260 * Returns true if the specified character is a valid NCName 261 * character as defined by production [5] in Namespaces in XML 262 * 1.1 recommendation. 263 * 264 * @param c The character to check. 265 */ 266 public static boolean isXML11NCName(int c) { 267 return (c < 0x10000 && (XML11CHARS[c] & MASK_XML11_NCNAME) != 0) 268 || (0x10000 <= c && c < 0xF0000); 269 } // isXML11NCName(int):boolean 270 271 /** 272 * Returns whether the given character is a valid 273 * high surrogate for a name character. This includes 274 * all high surrogates for characters [0x10000-0xEFFFF]. 275 * In other words everything excluding planes 15 and 16. 276 * 277 * @param c The character to check. 278 */ 279 public static boolean isXML11NameHighSurrogate(int c) { 280 return (0xD800 <= c && c <= 0xDB7F); 281 } 282 283 /* 284 * [5] Name ::= NameStartChar NameChar* 285 */ 286 /** 287 * Check to see if a string is a valid Name according to [5] 288 * in the XML 1.1 Recommendation 289 * 290 * @param name string to check 291 * @return true if name is a valid Name 292 */ 293 public static boolean isXML11ValidName(String name) { 294 int length = name.length(); 295 if (length == 0) 296 return false; 297 int i = 1; 298 char ch = name.charAt(0); 299 if( !isXML11NameStart(ch) ) { 300 if ( length > 1 && isXML11NameHighSurrogate(ch) ) { 301 char ch2 = name.charAt(1); 302 if ( !XMLChar.isLowSurrogate(ch2) || 303 !isXML11NameStart(XMLChar.supplemental(ch, ch2)) ) { 304 return false; 305 } 306 i = 2; 307 } 308 else { 309 return false; 310 } 311 } 312 while (i < length) { 313 ch = name.charAt(i); 314 if ( !isXML11Name(ch) ) { 315 if ( ++i < length && isXML11NameHighSurrogate(ch) ) { 316 char ch2 = name.charAt(i); 317 if ( !XMLChar.isLowSurrogate(ch2) || 318 !isXML11Name(XMLChar.supplemental(ch, ch2)) ) { 319 return false; 320 } 321 } 322 else { 323 return false; 324 } 325 } 326 ++i; 327 } 328 return true; 329 } // isXML11ValidName(String):boolean 330 331 332 /* 333 * from the namespace 1.1 rec 334 * [4] NCName ::= NCNameStartChar NCNameChar* 335 */ 336 /** 337 * Check to see if a string is a valid NCName according to [4] 338 * from the XML Namespaces 1.1 Recommendation 339 * 340 * @param ncName string to check 341 * @return true if name is a valid NCName 342 */ 343 public static boolean isXML11ValidNCName(String ncName) { 344 int length = ncName.length(); 345 if (length == 0) 346 return false; 347 int i = 1; 348 char ch = ncName.charAt(0); 349 if( !isXML11NCNameStart(ch) ) { 350 if ( length > 1 && isXML11NameHighSurrogate(ch) ) { 351 char ch2 = ncName.charAt(1); 352 if ( !XMLChar.isLowSurrogate(ch2) || 353 !isXML11NCNameStart(XMLChar.supplemental(ch, ch2)) ) { 354 return false; 355 } 356 i = 2; 357 } 358 else { 359 return false; 360 } 361 } 362 while (i < length) { 363 ch = ncName.charAt(i); 364 if ( !isXML11NCName(ch) ) { 365 if ( ++i < length && isXML11NameHighSurrogate(ch) ) { 366 char ch2 = ncName.charAt(i); 367 if ( !XMLChar.isLowSurrogate(ch2) || 368 !isXML11NCName(XMLChar.supplemental(ch, ch2)) ) { 369 return false; 370 } 371 } 372 else { 373 return false; 374 } 375 } 376 ++i; 377 } 378 return true; 379 } // isXML11ValidNCName(String):boolean 380 381 /* 382 * [7] Nmtoken ::= (NameChar)+ 383 */ 384 /** 385 * Check to see if a string is a valid Nmtoken according to [7] 386 * in the XML 1.1 Recommendation 387 * 388 * @param nmtoken string to check 389 * @return true if nmtoken is a valid Nmtoken 390 */ 391 public static boolean isXML11ValidNmtoken(String nmtoken) { 392 int length = nmtoken.length(); 393 if (length == 0) 394 return false; 395 for (int i = 0; i < length; ++i ) { 396 char ch = nmtoken.charAt(i); 397 if( !isXML11Name(ch) ) { 398 if ( ++i < length && isXML11NameHighSurrogate(ch) ) { 399 char ch2 = nmtoken.charAt(i); 400 if ( !XMLChar.isLowSurrogate(ch2) || 401 !isXML11Name(XMLChar.supplemental(ch, ch2)) ) { 402 return false; 403 } 404 } 405 else { 406 return false; 407 } 408 } 409 } 410 return true; 411 } // isXML11ValidName(String):boolean 412 413 } // class XML11Char 414