001    /*
002     * Licensed to the Apache Software Foundation (ASF) under one
003     * or more contributor license agreements. See the NOTICE file
004     * distributed with this work for additional information
005     * regarding copyright ownership. The ASF licenses this file
006     * to you under the Apache License, Version 2.0 (the  "License");
007     * you may not use this file except in compliance with the License.
008     * You may obtain a copy of the License at
009     *
010     *     http://www.apache.org/licenses/LICENSE-2.0
011     *
012     * Unless required by applicable law or agreed to in writing, software
013     * distributed under the License is distributed on an "AS IS" BASIS,
014     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015     * See the License for the specific language governing permissions and
016     * limitations under the License.
017     */
018    /*
019     * $Id: XMLChar.java 1225426 2011-12-29 04:13:08Z mrglavas $
020     */
021    
022    package org.apache.xml.utils;
023    
024    /**
025     * This class defines the basic XML character properties. The data
026     * in this class can be used to verify that a character is a valid
027     * XML character or if the character is a space, name start, or name
028     * character.
029     * <p>
030     * A series of convenience methods is supplied to ease the burden
031     * of the developer. The table of character properties
032     * (<code>CHARS</code>) is private to this class, so callers must
033     * go through the convenience methods; each one indexes the table
034     * with the character and applies the matching mask flag (e.g.
035     * <code>MASK_VALID</code>). The table covers only 0x0000-0xFFFF:
036     * see the comments for the <code>isValid</code> method for how the
037     * supplemental range 0x10000-0x10FFFF is handled.
038     *
039     * @author Glenn Marcy, IBM
040     * @author Andy Clark, IBM
041     * @author Eric Ye, IBM
042     * @author Arnaud  Le Hors, IBM
043     * @author Rahul Srivastava, Sun Microsystems Inc.
044     *
045     * @version $Id: XMLChar.java 1225426 2011-12-29 04:13:08Z mrglavas $
046     */
public class XMLChar {

    //
    // Constants
    //

    /**
     * Character flags, indexed by UTF-16 code unit (0x0000-0xFFFF). Each
     * entry is a bit set built from the <code>MASK_*</code> constants
     * below; the table is filled in once by the static initializer and
     * only read afterwards.
     */
    private static final byte[] CHARS = new byte[1 << 16];

    /** Valid character mask. */
    public static final int MASK_VALID = 0x01;

    /** Space character mask. */
    public static final int MASK_SPACE = 0x02;

    /** Name start character mask. */
    public static final int MASK_NAME_START = 0x04;

    /** Name character mask. */
    public static final int MASK_NAME = 0x08;

    /** Pubid character mask. */
    public static final int MASK_PUBID = 0x10;

    /**
     * Content character mask. Special characters are those that can
     * be considered the start of markup, such as '&lt;' and '&amp;'.
     * The various newline characters are considered special as well.
     * All other valid XML characters can be considered content.
     * <p>
     * This is an optimization for the inner loop of character scanning.
     */
    public static final int MASK_CONTENT = 0x20;

    /** NCName start character mask. */
    public static final int MASK_NCNAME_START = 0x40;

    /** NCName character mask. */
    public static final int MASK_NCNAME = 0x80;

    //
    // Static initialization
    //

    static {

        //
        // [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] |
        //              [#xE000-#xFFFD] | [#x10000-#x10FFFF]
        //
        // Only the part below 0x10000 is stored in the table; the
        // supplemental range is checked arithmetically by the accessors.
        //

        int[] charRange = {
            0x0009, 0x000A, 0x000D, 0x000D, 0x0020, 0xD7FF, 0xE000, 0xFFFD,
        };

        //
        // [3] S ::= (#x20 | #x9 | #xD | #xA)+
        //

        int[] spaceChar = {
            0x0020, 0x0009, 0x000D, 0x000A,
        };

        //
        // [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
        //                  CombiningChar | Extender
        //

        int[] nameChar = {
            0x002D, 0x002E, // '-' and '.'
        };

        //
        // [5] Name ::= (Letter | '_' | ':') (NameChar)*
        //

        int[] nameStartChar = {
            0x003A, 0x005F, // ':' and '_'
        };

        //
        // [13] PubidChar ::= #x20 | 0xD | 0xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%]
        //

        int[] pubidChar = {
            0x000A, 0x000D, 0x0020, 0x0021, 0x0023, 0x0024, 0x0025, 0x003D,
            0x005F
        };

        int[] pubidRange = {
            0x0027, 0x003B, 0x003F, 0x005A, 0x0061, 0x007A
        };

        //
        // [84] Letter ::= BaseChar | Ideographic
        //
        // Range tables hold inclusive (first, last) pairs.
        //

        int[] letterRange = {
            // BaseChar
            0x0041, 0x005A, 0x0061, 0x007A, 0x00C0, 0x00D6, 0x00D8, 0x00F6,
            0x00F8, 0x0131, 0x0134, 0x013E, 0x0141, 0x0148, 0x014A, 0x017E,
            0x0180, 0x01C3, 0x01CD, 0x01F0, 0x01F4, 0x01F5, 0x01FA, 0x0217,
            0x0250, 0x02A8, 0x02BB, 0x02C1, 0x0388, 0x038A, 0x038E, 0x03A1,
            0x03A3, 0x03CE, 0x03D0, 0x03D6, 0x03E2, 0x03F3, 0x0401, 0x040C,
            0x040E, 0x044F, 0x0451, 0x045C, 0x045E, 0x0481, 0x0490, 0x04C4,
            0x04C7, 0x04C8, 0x04CB, 0x04CC, 0x04D0, 0x04EB, 0x04EE, 0x04F5,
            0x04F8, 0x04F9, 0x0531, 0x0556, 0x0561, 0x0586, 0x05D0, 0x05EA,
            0x05F0, 0x05F2, 0x0621, 0x063A, 0x0641, 0x064A, 0x0671, 0x06B7,
            0x06BA, 0x06BE, 0x06C0, 0x06CE, 0x06D0, 0x06D3, 0x06E5, 0x06E6,
            0x0905, 0x0939, 0x0958, 0x0961, 0x0985, 0x098C, 0x098F, 0x0990,
            0x0993, 0x09A8, 0x09AA, 0x09B0, 0x09B6, 0x09B9, 0x09DC, 0x09DD,
            0x09DF, 0x09E1, 0x09F0, 0x09F1, 0x0A05, 0x0A0A, 0x0A0F, 0x0A10,
            0x0A13, 0x0A28, 0x0A2A, 0x0A30, 0x0A32, 0x0A33, 0x0A35, 0x0A36,
            0x0A38, 0x0A39, 0x0A59, 0x0A5C, 0x0A72, 0x0A74, 0x0A85, 0x0A8B,
            0x0A8F, 0x0A91, 0x0A93, 0x0AA8, 0x0AAA, 0x0AB0, 0x0AB2, 0x0AB3,
            0x0AB5, 0x0AB9, 0x0B05, 0x0B0C, 0x0B0F, 0x0B10, 0x0B13, 0x0B28,
            0x0B2A, 0x0B30, 0x0B32, 0x0B33, 0x0B36, 0x0B39, 0x0B5C, 0x0B5D,
            0x0B5F, 0x0B61, 0x0B85, 0x0B8A, 0x0B8E, 0x0B90, 0x0B92, 0x0B95,
            0x0B99, 0x0B9A, 0x0B9E, 0x0B9F, 0x0BA3, 0x0BA4, 0x0BA8, 0x0BAA,
            0x0BAE, 0x0BB5, 0x0BB7, 0x0BB9, 0x0C05, 0x0C0C, 0x0C0E, 0x0C10,
            0x0C12, 0x0C28, 0x0C2A, 0x0C33, 0x0C35, 0x0C39, 0x0C60, 0x0C61,
            0x0C85, 0x0C8C, 0x0C8E, 0x0C90, 0x0C92, 0x0CA8, 0x0CAA, 0x0CB3,
            0x0CB5, 0x0CB9, 0x0CE0, 0x0CE1, 0x0D05, 0x0D0C, 0x0D0E, 0x0D10,
            0x0D12, 0x0D28, 0x0D2A, 0x0D39, 0x0D60, 0x0D61, 0x0E01, 0x0E2E,
            0x0E32, 0x0E33, 0x0E40, 0x0E45, 0x0E81, 0x0E82, 0x0E87, 0x0E88,
            0x0E94, 0x0E97, 0x0E99, 0x0E9F, 0x0EA1, 0x0EA3, 0x0EAA, 0x0EAB,
            0x0EAD, 0x0EAE, 0x0EB2, 0x0EB3, 0x0EC0, 0x0EC4, 0x0F40, 0x0F47,
            0x0F49, 0x0F69, 0x10A0, 0x10C5, 0x10D0, 0x10F6, 0x1102, 0x1103,
            0x1105, 0x1107, 0x110B, 0x110C, 0x110E, 0x1112, 0x1154, 0x1155,
            0x115F, 0x1161, 0x116D, 0x116E, 0x1172, 0x1173, 0x11AE, 0x11AF,
            0x11B7, 0x11B8, 0x11BC, 0x11C2, 0x1E00, 0x1E9B, 0x1EA0, 0x1EF9,
            0x1F00, 0x1F15, 0x1F18, 0x1F1D, 0x1F20, 0x1F45, 0x1F48, 0x1F4D,
            0x1F50, 0x1F57, 0x1F5F, 0x1F7D, 0x1F80, 0x1FB4, 0x1FB6, 0x1FBC,
            0x1FC2, 0x1FC4, 0x1FC6, 0x1FCC, 0x1FD0, 0x1FD3, 0x1FD6, 0x1FDB,
            0x1FE0, 0x1FEC, 0x1FF2, 0x1FF4, 0x1FF6, 0x1FFC, 0x212A, 0x212B,
            0x2180, 0x2182, 0x3041, 0x3094, 0x30A1, 0x30FA, 0x3105, 0x312C,
            0xAC00, 0xD7A3,
            // Ideographic
            0x3021, 0x3029, 0x4E00, 0x9FA5,
        };
        int[] letterChar = {
            // BaseChar
            0x0386, 0x038C, 0x03DA, 0x03DC, 0x03DE, 0x03E0, 0x0559, 0x06D5,
            0x093D, 0x09B2, 0x0A5E, 0x0A8D, 0x0ABD, 0x0AE0, 0x0B3D, 0x0B9C,
            0x0CDE, 0x0E30, 0x0E84, 0x0E8A, 0x0E8D, 0x0EA5, 0x0EA7, 0x0EB0,
            0x0EBD, 0x1100, 0x1109, 0x113C, 0x113E, 0x1140, 0x114C, 0x114E,
            0x1150, 0x1159, 0x1163, 0x1165, 0x1167, 0x1169, 0x1175, 0x119E,
            0x11A8, 0x11AB, 0x11BA, 0x11EB, 0x11F0, 0x11F9, 0x1F59, 0x1F5B,
            0x1F5D, 0x1FBE, 0x2126, 0x212E,
            // Ideographic
            0x3007,
        };

        //
        // [87] CombiningChar ::= ...
        //

        int[] combiningCharRange = {
            0x0300, 0x0345, 0x0360, 0x0361, 0x0483, 0x0486, 0x0591, 0x05A1,
            0x05A3, 0x05B9, 0x05BB, 0x05BD, 0x05C1, 0x05C2, 0x064B, 0x0652,
            0x06D6, 0x06DC, 0x06DD, 0x06DF, 0x06E0, 0x06E4, 0x06E7, 0x06E8,
            0x06EA, 0x06ED, 0x0901, 0x0903, 0x093E, 0x094C, 0x0951, 0x0954,
            0x0962, 0x0963, 0x0981, 0x0983, 0x09C0, 0x09C4, 0x09C7, 0x09C8,
            0x09CB, 0x09CD, 0x09E2, 0x09E3, 0x0A40, 0x0A42, 0x0A47, 0x0A48,
            0x0A4B, 0x0A4D, 0x0A70, 0x0A71, 0x0A81, 0x0A83, 0x0ABE, 0x0AC5,
            0x0AC7, 0x0AC9, 0x0ACB, 0x0ACD, 0x0B01, 0x0B03, 0x0B3E, 0x0B43,
            0x0B47, 0x0B48, 0x0B4B, 0x0B4D, 0x0B56, 0x0B57, 0x0B82, 0x0B83,
            0x0BBE, 0x0BC2, 0x0BC6, 0x0BC8, 0x0BCA, 0x0BCD, 0x0C01, 0x0C03,
            0x0C3E, 0x0C44, 0x0C46, 0x0C48, 0x0C4A, 0x0C4D, 0x0C55, 0x0C56,
            0x0C82, 0x0C83, 0x0CBE, 0x0CC4, 0x0CC6, 0x0CC8, 0x0CCA, 0x0CCD,
            0x0CD5, 0x0CD6, 0x0D02, 0x0D03, 0x0D3E, 0x0D43, 0x0D46, 0x0D48,
            0x0D4A, 0x0D4D, 0x0E34, 0x0E3A, 0x0E47, 0x0E4E, 0x0EB4, 0x0EB9,
            0x0EBB, 0x0EBC, 0x0EC8, 0x0ECD, 0x0F18, 0x0F19, 0x0F71, 0x0F84,
            0x0F86, 0x0F8B, 0x0F90, 0x0F95, 0x0F99, 0x0FAD, 0x0FB1, 0x0FB7,
            0x20D0, 0x20DC, 0x302A, 0x302F,
        };

        int[] combiningCharChar = {
            0x05BF, 0x05C4, 0x0670, 0x093C, 0x094D, 0x09BC, 0x09BE, 0x09BF,
            0x09D7, 0x0A02, 0x0A3C, 0x0A3E, 0x0A3F, 0x0ABC, 0x0B3C, 0x0BD7,
            0x0D57, 0x0E31, 0x0EB1, 0x0F35, 0x0F37, 0x0F39, 0x0F3E, 0x0F3F,
            0x0F97, 0x0FB9, 0x20E1, 0x3099, 0x309A,
        };

        //
        // [88] Digit ::= ...
        //

        int[] digitRange = {
            0x0030, 0x0039, 0x0660, 0x0669, 0x06F0, 0x06F9, 0x0966, 0x096F,
            0x09E6, 0x09EF, 0x0A66, 0x0A6F, 0x0AE6, 0x0AEF, 0x0B66, 0x0B6F,
            0x0BE7, 0x0BEF, 0x0C66, 0x0C6F, 0x0CE6, 0x0CEF, 0x0D66, 0x0D6F,
            0x0E50, 0x0E59, 0x0ED0, 0x0ED9, 0x0F20, 0x0F29,
        };

        //
        // [89] Extender ::= ...
        //

        int[] extenderRange = {
            0x3031, 0x3035, 0x309D, 0x309E, 0x30FC, 0x30FE,
        };

        int[] extenderChar = {
            0x00B7, 0x02D0, 0x02D1, 0x0387, 0x0640, 0x0E46, 0x0EC6, 0x3005,
        };

        //
        // SpecialChar ::= '<', '&', '\n', '\r', ']'
        //

        int[] specialChar = {
            '<', '&', '\n', '\r', ']',
        };

        //
        // Initialize
        //

        // set valid characters; every valid character starts out as content
        addFlagsToRanges(charRange, MASK_VALID | MASK_CONTENT);

        // remove special characters: they stay valid but are not content.
        // This must run after the valid/content pass above.
        for (int i = 0; i < specialChar.length; i++) {
            CHARS[specialChar[i]] &= ~MASK_CONTENT;
        }

        // set space characters
        addFlagsToChars(spaceChar, MASK_SPACE);

        // set name start characters (a name start character is also a
        // name character, hence all four flags)
        final int nameStartFlags = MASK_NAME_START | MASK_NAME |
                                   MASK_NCNAME_START | MASK_NCNAME;
        addFlagsToChars(nameStartChar, nameStartFlags);
        addFlagsToRanges(letterRange, nameStartFlags);
        addFlagsToChars(letterChar, nameStartFlags);

        // set name characters
        final int nameFlags = MASK_NAME | MASK_NCNAME;
        addFlagsToChars(nameChar, nameFlags);
        addFlagsToRanges(digitRange, nameFlags);
        addFlagsToRanges(combiningCharRange, nameFlags);
        addFlagsToChars(combiningCharChar, nameFlags);
        addFlagsToRanges(extenderRange, nameFlags);
        addFlagsToChars(extenderChar, nameFlags);

        // remove ':' from allowable MASK_NCNAME_START and MASK_NCNAME chars;
        // must run after the name passes above, which set those flags
        CHARS[':'] &= ~(MASK_NCNAME_START | MASK_NCNAME);

        // set Pubid characters
        addFlagsToChars(pubidChar, MASK_PUBID);
        addFlagsToRanges(pubidRange, MASK_PUBID);

    } // <clinit>()

    //
    // Private static helpers
    //

    /**
     * ORs the given flags into the table entry of every character in
     * each inclusive (first, last) pair of <code>ranges</code>.
     */
    private static void addFlagsToRanges(int[] ranges, int flags) {
        for (int i = 0; i < ranges.length; i += 2) {
            for (int c = ranges[i]; c <= ranges[i + 1]; c++) {
                CHARS[c] |= flags;
            }
        }
    }

    /** ORs the given flags into the table entry of each listed character. */
    private static void addFlagsToChars(int[] chars, int flags) {
        for (int i = 0; i < chars.length; i++) {
            CHARS[chars[i]] |= flags;
        }
    }

    /**
     * Returns true if <code>c</code> lies inside the table (0x0000-0xFFFF)
     * and has the given flag set. Values outside the table, including
     * negative values such as an end-of-stream marker, yield false
     * instead of an out-of-bounds access.
     */
    private static boolean hasFlag(int c, int mask) {
        return c >= 0 && c < 0x10000 && (CHARS[c] & mask) != 0;
    }

    /**
     * Returns true if every character of <code>s</code> from index
     * <code>start</code> onwards has the given flag set.
     */
    private static boolean allCharsHaveFlag(String s, int start, int mask) {
        for (int i = start; i < s.length(); i++) {
            if (!hasFlag(s.charAt(i), mask)) {
                return false;
            }
        }
        return true;
    }

    /** Returns true if <code>c</code> is an ASCII letter (A-Z or a-z). */
    private static boolean isAsciiLetter(char c) {
        return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z');
    }

    /**
     * Returns true if <code>c</code> may appear in an encoding name:
     * an ASCII letter, an ASCII digit, '.', '_' or '-'.
     */
    private static boolean isEncodingNameChar(char c) {
        return isAsciiLetter(c) || (c >= '0' && c <= '9') ||
               c == '.' || c == '_' || c == '-';
    }

    //
    // Public static methods
    //

    /**
     * Returns true if the specified character is a supplemental character.
     *
     * @param c The character to check.
     */
    public static boolean isSupplemental(int c) {
        return (c >= 0x10000 && c <= 0x10FFFF);
    }

    /**
     * Returns the supplemental character corresponding to the given
     * surrogates. The arguments are not checked; the result is only
     * meaningful when <code>h</code> is a high surrogate and
     * <code>l</code> is a low surrogate.
     *
     * @param h The high surrogate.
     * @param l The low surrogate.
     */
    public static int supplemental(char h, char l) {
        return (h - 0xD800) * 0x400 + (l - 0xDC00) + 0x10000;
    }

    /**
     * Returns the high surrogate of a supplemental character
     *
     * @param c The supplemental character to "split".
     */
    public static char highSurrogate(int c) {
        return (char) (((c - 0x00010000) >> 10) + 0xD800);
    }

    /**
     * Returns the low surrogate of a supplemental character
     *
     * @param c The supplemental character to "split".
     */
    public static char lowSurrogate(int c) {
        return (char) (((c - 0x00010000) & 0x3FF) + 0xDC00);
    }

    /**
     * Returns whether the given character is a high surrogate
     *
     * @param c The character to check.
     */
    public static boolean isHighSurrogate(int c) {
        return (0xD800 <= c && c <= 0xDBFF);
    }

    /**
     * Returns whether the given character is a low surrogate
     *
     * @param c The character to check.
     */
    public static boolean isLowSurrogate(int c) {
        return (0xDC00 <= c && c <= 0xDFFF);
    }


    /**
     * Returns true if the specified character is valid. Characters below
     * 0x10000 are looked up in the flag table; the supplemental range
     * from 0x10000 to 0x10FFFF is always valid. Any other value,
     * including a negative one, is invalid.
     *
     * @param c The character to check.
     */
    public static boolean isValid(int c) {
        return hasFlag(c, MASK_VALID) || isSupplemental(c);
    } // isValid(int):boolean

    /**
     * Returns true if the specified character is invalid.
     *
     * @param c The character to check.
     */
    public static boolean isInvalid(int c) {
        return !isValid(c);
    } // isInvalid(int):boolean

    /**
     * Returns true if the specified character can be considered content.
     * Supplemental characters (0x10000 to 0x10FFFF) are always content.
     *
     * @param c The character to check.
     */
    public static boolean isContent(int c) {
        return hasFlag(c, MASK_CONTENT) || isSupplemental(c);
    } // isContent(int):boolean

    /**
     * Returns true if the specified character can be considered markup.
     * Markup characters include '&lt;', '&amp;', and '%'.
     *
     * @param c The character to check.
     */
    public static boolean isMarkup(int c) {
        return c == '<' || c == '&' || c == '%';
    } // isMarkup(int):boolean

    /**
     * Returns true if the specified character is a space character
     * as defined by production [3] in the XML 1.0 specification.
     *
     * @param c The character to check.
     */
    public static boolean isSpace(int c) {
        return hasFlag(c, MASK_SPACE);
    } // isSpace(int):boolean

    /**
     * Returns true if the specified character is a valid name start
     * character as defined by production [5] in the XML 1.0
     * specification.
     *
     * @param c The character to check.
     */
    public static boolean isNameStart(int c) {
        return hasFlag(c, MASK_NAME_START);
    } // isNameStart(int):boolean

    /**
     * Returns true if the specified character is a valid name
     * character as defined by production [4] in the XML 1.0
     * specification.
     *
     * @param c The character to check.
     */
    public static boolean isName(int c) {
        return hasFlag(c, MASK_NAME);
    } // isName(int):boolean

    /**
     * Returns true if the specified character is a valid NCName start
     * character as defined by production [4] in Namespaces in XML
     * recommendation.
     *
     * @param c The character to check.
     */
    public static boolean isNCNameStart(int c) {
        return hasFlag(c, MASK_NCNAME_START);
    } // isNCNameStart(int):boolean

    /**
     * Returns true if the specified character is a valid NCName
     * character as defined by production [5] in Namespaces in XML
     * recommendation.
     *
     * @param c The character to check.
     */
    public static boolean isNCName(int c) {
        return hasFlag(c, MASK_NCNAME);
    } // isNCName(int):boolean

    /**
     * Returns true if the specified character is a valid Pubid
     * character as defined by production [13] in the XML 1.0
     * specification.
     *
     * @param c The character to check.
     */
    public static boolean isPubid(int c) {
        return hasFlag(c, MASK_PUBID);
    } // isPubid(int):boolean

    /*
     * [5] Name ::= (Letter | '_' | ':') (NameChar)*
     */
    /**
     * Check to see if a string is a valid Name according to [5]
     * in the XML 1.0 Recommendation
     *
     * @param name string to check
     * @return true if name is a valid Name; false if it is not, or if
     *         it is null or empty
     */
    public static boolean isValidName(String name) {
        if (name == null || name.length() == 0) {
            return false;
        }
        return isNameStart(name.charAt(0))
            && allCharsHaveFlag(name, 1, MASK_NAME);
    } // isValidName(String):boolean


    /*
     * from the namespace rec
     * [4] NCName ::= (Letter | '_') (NCNameChar)*
     */
    /**
     * Check to see if a string is a valid NCName according to [4]
     * from the XML Namespaces 1.0 Recommendation
     *
     * @param ncName string to check
     * @return true if name is a valid NCName; false if it is not, or
     *         if it is null or empty
     */
    public static boolean isValidNCName(String ncName) {
        if (ncName == null || ncName.length() == 0) {
            return false;
        }
        return isNCNameStart(ncName.charAt(0))
            && allCharsHaveFlag(ncName, 1, MASK_NCNAME);
    } // isValidNCName(String):boolean

    /*
     * [7] Nmtoken ::= (NameChar)+
     */
    /**
     * Check to see if a string is a valid Nmtoken according to [7]
     * in the XML 1.0 Recommendation
     *
     * @param nmtoken string to check
     * @return true if nmtoken is a valid Nmtoken; false if it is not,
     *         or if it is null or empty
     */
    public static boolean isValidNmtoken(String nmtoken) {
        if (nmtoken == null || nmtoken.length() == 0) {
            return false;
        }
        return allCharsHaveFlag(nmtoken, 0, MASK_NAME);
    } // isValidNmtoken(String):boolean


    // encodings

    /**
     * Returns true if the encoding name is a valid IANA encoding.
     * This method does not verify that there is a decoder available
     * for this encoding, only that the characters are valid for an
     * IANA encoding name: an ASCII letter followed by any number of
     * ASCII letters, digits, '.', '_' or '-'.
     *
     * @param ianaEncoding The IANA encoding name.
     * @return true if the name is well formed; false if it is not, or
     *         if it is null or empty
     */
    public static boolean isValidIANAEncoding(String ianaEncoding) {
        if (ianaEncoding == null || ianaEncoding.length() == 0) {
            return false;
        }
        if (!isAsciiLetter(ianaEncoding.charAt(0))) {
            return false;
        }
        for (int i = 1; i < ianaEncoding.length(); i++) {
            if (!isEncodingNameChar(ianaEncoding.charAt(i))) {
                return false;
            }
        }
        return true;
    } // isValidIANAEncoding(String):boolean

    /**
     * Returns true if the encoding name is a valid Java encoding.
     * This method does not verify that there is a decoder available
     * for this encoding, only that the characters are valid for a
     * Java encoding name: one or more ASCII letters, digits, '.', '_'
     * or '-'. Unlike an IANA name, the first character does not have
     * to be a letter (e.g. <code>8859_1</code>), but it is still
     * checked against the allowed character set.
     *
     * @param javaEncoding The Java encoding name.
     * @return true if the name is well formed; false if it is not, or
     *         if it is null or empty
     */
    public static boolean isValidJavaEncoding(String javaEncoding) {
        if (javaEncoding == null || javaEncoding.length() == 0) {
            return false;
        }
        // Start at index 0: every character, including the first, must
        // belong to the allowed set.
        for (int i = 0; i < javaEncoding.length(); i++) {
            if (!isEncodingNameChar(javaEncoding.charAt(i))) {
                return false;
            }
        }
        return true;
    } // isValidJavaEncoding(String):boolean

    /**
     * Simple check to determine if qname is legal. A legal QName is
     * either a single NCName, or a prefix NCName and a local-part
     * NCName separated by exactly one ':'.
     *
     * @param str string to check
     * @return true if <code>str</code> is legal; false if it is
     *         illegal, null or empty
     */
    public static boolean isValidQName(String str) {
        if (str == null) {
            return false;
        }

        final int colon = str.indexOf(':');

        // A leading or trailing ':' leaves an empty prefix or local part.
        if (colon == 0 || colon == str.length() - 1) {
            return false;
        }

        if (colon < 0) {
            return isValidNCName(str);
        }

        // Split at the first ':'; any further ':' ends up in the local
        // part and is rejected there because ':' is not an NCName char.
        final String prefix = str.substring(0, colon);
        final String localPart = str.substring(colon + 1);
        return isValidNCName(prefix) && isValidNCName(localPart);
    } // isValidQName(String):boolean

} // class XMLChar