001 /**
002 * Copyright 2005 Alan Green
003 *
004 * Licensed under the Apache License, Version 2.0 (the "License");
005 * you may not use this file except in compliance with the License.
006 * You may obtain a copy of the License at
007 *
008 * http://www.apache.org/licenses/LICENSE-2.0
009 *
010 * Unless required by applicable law or agreed to in writing, software
011 * distributed under the License is distributed on an "AS IS" BASIS,
012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013 * See the License for the specific language governing permissions and
014 * limitations under the License.
015 *
016 */
017
018
019 package org.codehaus.groovy.antlr;
020
021 import java.io.IOException;
022 import java.io.Reader;
023
024 import antlr.CharScanner;
025
026 /**
027 * Translates GLS-defined unicode escapes into characters. Throws an exception
028 * in the event of an invalid unicode escape being detected.
029 *
030 * <p>No attempt has been made to optimise this class for speed or
031 * space.</p>
032 *
033 * @version $Revision: 1.3 $
034 */
035 public class UnicodeEscapingReader extends Reader {
036
037 private Reader reader;
038 private CharScanner lexer;
039 private boolean hasNextChar = false;
040 private int nextChar;
041 private SourceBuffer sourceBuffer;
042
043 /**
044 * Constructor.
045 * @param reader The reader that this reader will filter over.
046 */
047 public UnicodeEscapingReader(Reader reader,SourceBuffer sourceBuffer) {
048 this.reader = reader;
049 this.sourceBuffer = sourceBuffer;
050 }
051
052 /**
053 * Sets the lexer that is using this reader. Must be called before the
054 * lexer is used.
055 */
056 public void setLexer(CharScanner lexer) {
057 this.lexer = lexer;
058 }
059
060 /**
061 * Reads characters from the underlying reader.
062 * @see java.io.Reader#read(char[],int,int)
063 */
064 public int read(char cbuf[], int off, int len) throws IOException {
065 int c = 0;
066 int count = 0;
067 while (count < len && (c = read())!= -1) {
068 cbuf[off + count] = (char) c;
069 count++;
070 }
071 return (count == 0 && c == -1) ? -1 : count;
072 }
073
074 /**
075 * Gets the next character from the underlying reader,
076 * translating escapes as required.
077 * @see java.io.Reader#close()
078 */
079 public int read() throws IOException {
080 if (hasNextChar) {
081 hasNextChar = false;
082 write(nextChar);
083 return nextChar;
084 }
085
086 int c = reader.read();
087 if (c != '\\') {
088 write(c);
089 return c;
090 }
091
092 // Have one backslash, continue if next char is 'u'
093 c = reader.read();
094 if (c != 'u') {
095 hasNextChar = true;
096 nextChar = c;
097 write('\\');
098 return '\\';
099 }
100
101 // Swallow multiple 'u's
102 do {
103 c = reader.read();
104 } while (c == 'u');
105
106 // Get first hex digit
107 checkHexDigit(c);
108 StringBuffer charNum = new StringBuffer();
109 charNum.append((char) c);
110
111 // Must now be three more hex digits
112 for (int i = 0; i < 3; i++) {
113 c = reader.read();
114 checkHexDigit(c);
115 charNum.append((char) c);
116 }
117 int rv = Integer.parseInt(charNum.toString(), 16);
118 write(rv);
119 return rv;
120 }
121 private void write(int c) {
122 if (sourceBuffer != null) {sourceBuffer.write(c);}
123 }
124 /**
125 * Checks that the given character is indeed a hex digit.
126 */
127 private void checkHexDigit(int c) throws IOException {
128 if (c >= '0' && c <= '9') {
129 return;
130 }
131 if (c >= 'a' && c <= 'f') {
132 return;
133 }
134 if (c >= 'A' && c <= 'F') {
135 return;
136 }
137 // Causes the invalid escape to be skipped
138 hasNextChar = true;
139 nextChar = c;
140 throw new IOException("Did not find four digit hex character code."
141 + " line: " + lexer.getLine() + " col:" + lexer.getColumn());
142 }
143
144 /**
145 * Closes this reader by calling close on the underlying reader.
146 * @see java.io.Reader#close()
147 */
148 public void close() throws IOException {
149 reader.close();
150 }
151 }