compatible with engine.io-parser 1.0.6

This commit is contained in:
Naoyuki Kanezawa
2014-06-01 20:21:54 +09:00
parent 7fcd5c5568
commit 7c67fa5d9b
5 changed files with 308 additions and 5 deletions

View File

@@ -81,6 +81,7 @@
<artifactId>maven-surefire-plugin</artifactId>
<version>2.14.1</version>
<configuration>
<argLine>-Dfile.encoding=UTF-8</argLine>
<systemProperties>
<property>
<name>java.util.logging.config.file</name>

View File

@@ -1,8 +1,9 @@
package com.github.nkzawa.engineio.parser;
import com.github.nkzawa.utf8.UTF8;
import java.nio.ByteBuffer;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
@@ -47,7 +48,7 @@ public class Parser {
String encoded = String.valueOf(packets.get(packet.type));
if (null != packet.data) {
encoded += packet.data;
encoded += UTF8.encode(String.valueOf(packet.data));
}
@SuppressWarnings("unchecked")
@@ -70,6 +71,7 @@ public class Parser {
} catch (IndexOutOfBoundsException e) {
type = -1;
}
data = UTF8.decode(data);
if (type < 0 || type >= packetslist.size()) {
return err;
@@ -102,7 +104,7 @@ public class Parser {
@Override
public void call(Object packet) {
if (packet instanceof String) {
String encodingLength = String.valueOf(((String)packet).getBytes(Charset.forName("UTF-8")).length);
String encodingLength = String.valueOf(((String) packet).length());
byte[] sizeBuffer = new byte[encodingLength.length() + 2];
sizeBuffer[0] = (byte)0; // is a string
@@ -110,7 +112,7 @@ public class Parser {
sizeBuffer[i + 1] = (byte)Character.getNumericValue(encodingLength.charAt(i));
}
sizeBuffer[sizeBuffer.length - 1] = (byte)255;
results.add(Buffer.concat(new byte[][] {sizeBuffer, ((String)packet).getBytes(Charset.forName("UTF-8"))}));
results.add(Buffer.concat(new byte[][] {sizeBuffer, stringToByteArray((String)packet)}));
return;
}
@@ -202,7 +204,7 @@ public class Parser {
byte[] msg = new byte[bufferTail.remaining()];
bufferTail.get(msg);
if (isString) {
buffers.add(new String(msg, Charset.forName("UTF-8")));
buffers.add(byteArrayToString(msg));
} else {
buffers.add(msg);
}
@@ -226,6 +228,22 @@ public class Parser {
}
}
/**
 * Converts raw bytes into a "binary string" in which each char carries the
 * unsigned value (0x00-0xFF) of the byte at the same position.
 *
 * @param bytes the bytes to convert
 * @return a string with exactly one char per input byte
 */
public static String byteArrayToString(byte[] bytes) {
    int count = bytes.length;
    StringBuilder chars = new StringBuilder(count);
    for (int i = 0; i < count; i++) {
        // Mask so that negative byte values map to 0x80-0xFF instead of sign-extending.
        chars.append((char) (bytes[i] & 0xFF));
    }
    return chars.toString();
}
/**
 * Converts a "binary string" into bytes, producing one byte per char index.
 * Each byte is the low 8 bits of the code point found at that index.
 *
 * @param string the string to convert
 * @return an array whose length equals {@code string.length()}
 */
public static byte[] stringToByteArray(String string) {
    byte[] result = new byte[string.length()];
    int pos = 0;
    while (pos < result.length) {
        // The narrowing cast keeps only the low 8 bits of the code point.
        result[pos] = (byte) string.codePointAt(pos);
        pos++;
    }
    return result;
}
public static interface EncodeCallback<T> {

View File

@@ -0,0 +1,164 @@
package com.github.nkzawa.utf8;
import java.util.ArrayList;
import java.util.List;
/**
 * UTF-8 encoder/decoder ported from utf8.js.
 * <p>
 * Both directions operate on "byte strings": strings in which every char holds
 * the value of one UTF-8 byte (0x00-0xFF). {@link #encode(String)} turns text
 * into such a byte string and {@link #decode(String)} turns it back.
 * <p>
 * All decoding state is kept in a cursor created per call, so the static
 * methods do not share mutable state between invocations or threads.
 *
 * @see <a href="https://github.com/mathiasbynens/utf8.js">https://github.com/mathiasbynens/utf8.js</a>
 */
public class UTF8 {

    private static final String INVALID_BYTE_INDEX = "Invalid byte index";
    private static final String INVALID_CONTINUATION_BYTE = "Invalid continuation byte";

    /**
     * Encodes text as a UTF-8 byte string.
     * <p>
     * Surrogate pairs are combined into one code point before encoding; an
     * unpaired surrogate is encoded on its own as a three-byte sequence.
     *
     * @param string the text to encode
     * @return a string with one char (0x00-0xFF) per UTF-8 byte
     */
    public static String encode(String string) {
        int[] codePoints = ucs2decode(string);
        StringBuilder byteString = new StringBuilder(codePoints.length);
        for (int codePoint : codePoints) {
            appendEncoded(byteString, codePoint);
        }
        return byteString.toString();
    }

    /**
     * Decodes a UTF-8 byte string back into text.
     * <p>
     * Only the low 8 bits of each input code point are used as the byte value.
     *
     * @param byteString a string with one UTF-8 byte per code point
     * @return the decoded text
     * @throws RuntimeException if the input contains an invalid lead byte, an
     *         invalid or missing continuation byte, an overlong sequence, or a
     *         four-byte sequence outside U+10000..U+10FFFF
     */
    public static String decode(String byteString) {
        ByteCursor cursor = new ByteCursor(ucs2decode(byteString));
        StringBuilder text = new StringBuilder(cursor.length());
        int codePoint;
        while ((codePoint = cursor.nextCodePoint()) != -1) {
            text.appendCodePoint(codePoint);
        }
        return text.toString();
    }

    /** Splits a string into its code points, combining valid surrogate pairs. */
    private static int[] ucs2decode(String string) {
        int length = string.length();
        int[] output = new int[string.codePointCount(0, length)];
        int counter = 0;
        int value;
        for (int i = 0; i < length; i += Character.charCount(value)) {
            value = string.codePointAt(i);
            output[counter++] = value;
        }
        return output;
    }

    /** Appends the UTF-8 bytes of one code point, one char per byte. */
    private static void appendEncoded(StringBuilder out, int codePoint) {
        if ((codePoint & 0xFFFFFF80) == 0) { // 1-byte sequence
            out.append((char) codePoint);
            return;
        }
        if ((codePoint & 0xFFFFF800) == 0) { // 2-byte sequence
            out.append((char) (((codePoint >> 6) & 0x1F) | 0xC0));
        } else if ((codePoint & 0xFFFF0000) == 0) { // 3-byte sequence
            out.append((char) (((codePoint >> 12) & 0x0F) | 0xE0));
            out.append(continuationByte(codePoint, 6));
        } else if ((codePoint & 0xFFE00000) == 0) { // 4-byte sequence
            out.append((char) (((codePoint >> 18) & 0x07) | 0xF0));
            out.append(continuationByte(codePoint, 12));
            out.append(continuationByte(codePoint, 6));
        }
        // Every multi-byte sequence ends with the lowest six bits.
        out.append(continuationByte(codePoint, 0));
    }

    /** Builds the continuation byte (10xxxxxx) for the six bits starting at {@code shift}. */
    private static char continuationByte(int codePoint, int shift) {
        return (char) (((codePoint >> shift) & 0x3F) | 0x80);
    }

    /** Read position over the byte values of a single {@link #decode(String)} call. */
    private static final class ByteCursor {
        private final int[] bytes;
        private int index;

        ByteCursor(int[] bytes) {
            this.bytes = bytes;
        }

        int length() {
            return bytes.length;
        }

        /**
         * Reads the next code point.
         *
         * @return the decoded code point, or -1 once all bytes are consumed
         */
        int nextCodePoint() {
            if (index == bytes.length) {
                return -1;
            }

            int byte1 = bytes[index++] & 0xFF;

            // 1-byte sequence (no continuation bytes)
            if ((byte1 & 0x80) == 0) {
                return byte1;
            }

            // 2-byte sequence
            if ((byte1 & 0xE0) == 0xC0) {
                int byte2 = readContinuationByte();
                int codePoint = ((byte1 & 0x1F) << 6) | byte2;
                if (codePoint >= 0x80) {
                    return codePoint;
                }
                // Overlong form of a value that fits in one byte.
                throw new RuntimeException(INVALID_CONTINUATION_BYTE);
            }

            // 3-byte sequence
            if ((byte1 & 0xF0) == 0xE0) {
                int byte2 = readContinuationByte();
                int byte3 = readContinuationByte();
                int codePoint = ((byte1 & 0x0F) << 12) | (byte2 << 6) | byte3;
                if (codePoint >= 0x0800) {
                    return codePoint;
                }
                // Overlong form of a value that fits in two bytes.
                throw new RuntimeException(INVALID_CONTINUATION_BYTE);
            }

            // 4-byte sequence
            if ((byte1 & 0xF8) == 0xF0) {
                int byte2 = readContinuationByte();
                int byte3 = readContinuationByte();
                int byte4 = readContinuationByte();
                int codePoint = ((byte1 & 0x0F) << 0x12) | (byte2 << 0x0C) | (byte3 << 0x06) | byte4;
                if (codePoint >= 0x010000 && codePoint <= 0x10FFFF) {
                    return codePoint;
                }
            }

            // Invalid lead byte, or a 4-byte value outside the allowed range.
            throw new RuntimeException(INVALID_CONTINUATION_BYTE);
        }

        /** Reads one continuation byte and returns its six payload bits. */
        private int readContinuationByte() {
            if (index >= bytes.length) {
                throw new RuntimeException(INVALID_BYTE_INDEX);
            }

            int continuationByte = bytes[index++] & 0xFF;
            if ((continuationByte & 0xC0) == 0x80) {
                return continuationByte & 0x3F;
            }

            throw new RuntimeException(INVALID_CONTINUATION_BYTE);
        }
    }
}

View File

@@ -104,6 +104,30 @@ public class ParserTest {
});
}
/** A message containing a non-ASCII character must survive an encode/decode round trip. */
@Test
public void encodeUTF8SpecialCharsMessagePacket() {
    Packet<String> message = new Packet<String>(Packet.MESSAGE, "utf8 — string");
    EncodeCallback<String> verifyRoundTrip = new EncodeCallback<String>() {
        @Override
        public void call(String encoded) {
            Packet<String> decoded = decodePacket(encoded);
            assertThat(decoded.type, is(Packet.MESSAGE));
            assertThat(decoded.data, is("utf8 — string"));
        }
    };
    encodePacket(message, verifyRoundTrip);
}
/** Non-string packet data is expected to come back as its string form after a round trip. */
@Test
public void encodeMessagePacketCoercingToString() {
    Packet<Integer> message = new Packet<Integer>(Packet.MESSAGE, 1);
    EncodeCallback<String> verifyRoundTrip = new EncodeCallback<String>() {
        @Override
        public void call(String encoded) {
            Packet<String> decoded = decodePacket(encoded);
            assertThat(decoded.type, is(Packet.MESSAGE));
            assertThat(decoded.data, is("1"));
        }
    };
    encodePacket(message, verifyRoundTrip);
}
@Test
public void encodeUpgradePacket() {
encodePacket(new Packet<String>(Packet.UPGRADE), new EncodeCallback<String>() {

View File

@@ -0,0 +1,96 @@
package com.github.nkzawa.utf8;
import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.ExpectedException;
import org.junit.runner.RunWith;
import org.junit.runners.JUnit4;
import static org.hamcrest.CoreMatchers.is;
import static org.junit.Assert.assertThat;
/**
 * Round-trip and error-handling tests for {@link UTF8}.
 * <p>
 * Each invalid input has its own test method: a test stops at the first
 * exception it throws, so several throwing calls in one method would leave all
 * but the first unchecked.
 */
@RunWith(JUnit4.class)
public class UTF8Test {
    private static final Data[] DATA = new Data[] {
        // 1-byte
        new Data(0x0000, "\u0000", "\u0000"),
        new Data(0x005c, "\u005C\u005C", "\u005C\u005C"), // = backslash
        new Data(0x007f, "\u007F", "\u007F"),
        // 2-byte
        new Data(0x0080, "\u0080", "\u00C2\u0080"),
        new Data(0x05CA, "\u05CA", "\u00D7\u008A"),
        new Data(0x07FF, "\u07FF", "\u00DF\u00BF"),
        // 3-byte
        new Data(0x0800, "\u0800", "\u00E0\u00A0\u0080"),
        new Data(0x2C3C, "\u2C3C", "\u00E2\u00B0\u00BC"),
        new Data(0xFFFF, "\uFFFF", "\u00EF\u00BF\u00BF"),
        // unmatched surrogate halves
        // high surrogates: 0xD800 to 0xDBFF
        new Data(0xD800, "\uD800", "\u00ED\u00A0\u0080"),
        new Data("High surrogate followed by another high surrogate",
            "\uD800\uD800", "\u00ED\u00A0\u0080\u00ED\u00A0\u0080"),
        new Data("High surrogate followed by a symbol that is not a surrogate",
            "\uD800A", "\u00ED\u00A0\u0080A"),
        new Data("Unmatched high surrogate, followed by a surrogate pair, followed by an unmatched high surrogate",
            "\uD800\uD834\uDF06\uD800", "\u00ED\u00A0\u0080\u00F0\u009D\u008C\u0086\u00ED\u00A0\u0080"),
        new Data(0xD9AF, "\uD9AF", "\u00ED\u00A6\u00AF"),
        new Data(0xDBFF, "\uDBFF", "\u00ED\u00AF\u00BF"),
        // low surrogates: 0xDC00 to 0xDFFF
        new Data(0xDC00, "\uDC00", "\u00ED\u00B0\u0080"),
        new Data("Low surrogate followed by another low surrogate",
            "\uDC00\uDC00", "\u00ED\u00B0\u0080\u00ED\u00B0\u0080"),
        new Data("Low surrogate followed by a symbol that is not a surrogate",
            "\uDC00A", "\u00ED\u00B0\u0080A"),
        new Data("Unmatched low surrogate, followed by a surrogate pair, followed by an unmatched low surrogate",
            "\uDC00\uD834\uDF06\uDC00", "\u00ED\u00B0\u0080\u00F0\u009D\u008C\u0086\u00ED\u00B0\u0080"),
        new Data(0xDEEE, "\uDEEE", "\u00ED\u00BB\u00AE"),
        new Data(0xDFFF, "\uDFFF", "\u00ED\u00BF\u00BF"),
        // 4-byte
        new Data(0x010000, "\uD800\uDC00", "\u00F0\u0090\u0080\u0080"),
        new Data(0x01D306, "\uD834\uDF06", "\u00F0\u009D\u008C\u0086"),
        new Data(0x10FFFF, "\uDBFF\uDFFF", "\u00F4\u008F\u00BF\u00BF"),
    };

    @Rule
    public ExpectedException exception = ExpectedException.none();

    /** Every table entry must encode to its byte string and decode back to its text. */
    @Test
    public void encodeAndDecode() {
        for (Data data : DATA) {
            String reason = data.description != null
                ? data.description
                : "U+" + Integer.toHexString(data.codePoint).toUpperCase();
            // assertThat takes (reason, actual, matcher-of-expected).
            assertThat("Encoding: " + reason, UTF8.encode(data.decoded), is(data.encoded));
            assertThat("Decoding: " + reason, UTF8.decode(data.encoded), is(data.decoded));
        }
    }

    /** A single value that is not a valid lead byte must be rejected. */
    @Test
    public void decodeRejectsInvalidLeadByte() {
        exception.expect(RuntimeException.class);
        UTF8.decode("\uFFFF");
    }

    /** A lead byte followed by values that are not continuation bytes must be rejected. */
    @Test
    public void decodeRejectsNonContinuationBytes() {
        exception.expect(RuntimeException.class);
        UTF8.decode("\u00E9\u0000\u0000");
    }

    /** A 2-byte lead followed by an invalid continuation value must be rejected. */
    @Test
    public void decodeRejectsInvalidContinuationAfterTwoByteLead() {
        exception.expect(RuntimeException.class);
        UTF8.decode("\u00C2\uFFFF");
    }

    /** A 4-byte sequence that ends after two bytes must be rejected. */
    @Test
    public void decodeRejectsTruncatedSequence() {
        exception.expect(RuntimeException.class);
        UTF8.decode("\u00F0\u009D");
    }

    /** One table entry: text, its UTF-8 byte string, and a label for failure messages. */
    private static class Data {
        // -1 when the entry is labelled by description instead of a code point.
        public int codePoint = -1;
        public String description;
        public String decoded;
        public String encoded;

        public Data(int codePoint, String decoded, String encoded) {
            this.codePoint = codePoint;
            this.decoded = decoded;
            this.encoded = encoded;
        }

        public Data(String description, String decoded, String encoded) {
            this.description = description;
            this.decoded = decoded;
            this.encoded = encoded;
        }
    }
}