compatible with engine.io-parser 1.0.6
This commit is contained in:
@@ -1,8 +1,9 @@
|
||||
package com.github.nkzawa.engineio.parser;
|
||||
|
||||
|
||||
import com.github.nkzawa.utf8.UTF8;
|
||||
|
||||
import java.nio.ByteBuffer;
|
||||
import java.nio.charset.Charset;
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
@@ -47,7 +48,7 @@ public class Parser {
|
||||
String encoded = String.valueOf(packets.get(packet.type));
|
||||
|
||||
if (null != packet.data) {
|
||||
encoded += packet.data;
|
||||
encoded += UTF8.encode(String.valueOf(packet.data));
|
||||
}
|
||||
|
||||
@SuppressWarnings("unchecked")
|
||||
@@ -70,6 +71,7 @@ public class Parser {
|
||||
} catch (IndexOutOfBoundsException e) {
|
||||
type = -1;
|
||||
}
|
||||
data = UTF8.decode(data);
|
||||
|
||||
if (type < 0 || type >= packetslist.size()) {
|
||||
return err;
|
||||
@@ -102,7 +104,7 @@ public class Parser {
|
||||
@Override
|
||||
public void call(Object packet) {
|
||||
if (packet instanceof String) {
|
||||
String encodingLength = String.valueOf(((String)packet).getBytes(Charset.forName("UTF-8")).length);
|
||||
String encodingLength = String.valueOf(((String) packet).length());
|
||||
byte[] sizeBuffer = new byte[encodingLength.length() + 2];
|
||||
|
||||
sizeBuffer[0] = (byte)0; // is a string
|
||||
@@ -110,7 +112,7 @@ public class Parser {
|
||||
sizeBuffer[i + 1] = (byte)Character.getNumericValue(encodingLength.charAt(i));
|
||||
}
|
||||
sizeBuffer[sizeBuffer.length - 1] = (byte)255;
|
||||
results.add(Buffer.concat(new byte[][] {sizeBuffer, ((String)packet).getBytes(Charset.forName("UTF-8"))}));
|
||||
results.add(Buffer.concat(new byte[][] {sizeBuffer, stringToByteArray((String)packet)}));
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -202,7 +204,7 @@ public class Parser {
|
||||
byte[] msg = new byte[bufferTail.remaining()];
|
||||
bufferTail.get(msg);
|
||||
if (isString) {
|
||||
buffers.add(new String(msg, Charset.forName("UTF-8")));
|
||||
buffers.add(byteArrayToString(msg));
|
||||
} else {
|
||||
buffers.add(msg);
|
||||
}
|
||||
@@ -226,6 +228,22 @@ public class Parser {
|
||||
}
|
||||
}
|
||||
|
||||
public static String byteArrayToString(byte[] bytes) {
|
||||
StringBuilder builder = new StringBuilder();
|
||||
for (byte b : bytes) {
|
||||
builder.appendCodePoint(b & 0xFF);
|
||||
}
|
||||
return builder.toString();
|
||||
}
|
||||
|
||||
public static byte[] stringToByteArray(String string) {
|
||||
int len = string.length();
|
||||
byte[] bytes = new byte[len];
|
||||
for (int i = 0; i < len; i++) {
|
||||
bytes[i] = (byte)Character.codePointAt(string, i);
|
||||
}
|
||||
return bytes;
|
||||
}
|
||||
|
||||
public static interface EncodeCallback<T> {
|
||||
|
||||
|
||||
164
src/main/java/com/github/nkzawa/utf8/UTF8.java
Normal file
164
src/main/java/com/github/nkzawa/utf8/UTF8.java
Normal file
@@ -0,0 +1,164 @@
|
||||
package com.github.nkzawa.utf8;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
/**
 * UTF-8 encoder/decoder ported from utf8.js.
 *
 * Both directions work on strings: {@link #encode(String)} produces a string
 * in which every char carries one UTF-8 byte value (0-255), and
 * {@link #decode(String)} turns such a string back into text.
 *
 * All decoding state lives in a per-call object, so calls do not share
 * mutable state with each other.
 *
 * @see <a href="https://github.com/mathiasbynens/utf8.js">https://github.com/mathiasbynens/utf8.js</a>
 */
public class UTF8 {

    private static final String INVALID_BYTE_INDEX = "Invalid byte index";
    private static final String INVALID_CONTINUATION_BYTE = "Invalid continuation byte";

    /**
     * Encodes a string as UTF-8, returning the bytes as chars.
     *
     * Unpaired surrogates are encoded as three-byte sequences rather than
     * rejected.
     *
     * @param string the text to encode
     * @return a string with one char (value 0-255) per UTF-8 byte
     */
    public static String encode(String string) {
        StringBuilder byteString = new StringBuilder(string.length());
        for (int codePoint : uc2decode(string)) {
            appendEncodedCodePoint(byteString, codePoint);
        }
        return byteString.toString();
    }

    /**
     * Decodes a string of UTF-8 byte values back into text.
     *
     * Each input code point is masked to its low 8 bits before it is
     * interpreted as a byte.
     *
     * @param byteString a string with one char per UTF-8 byte
     * @return the decoded text
     * @throws RuntimeException if the input contains an invalid lead byte,
     *         an invalid continuation byte, an overlong two- or three-byte
     *         form, or a truncated sequence
     */
    public static String decode(String byteString) {
        ByteReader reader = new ByteReader(uc2decode(byteString));
        StringBuilder output = new StringBuilder(byteString.length());
        int codePoint;
        while ((codePoint = decodeSymbol(reader)) != -1) {
            output.appendCodePoint(codePoint);
        }
        return output.toString();
    }

    /** Splits a string into code points; unpaired surrogates are kept as-is. */
    private static int[] uc2decode(String string) {
        int length = string.length();
        int[] output = new int[string.codePointCount(0, length)];
        int counter = 0;
        int i = 0;
        while (i < length) {
            int value = string.codePointAt(i);
            output[counter++] = value;
            i += Character.charCount(value);
        }
        return output;
    }

    /** Appends the UTF-8 bytes of one code point, each as a single char. */
    private static void appendEncodedCodePoint(StringBuilder out, int codePoint) {
        if ((codePoint & 0xFFFFFF80) == 0) { // 1-byte sequence
            out.append((char) codePoint);
            return;
        }
        if ((codePoint & 0xFFFFF800) == 0) { // 2-byte sequence
            out.append((char) (((codePoint >> 6) & 0x1F) | 0xC0));
        } else if ((codePoint & 0xFFFF0000) == 0) { // 3-byte sequence
            out.append((char) (((codePoint >> 12) & 0x0F) | 0xE0));
            out.append(createByte(codePoint, 6));
        } else if ((codePoint & 0xFFE00000) == 0) { // 4-byte sequence
            out.append((char) (((codePoint >> 18) & 0x07) | 0xF0));
            out.append(createByte(codePoint, 12));
            out.append(createByte(codePoint, 6));
        }
        // Every multi-byte form ends with a continuation byte for the low 6 bits.
        out.append((char) ((codePoint & 0x3F) | 0x80));
    }

    /** Builds the continuation byte carrying the 6 bits found at {@code shift}. */
    private static char createByte(int codePoint, int shift) {
        return (char) (((codePoint >> shift) & 0x3F) | 0x80);
    }

    /**
     * Reads the next code point from the reader.
     *
     * @return the decoded code point, or -1 when the input is exhausted
     */
    private static int decodeSymbol(ByteReader reader) {
        if (reader.index > reader.bytes.length) {
            throw new RuntimeException(INVALID_BYTE_INDEX);
        }
        if (reader.index == reader.bytes.length) {
            return -1;
        }

        int byte1 = reader.next();

        if ((byte1 & 0x80) == 0) { // 1-byte sequence
            return byte1;
        }

        if ((byte1 & 0xE0) == 0xC0) { // 2-byte sequence
            int byte2 = readContinuationByte(reader);
            int codePoint = ((byte1 & 0x1F) << 6) | byte2;
            if (codePoint >= 0x80) {
                return codePoint;
            }
            // Overlong form: the value would have fit in one byte.
            throw new RuntimeException(INVALID_CONTINUATION_BYTE);
        }

        if ((byte1 & 0xF0) == 0xE0) { // 3-byte sequence
            int byte2 = readContinuationByte(reader);
            int byte3 = readContinuationByte(reader);
            int codePoint = ((byte1 & 0x0F) << 12) | (byte2 << 6) | byte3;
            if (codePoint >= 0x0800) {
                return codePoint;
            }
            // Overlong form: the value would have fit in two bytes.
            throw new RuntimeException(INVALID_CONTINUATION_BYTE);
        }

        if ((byte1 & 0xF8) == 0xF0) { // 4-byte sequence
            int byte2 = readContinuationByte(reader);
            int byte3 = readContinuationByte(reader);
            int byte4 = readContinuationByte(reader);
            int codePoint = ((byte1 & 0x0F) << 0x12) | (byte2 << 0x0C) | (byte3 << 0x06) | byte4;
            if (codePoint >= 0x010000 && codePoint <= 0x10FFFF) {
                return codePoint;
            }
        }

        // Unknown lead byte, or a 4-byte value outside the valid range.
        throw new RuntimeException(INVALID_CONTINUATION_BYTE);
    }

    /**
     * Reads one continuation byte and returns its 6 payload bits.
     *
     * @throws RuntimeException if the input ends early or the byte does not
     *         have the continuation-byte bit pattern
     */
    private static int readContinuationByte(ByteReader reader) {
        if (reader.index >= reader.bytes.length) {
            throw new RuntimeException(INVALID_BYTE_INDEX);
        }

        int continuationByte = reader.next();
        if ((continuationByte & 0xC0) == 0x80) {
            return continuationByte & 0x3F;
        }

        throw new RuntimeException(INVALID_CONTINUATION_BYTE);
    }

    /** Cursor over the byte values of a single decode call. */
    private static final class ByteReader {
        private final int[] bytes;
        private int index;

        ByteReader(int[] bytes) {
            this.bytes = bytes;
        }

        /** Returns the low 8 bits of the current value and advances. */
        int next() {
            return bytes[index++] & 0xFF;
        }
    }
}
|
||||
@@ -104,6 +104,30 @@ public class ParserTest {
|
||||
});
|
||||
}
|
||||
|
||||
@Test
|
||||
public void encodeUTF8SpecialCharsMessagePacket() {
|
||||
encodePacket(new Packet<String>(Packet.MESSAGE, "utf8 — string"), new EncodeCallback<String>() {
|
||||
@Override
|
||||
public void call(String data) {
|
||||
Packet<String> p = decodePacket(data);
|
||||
assertThat(p.type, is(Packet.MESSAGE));
|
||||
assertThat(p.data, is("utf8 — string"));
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
@Test
|
||||
public void encodeMessagePacketCoercingToString() {
|
||||
encodePacket(new Packet<Integer>(Packet.MESSAGE, 1), new EncodeCallback<String>() {
|
||||
@Override
|
||||
public void call(String data) {
|
||||
Packet<String> p = decodePacket(data);
|
||||
assertThat(p.type, is(Packet.MESSAGE));
|
||||
assertThat(p.data, is("1"));
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
@Test
|
||||
public void encodeUpgradePacket() {
|
||||
encodePacket(new Packet<String>(Packet.UPGRADE), new EncodeCallback<String>() {
|
||||
|
||||
96
src/test/java/com/github/nkzawa/utf8/UTF8Test.java
Normal file
96
src/test/java/com/github/nkzawa/utf8/UTF8Test.java
Normal file
@@ -0,0 +1,96 @@
|
||||
package com.github.nkzawa.utf8;
|
||||
|
||||
import org.junit.Rule;
|
||||
import org.junit.Test;
|
||||
import org.junit.rules.ExpectedException;
|
||||
import org.junit.runner.RunWith;
|
||||
import org.junit.runners.JUnit4;
|
||||
|
||||
import static org.hamcrest.CoreMatchers.is;
|
||||
import static org.junit.Assert.assertThat;
|
||||
|
||||
@RunWith(JUnit4.class)
|
||||
public class UTF8Test {
|
||||
private static final Data[] DATA = new Data[] {
|
||||
// 1-byte
|
||||
new Data(0x0000, "\u0000", "\u0000"),
|
||||
new Data(0x005c, "\u005C\u005C", "\u005C\u005C"), // = backslash
|
||||
new Data(0x007f, "\u007F", "\u007F"),
|
||||
// 2-byte
|
||||
new Data(0x0080, "\u0080", "\u00C2\u0080"),
|
||||
new Data(0x05CA, "\u05CA", "\u00D7\u008A"),
|
||||
new Data(0x07FF, "\u07FF", "\u00DF\u00BF"),
|
||||
// 3-byte
|
||||
new Data(0x0800, "\u0800", "\u00E0\u00A0\u0080"),
|
||||
new Data(0x2C3C, "\u2C3C", "\u00E2\u00B0\u00BC"),
|
||||
new Data(0x07FF, "\uFFFF", "\u00EF\u00BF\u00BF"),
|
||||
// unmatched surrogate halves
|
||||
// high surrogates: 0xD800 to 0xDBFF
|
||||
new Data(0xD800, "\uD800", "\u00ED\u00A0\u0080"),
|
||||
new Data("High surrogate followed by another high surrogate",
|
||||
"\uD800\uD800", "\u00ED\u00A0\u0080\u00ED\u00A0\u0080"),
|
||||
new Data("High surrogate followed by a symbol that is not a surrogate",
|
||||
"\uD800A", "\u00ED\u00A0\u0080A"),
|
||||
new Data("Unmatched high surrogate, followed by a surrogate pair, followed by an unmatched high surrogate",
|
||||
"\uD800\uD834\uDF06\uD800", "\u00ED\u00A0\u0080\u00F0\u009D\u008C\u0086\u00ED\u00A0\u0080"),
|
||||
new Data(0xD9AF, "\uD9AF", "\u00ED\u00A6\u00AF"),
|
||||
new Data(0xDBFF, "\uDBFF", "\u00ED\u00AF\u00BF"),
|
||||
// low surrogates: 0xDC00 to 0xDFFF
|
||||
new Data(0xDC00, "\uDC00", "\u00ED\u00B0\u0080"),
|
||||
new Data("Low surrogate followed by another low surrogate",
|
||||
"\uDC00\uDC00", "\u00ED\u00B0\u0080\u00ED\u00B0\u0080"),
|
||||
new Data("Low surrogate followed by a symbol that is not a surrogate",
|
||||
"\uDC00A", "\u00ED\u00B0\u0080A"),
|
||||
new Data("Unmatched low surrogate, followed by a surrogate pair, followed by an unmatched low surrogate",
|
||||
"\uDC00\uD834\uDF06\uDC00", "\u00ED\u00B0\u0080\u00F0\u009D\u008C\u0086\u00ED\u00B0\u0080"),
|
||||
new Data(0xDEEE, "\uDEEE", "\u00ED\u00BB\u00AE"),
|
||||
new Data(0xDFFF, "\uDFFF", "\u00ED\u00BF\u00BF"),
|
||||
// 4-byte
|
||||
new Data(0x010000, "\uD800\uDC00", "\u00F0\u0090\u0080\u0080"),
|
||||
new Data(0x01D306, "\uD834\uDF06", "\u00F0\u009D\u008C\u0086"),
|
||||
new Data(0x010FFF, "\uDBFF\uDFFF", "\u00F4\u008F\u00BF\u00BF"),
|
||||
};
|
||||
|
||||
@Rule
|
||||
public ExpectedException exception = ExpectedException.none();
|
||||
|
||||
@Test
|
||||
public void encodeAndDecode() {
|
||||
for (Data data : DATA) {
|
||||
String reason = data.description != null? data.description : "U+" + Integer.toHexString(data.codePoint).toUpperCase();
|
||||
assertThat("Encoding: " + reason, data.encoded, is(UTF8.encode(data.decoded)));
|
||||
assertThat("Decoding: " + reason, data.decoded, is(UTF8.decode(data.encoded)));
|
||||
}
|
||||
|
||||
exception.expect(RuntimeException.class);
|
||||
UTF8.decode("\uFFFF");
|
||||
|
||||
exception.expect(RuntimeException.class);
|
||||
UTF8.decode("\u00E9\u0000\u0000");
|
||||
|
||||
exception.expect(RuntimeException.class);
|
||||
UTF8.decode("\u00C2\uFFFF");
|
||||
|
||||
exception.expect(RuntimeException.class);
|
||||
UTF8.decode("\u00F0\u009D");
|
||||
}
|
||||
|
||||
private static class Data {
|
||||
public int codePoint = -1;
|
||||
public String description;
|
||||
public String decoded;
|
||||
public String encoded;
|
||||
|
||||
public Data(int codePoint, String decoded, String encoded) {
|
||||
this.codePoint = codePoint;
|
||||
this.decoded = decoded;
|
||||
this.encoded = encoded;
|
||||
}
|
||||
|
||||
public Data(String description, String decoded, String encoded) {
|
||||
this.description = description;
|
||||
this.decoded = decoded;
|
||||
this.encoded = encoded;
|
||||
}
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user