Skip to content
Snippets Groups Projects
Commit f6d3acbe authored by Mickael Gaillard's avatar Mickael Gaillard Committed by Daniel Stonier
Browse files

Add support for UTF-8!

For i18n (e.g., TTS).

"A string must always contain UTF-8 encoded or 7-bit ASCII text." https://developers.google.com/protocol-buffers/docs/proto#scalar

"unicode strings are currently not supported as a ROS data type. utf-8 should be used to be compatible with ROS string serialization. " http://wiki.ros.org/msg
parent 0ffb8114
Branches
Tags
No related merge requests found
......@@ -28,6 +28,7 @@ import java.nio.charset.Charset;
/**
* @author damonkohler@google.com (Damon Kohler)
* @author mick.gaillard@gmail.com (Mickael Gaillard)
*/
public enum PrimitiveFieldType implements FieldType {
......@@ -571,7 +572,7 @@ public enum PrimitiveFieldType implements FieldType {
/**
 * Writes a string value to {@code buffer} as an int byte-count prefix
 * followed by the encoded bytes.
 *
 * <p>The prefix is the number of encoded bytes, not the number of
 * characters, so multi-byte characters are accounted for.
 *
 * @param value the value to write; must be a {@link String} (checked via
 *        {@code Preconditions.checkArgument})
 * @param buffer the buffer that receives the length prefix and the bytes
 */
@Override
public <T> void serialize(T value, ChannelBuffer buffer) {
  Preconditions.checkArgument(value instanceof String);
  // Encode with the explicit DEFAULT_CHARSET rather than the platform default
  // so the bytes on the wire do not depend on the host's locale settings.
  byte[] bytes = ((String) value).getBytes(DEFAULT_CHARSET);
  buffer.writeInt(bytes.length);
  buffer.writeBytes(bytes);
}
......@@ -581,7 +582,7 @@ public enum PrimitiveFieldType implements FieldType {
/**
 * Reads a length-prefixed string from {@code buffer}.
 *
 * <p>An int byte count is read first, then that many bytes are sliced off
 * and decoded with {@code DEFAULT_CHARSET}, the same charset the serializer
 * uses, so non-ASCII text survives a round trip.
 *
 * @param buffer the buffer positioned at the string's length prefix
 * @return the decoded string
 */
public String deserialize(ChannelBuffer buffer) {
  int length = buffer.readInt();
  ByteBuffer stringBuffer = buffer.readSlice(length).toByteBuffer();
  // Decode with DEFAULT_CHARSET, not US-ASCII: an ASCII decoder replaces
  // every byte outside the 7-bit range, corrupting non-ASCII payloads.
  return DEFAULT_CHARSET.decode(stringBuffer).toString();
}
@SuppressWarnings("unchecked")
......@@ -678,6 +679,7 @@ public enum PrimitiveFieldType implements FieldType {
}
};
/** Charset (UTF-8) used when encoding and decoding string field values. */
private static final Charset DEFAULT_CHARSET = Charset.forName("UTF-8");
private static final ImmutableSet<String> TYPE_NAMES;
static {
......
......@@ -30,6 +30,7 @@ import org.ros.message.Time;
/**
* @author damonkohler@google.com (Damon Kohler)
* @author mick.gaillard@gmail.com (Mickael Gaillard)
*/
public class RawMessageSerializationTest {
......@@ -139,6 +140,71 @@ public class RawMessageSerializationTest {
checkSerializeAndDeserialize(rawMessage);
}
/**
 * Sets a series of non-ASCII strings on a {@code std_msgs/String} message
 * and passes the message to {@code checkSerializeAndDeserialize} after each
 * one. The same message instance is reused for every sample.
 */
@Test
public void testStringUTF8() {
  final String[] samples = {
      "éêè €àáßëœ 文字化け",
      // i18n test cases, based on
      // http://www.inter-locale.com/whitepaper/learn/learn-to-test.html
      // Combining marks and accents
      "àéîōũ",
      // DOS 860
      "você nós mãe avô irmã criança",
      // Windows-1252
      "€ŒœŠš™©‰ƒ",
      // Turkish
      "ışık bir İyi Günler",
      // Dakuten and handakuten marks
      "がざばだぱか゛さ゛た゛は",
      // Combining Grapheme Joiner character
      "אִ͏ַ",
      // Bidi with Latin
      "abcאבגדabc ",
      "אבגדabcאבגד",
      "אבגד012אבגד",
      "אבגד 012 012",
      // Complex scripts
      "สวัสดี",
      "டாஹ்கோ",
      "بِسْمِ اللّهِ الرَّحْمـَنِ الرَّحِيمِ",
      // Numeric shaping
      "عدد مارس ١٩٩٨",
      // Common scripts and encodings
      "Слава Жанна Ювеналий Ярополк",
  };

  RawMessage message = messageFactory.newFromType("std_msgs/String");
  for (String sample : samples) {
    message.setString("data", sample);
    checkSerializeAndDeserialize(message);
  }
}
@Test
public void testTime() {
RawMessage rawMessage = messageFactory.newFromType("std_msgs/Time");
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment