Skip to content
Snippets Groups Projects
Commit f6d3acbe authored by Mickael Gaillard's avatar Mickael Gaillard Committed by Daniel Stonier
Browse files

Add support for UTF-8!

For i18n (e.g. TTS).

"A string must always contain UTF-8 encoded or 7-bit ASCII text." https://developers.google.com/protocol-buffers/docs/proto#scalar

"unicode strings are currently not supported as a ROS data type. utf-8 should be used to be compatible with ROS string serialization. " http://wiki.ros.org/msg
parent 0ffb8114
Branches
Tags
No related merge requests found
...@@ -28,6 +28,7 @@ import java.nio.charset.Charset; ...@@ -28,6 +28,7 @@ import java.nio.charset.Charset;
/** /**
* @author damonkohler@google.com (Damon Kohler) * @author damonkohler@google.com (Damon Kohler)
* @author mick.gaillard@gmail.com (Mickael Gaillard)
*/ */
public enum PrimitiveFieldType implements FieldType { public enum PrimitiveFieldType implements FieldType {
...@@ -571,7 +572,7 @@ public enum PrimitiveFieldType implements FieldType { ...@@ -571,7 +572,7 @@ public enum PrimitiveFieldType implements FieldType {
@Override @Override
public <T> void serialize(T value, ChannelBuffer buffer) { public <T> void serialize(T value, ChannelBuffer buffer) {
Preconditions.checkArgument(value instanceof String); Preconditions.checkArgument(value instanceof String);
byte[] bytes = ((String) value).getBytes(); byte[] bytes = ((String) value).getBytes(DEFAULT_CHARSET);
buffer.writeInt(bytes.length); buffer.writeInt(bytes.length);
buffer.writeBytes(bytes); buffer.writeBytes(bytes);
} }
...@@ -581,7 +582,7 @@ public enum PrimitiveFieldType implements FieldType { ...@@ -581,7 +582,7 @@ public enum PrimitiveFieldType implements FieldType {
public String deserialize(ChannelBuffer buffer) { public String deserialize(ChannelBuffer buffer) {
int length = buffer.readInt(); int length = buffer.readInt();
ByteBuffer stringBuffer = buffer.readSlice(length).toByteBuffer(); ByteBuffer stringBuffer = buffer.readSlice(length).toByteBuffer();
return Charset.forName("US-ASCII").decode(stringBuffer).toString(); return DEFAULT_CHARSET.decode(stringBuffer).toString();
} }
@SuppressWarnings("unchecked") @SuppressWarnings("unchecked")
...@@ -678,6 +679,7 @@ public enum PrimitiveFieldType implements FieldType { ...@@ -678,6 +679,7 @@ public enum PrimitiveFieldType implements FieldType {
} }
}; };
private static final Charset DEFAULT_CHARSET = Charset.forName("UTF-8");
private static final ImmutableSet<String> TYPE_NAMES; private static final ImmutableSet<String> TYPE_NAMES;
static { static {
......
...@@ -30,6 +30,7 @@ import org.ros.message.Time; ...@@ -30,6 +30,7 @@ import org.ros.message.Time;
/** /**
* @author damonkohler@google.com (Damon Kohler) * @author damonkohler@google.com (Damon Kohler)
* @author mick.gaillard@gmail.com (Mickael Gaillard)
*/ */
public class RawMessageSerializationTest { public class RawMessageSerializationTest {
...@@ -139,6 +140,71 @@ public class RawMessageSerializationTest { ...@@ -139,6 +140,71 @@ public class RawMessageSerializationTest {
checkSerializeAndDeserialize(rawMessage); checkSerializeAndDeserialize(rawMessage);
} }
/**
 * Feeds a series of non-ASCII sample strings through a single
 * std_msgs/String message, handing the message to
 * checkSerializeAndDeserialize after each assignment.
 *
 * Most samples are i18n test cases based on
 * http://www.inter-locale.com/whitepaper/learn/learn-to-test.html
 */
@Test
public void testStringUTF8() {
  final String[] samples = {
    // Mixed accented Latin, currency sign and CJK/kana
    "éêè €àáßëœ 文字化け",
    // Combining marks and accents
    "àéîōũ",
    // DOS 860
    "você nós mãe avô irmã criança",
    // Windows-1252
    "€ŒœŠš™©‰ƒ",
    // Turkish
    "ışık bir İyi Günler",
    // Dakuten and handakuten marks
    "がざばだぱか゛さ゛た゛は",
    // Combining Grapheme Joiner character
    "אִ͏ַ",
    // Bidi with Latin
    "abcאבגדabc ",
    "אבגדabcאבגד",
    "אבגד012אבגד",
    "אבגד 012 012",
    // Complex scripts
    "สวัสดี",
    "டாஹ்கோ",
    "بِسْمِ اللّهِ الرَّحْمـَنِ الرَّحِيمِ",
    // Numeric shaping
    "عدد مارس ١٩٩٨",
    // Common scripts and encodings
    "Слава Жанна Ювеналий Ярополк"
  };

  // The same message instance is reused for every sample; only "data" changes.
  final RawMessage message = messageFactory.newFromType("std_msgs/String");
  for (String sample : samples) {
    message.setString("data", sample);
    checkSerializeAndDeserialize(message);
  }
}
@Test @Test
public void testTime() { public void testTime() {
RawMessage rawMessage = messageFactory.newFromType("std_msgs/Time"); RawMessage rawMessage = messageFactory.newFromType("std_msgs/Time");
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment