From f6d3acbe6a4493a47455547d0289789811bafdbf Mon Sep 17 00:00:00 2001 From: Mickael Gaillard <mickael.gaillard@tactfactory.com> Date: Sat, 19 Apr 2014 14:25:08 +0200 Subject: [PATCH] Add support for UTF-8! For i18n (e.g. TTS). "A string must always contain UTF-8 encoded or 7-bit ASCII text." https://developers.google.com/protocol-buffers/docs/proto#scalar "unicode strings are currently not supported as a ROS data type. utf-8 should be used to be compatible with ROS string serialization. " http://wiki.ros.org/msg --- .../message/field/PrimitiveFieldType.java | 6 +- .../message/RawMessageSerializationTest.java | 66 +++++++++++++++++++ 2 files changed, 70 insertions(+), 2 deletions(-) diff --git a/message_generation/src/main/java/org/ros/internal/message/field/PrimitiveFieldType.java b/message_generation/src/main/java/org/ros/internal/message/field/PrimitiveFieldType.java index bed4916..bfbe835 100644 --- a/message_generation/src/main/java/org/ros/internal/message/field/PrimitiveFieldType.java +++ b/message_generation/src/main/java/org/ros/internal/message/field/PrimitiveFieldType.java @@ -28,6 +28,7 @@ import java.nio.charset.Charset; /** * @author damonkohler@google.com (Damon Kohler) + * @author mick.gaillard@gmail.com (Mickael Gaillard) */ public enum PrimitiveFieldType implements FieldType { @@ -571,7 +572,7 @@ public enum PrimitiveFieldType implements FieldType { @Override public <T> void serialize(T value, ChannelBuffer buffer) { Preconditions.checkArgument(value instanceof String); - byte[] bytes = ((String) value).getBytes(); + byte[] bytes = ((String) value).getBytes(DEFAULT_CHARSET); buffer.writeInt(bytes.length); buffer.writeBytes(bytes); } @@ -581,7 +582,7 @@ public enum PrimitiveFieldType implements FieldType { public String deserialize(ChannelBuffer buffer) { int length = buffer.readInt(); ByteBuffer stringBuffer = buffer.readSlice(length).toByteBuffer(); - return Charset.forName("US-ASCII").decode(stringBuffer).toString(); + return 
DEFAULT_CHARSET.decode(stringBuffer).toString(); } @SuppressWarnings("unchecked") @@ -678,6 +679,7 @@ public enum PrimitiveFieldType implements FieldType { } }; + private static final Charset DEFAULT_CHARSET = Charset.forName("UTF-8"); private static final ImmutableSet<String> TYPE_NAMES; static { diff --git a/message_generation/src/test/java/org/ros/internal/message/RawMessageSerializationTest.java b/message_generation/src/test/java/org/ros/internal/message/RawMessageSerializationTest.java index d5bf1bb..4f3ce91 100644 --- a/message_generation/src/test/java/org/ros/internal/message/RawMessageSerializationTest.java +++ b/message_generation/src/test/java/org/ros/internal/message/RawMessageSerializationTest.java @@ -30,6 +30,7 @@ import org.ros.message.Time; /** * @author damonkohler@google.com (Damon Kohler) + * @author mick.gaillard@gmail.com (Mickael Gaillard) */ public class RawMessageSerializationTest { @@ -138,6 +139,71 @@ public class RawMessageSerializationTest { rawMessage.setString("data", "Hello, ROS!"); checkSerializeAndDeserialize(rawMessage); } + + @Test + public void testStringUTF8() { + RawMessage rawMessage = messageFactory.newFromType("std_msgs/String"); + rawMessage.setString("data", "éêè €àáßëœ 文字化け"); + checkSerializeAndDeserialize(rawMessage); + + // i18n test case + // base on http://www.inter-locale.com/whitepaper/learn/learn-to-test.html + + // Combining Marks and Accents test + rawMessage.setString("data", "àéîōũ"); + checkSerializeAndDeserialize(rawMessage); + + // DOS 860 test + rawMessage.setString("data", "você nós mãe avô irmã criança"); + checkSerializeAndDeserialize(rawMessage); + + // Windows-1252 test + rawMessage.setString("data", "€ŒœŠš™©‰ƒ"); + checkSerializeAndDeserialize(rawMessage); + + // Turkish test + rawMessage.setString("data", "ışık bir İyi Günler"); + checkSerializeAndDeserialize(rawMessage); + + // Dakuten and handakuten marks test + rawMessage.setString("data", "がざばだぱか゛さ゛た゛は"); + 
checkSerializeAndDeserialize(rawMessage); + + // Combining Grapheme Joiner character + rawMessage.setString("data", "אִ͏ַ"); + checkSerializeAndDeserialize(rawMessage); + + // Bidi with Latin test + rawMessage.setString("data", "abcאבגדabc "); + checkSerializeAndDeserialize(rawMessage); + + rawMessage.setString("data", "אבגדabcאבגד"); + checkSerializeAndDeserialize(rawMessage); + + rawMessage.setString("data", "אבגד012אבגד"); + checkSerializeAndDeserialize(rawMessage); + + rawMessage.setString("data", "אבגד 012 012"); + checkSerializeAndDeserialize(rawMessage); + + // Complex Scripts test + rawMessage.setString("data", "สวัสดี"); + checkSerializeAndDeserialize(rawMessage); + + rawMessage.setString("data", "டாஹ்கோ"); + checkSerializeAndDeserialize(rawMessage); + + rawMessage.setString("data", "بِسْمِ اللّهِ الرَّحْمـَنِ الرَّحِيمِ"); + checkSerializeAndDeserialize(rawMessage); + + // Numeric Shaping test + rawMessage.setString("data", "عدد مارس ١٩٩٨"); + checkSerializeAndDeserialize(rawMessage); + + // Common Scripts and Encodings test + rawMessage.setString("data", "Слава Жанна Ювеналий Ярополк"); + checkSerializeAndDeserialize(rawMessage); + } @Test public void testTime() { -- GitLab