From f6d3acbe6a4493a47455547d0289789811bafdbf Mon Sep 17 00:00:00 2001
From: Mickael Gaillard <mickael.gaillard@tactfactory.com>
Date: Sat, 19 Apr 2014 14:25:08 +0200
Subject: [PATCH] Add UTF-8 support for i18n (e.g. TTS).

"A string must always contain UTF-8 encoded or 7-bit ASCII text." https://developers.google.com/protocol-buffers/docs/proto#scalar

"unicode strings are currently not supported as a ROS data type. utf-8 should be used to be compatible with ROS string serialization. " http://wiki.ros.org/msg
---
 .../message/field/PrimitiveFieldType.java     |  6 +-
 .../message/RawMessageSerializationTest.java  | 66 +++++++++++++++++++
 2 files changed, 70 insertions(+), 2 deletions(-)

diff --git a/message_generation/src/main/java/org/ros/internal/message/field/PrimitiveFieldType.java b/message_generation/src/main/java/org/ros/internal/message/field/PrimitiveFieldType.java
index bed4916..bfbe835 100644
--- a/message_generation/src/main/java/org/ros/internal/message/field/PrimitiveFieldType.java
+++ b/message_generation/src/main/java/org/ros/internal/message/field/PrimitiveFieldType.java
@@ -28,6 +28,7 @@ import java.nio.charset.Charset;
 
 /**
  * @author damonkohler@google.com (Damon Kohler)
+ * @author mick.gaillard@gmail.com (Mickael Gaillard)
  */
 public enum PrimitiveFieldType implements FieldType {
 
@@ -571,7 +572,7 @@ public enum PrimitiveFieldType implements FieldType {
     @Override
     public <T> void serialize(T value, ChannelBuffer buffer) {
       Preconditions.checkArgument(value instanceof String);
-      byte[] bytes = ((String) value).getBytes();
+      byte[] bytes = ((String) value).getBytes(DEFAULT_CHARSET);
       buffer.writeInt(bytes.length);
       buffer.writeBytes(bytes);
     }
@@ -581,7 +582,7 @@ public enum PrimitiveFieldType implements FieldType {
     public String deserialize(ChannelBuffer buffer) {
       int length = buffer.readInt();
       ByteBuffer stringBuffer = buffer.readSlice(length).toByteBuffer();
-      return Charset.forName("US-ASCII").decode(stringBuffer).toString();
+      return DEFAULT_CHARSET.decode(stringBuffer).toString();
     }
 
     @SuppressWarnings("unchecked")
@@ -678,6 +679,7 @@ public enum PrimitiveFieldType implements FieldType {
     }
   };
 
+  private static final Charset DEFAULT_CHARSET = Charset.forName("UTF-8");
   private static final ImmutableSet<String> TYPE_NAMES;
 
   static {
diff --git a/message_generation/src/test/java/org/ros/internal/message/RawMessageSerializationTest.java b/message_generation/src/test/java/org/ros/internal/message/RawMessageSerializationTest.java
index d5bf1bb..4f3ce91 100644
--- a/message_generation/src/test/java/org/ros/internal/message/RawMessageSerializationTest.java
+++ b/message_generation/src/test/java/org/ros/internal/message/RawMessageSerializationTest.java
@@ -30,6 +30,7 @@ import org.ros.message.Time;
 
 /**
  * @author damonkohler@google.com (Damon Kohler)
+ * @author mick.gaillard@gmail.com (Mickael Gaillard)
  */
 public class RawMessageSerializationTest {
 
@@ -138,6 +139,71 @@ public class RawMessageSerializationTest {
     rawMessage.setString("data", "Hello, ROS!");
     checkSerializeAndDeserialize(rawMessage);
   }
+  
+  @Test
+  public void testStringUTF8() {
+    RawMessage rawMessage = messageFactory.newFromType("std_msgs/String");
+    rawMessage.setString("data", "éêè €àáßëœ 文字化け");
+    checkSerializeAndDeserialize(rawMessage);
+
+    // i18n test case
+    // based on http://www.inter-locale.com/whitepaper/learn/learn-to-test.html
+
+    // Combining Marks and Accents test
+    rawMessage.setString("data", "àéîōũ");
+    checkSerializeAndDeserialize(rawMessage);
+
+    // DOS code page 860 (Portuguese) test
+    rawMessage.setString("data", "você nós mãe avô irmã criança");
+    checkSerializeAndDeserialize(rawMessage);
+
+    // Windows-1252 test
+    rawMessage.setString("data", "€ŒœŠš™©‰ƒ");
+    checkSerializeAndDeserialize(rawMessage);
+
+    // Turkish test
+    rawMessage.setString("data", "ışık bir İyi Günler");
+    checkSerializeAndDeserialize(rawMessage);
+
+    // Dakuten and handakuten marks test
+    rawMessage.setString("data", "がざばだぱか゛さ゛た゛は");
+    checkSerializeAndDeserialize(rawMessage);
+
+    // Combining Grapheme Joiner character
+    rawMessage.setString("data", "אִ͏ַ");
+    checkSerializeAndDeserialize(rawMessage);
+
+    // Bidi with Latin test
+    rawMessage.setString("data", "abcאבגדabc ");
+    checkSerializeAndDeserialize(rawMessage);
+
+    rawMessage.setString("data", "אבגדabcאבגד");
+    checkSerializeAndDeserialize(rawMessage);
+
+    rawMessage.setString("data", "אבגד012אבגד");
+    checkSerializeAndDeserialize(rawMessage);
+
+    rawMessage.setString("data", "אבגד 012 012");
+    checkSerializeAndDeserialize(rawMessage);
+
+    // Complex Scripts test
+    rawMessage.setString("data", "สวัสดี");
+    checkSerializeAndDeserialize(rawMessage);
+
+    rawMessage.setString("data", "டாஹ்கோ");
+    checkSerializeAndDeserialize(rawMessage);
+
+    rawMessage.setString("data", "بِسْمِ اللّهِ الرَّحْمـَنِ الرَّحِيمِ");
+    checkSerializeAndDeserialize(rawMessage);
+
+    // Numeric Shaping test
+    rawMessage.setString("data", "عدد مارس ١٩٩٨");
+    checkSerializeAndDeserialize(rawMessage);
+
+    // Common Scripts and Encodings test
+    rawMessage.setString("data", "Слава Жанна Ювеналий Ярополк");
+    checkSerializeAndDeserialize(rawMessage);
+  }
 
   @Test
   public void testTime() {
-- 
GitLab