blob: 13e29c9b9e323a59e981cea91f0f919be5ec2dba [file] [log] [blame]
Heinrich Schuchardtf11a1642018-08-31 21:31:28 +02001// SPDX-License-Identifier: GPL-2.0+
2/*
3 * Unit tests for Unicode functions
4 *
5 * Copyright (c) 2018 Heinrich Schuchardt <xypron.glpk@gmx.de>
6 */
7
Heinrich Schuchardtf11a1642018-08-31 21:31:28 +02008#include <charset.h>
9#include <command.h>
Heinrich Schuchardtaf114232020-10-30 12:23:59 +010010#include <efi_loader.h>
Heinrich Schuchardtf11a1642018-08-31 21:31:28 +020011#include <errno.h>
Simon Glassf7ae49f2020-05-10 11:40:05 -060012#include <log.h>
Simon Glass336d4612020-02-03 07:36:16 -070013#include <malloc.h>
Heinrich Schuchardtf11a1642018-08-31 21:31:28 +020014#include <test/test.h>
15#include <test/suites.h>
16#include <test/ut.h>
17
/*
 * Declare a test function as a member of the "unicode_test" suite, the
 * suite that do_ut_unicode() looks up and runs.
 */
#define UNICODE_TEST(_name) UNIT_TEST(_name, 0, unicode_test)
20
21/* Constants c1-c4 and d1-d4 encode the same letters */
22
23/* Six characters translating to one utf-8 byte each. */
24static const u16 c1[] = {0x55, 0x2d, 0x42, 0x6f, 0x6f, 0x74, 0x00};
25/* One character translating to two utf-8 bytes */
26static const u16 c2[] = {0x6b, 0x61, 0x66, 0x62, 0xe1, 0x74, 0x75, 0x72, 0x00};
27/* Three characters translating to three utf-8 bytes each */
28static const u16 c3[] = {0x6f5c, 0x6c34, 0x8266, 0x00};
29/* Three letters translating to four utf-8 bytes each */
30static const u16 c4[] = {0xd801, 0xdc8d, 0xd801, 0xdc96, 0xd801, 0xdc87,
31 0x0000};
32
33/* Illegal utf-16 strings */
34static const u16 i1[] = {0x69, 0x31, 0xdc87, 0x6c, 0x00};
35static const u16 i2[] = {0x69, 0x32, 0xd801, 0xd801, 0x6c, 0x00};
36static const u16 i3[] = {0x69, 0x33, 0xd801, 0x00};
37
38/* Six characters translating to one utf-16 word each. */
39static const char d1[] = {0x55, 0x2d, 0x42, 0x6f, 0x6f, 0x74, 0x00};
40/* Eight characters translating to one utf-16 word each */
41static const char d2[] = {0x6b, 0x61, 0x66, 0x62, 0xc3, 0xa1, 0x74, 0x75,
42 0x72, 0x00};
43/* Three characters translating to one utf-16 word each */
44static const char d3[] = {0xe6, 0xbd, 0x9c, 0xe6, 0xb0, 0xb4, 0xe8, 0x89,
45 0xa6, 0x00};
46/* Three letters translating to two utf-16 word each */
47static const char d4[] = {0xf0, 0x90, 0x92, 0x8d, 0xf0, 0x90, 0x92, 0x96,
48 0xf0, 0x90, 0x92, 0x87, 0x00};
Heinrich Schuchardte91789e2021-02-27 14:08:38 +010049/* Letter not in code page 437 */
50static const char d5[] = {0xCE, 0x92, 0x20, 0x69, 0x73, 0x20, 0x6E, 0x6F,
51 0x74, 0x20, 0x42, 0x00};
Heinrich Schuchardtf11a1642018-08-31 21:31:28 +020052
53/* Illegal utf-8 strings */
54static const char j1[] = {0x6a, 0x31, 0xa1, 0x6c, 0x00};
55static const char j2[] = {0x6a, 0x32, 0xc3, 0xc3, 0x6c, 0x00};
56static const char j3[] = {0x6a, 0x33, 0xf0, 0x90, 0xf0, 0x00};
Heinrich Schuchardtddbaff52021-02-27 14:08:37 +010057static const char j4[] = {0xa1, 0x00};
Heinrich Schuchardtf11a1642018-08-31 21:31:28 +020058
Heinrich Schuchardt02b31dc2019-07-14 17:47:46 +020059static int unicode_test_u16_strlen(struct unit_test_state *uts)
60{
61 ut_asserteq(6, u16_strlen(c1));
62 ut_asserteq(8, u16_strlen(c2));
63 ut_asserteq(3, u16_strlen(c3));
64 ut_asserteq(6, u16_strlen(c4));
65 return 0;
66}
67UNICODE_TEST(unicode_test_u16_strlen);
68
Heinrich Schuchardtf823e322022-12-18 05:32:14 +000069static int unicode_test_u16_strnlen(struct unit_test_state *uts)
70{
71 ut_asserteq(0, u16_strnlen(c1, 0));
72 ut_asserteq(4, u16_strnlen(c1, 4));
73 ut_asserteq(6, u16_strnlen(c1, 6));
74 ut_asserteq(6, u16_strnlen(c1, 7));
75
76 return 0;
77}
78UNICODE_TEST(unicode_test_u16_strnlen);
79
Heinrich Schuchardtbc196812019-02-15 23:12:50 +010080static int unicode_test_u16_strdup(struct unit_test_state *uts)
Heinrich Schuchardtabb93cb2018-12-14 22:00:37 +010081{
82 u16 *copy = u16_strdup(c4);
83
84 ut_assert(copy != c4);
Simon Glassf91f3662020-05-10 12:52:45 -060085 ut_asserteq_mem(copy, c4, sizeof(c4));
Heinrich Schuchardtabb93cb2018-12-14 22:00:37 +010086 free(copy);
Simon Glassf91f3662020-05-10 12:52:45 -060087
Heinrich Schuchardtabb93cb2018-12-14 22:00:37 +010088 return 0;
89}
Heinrich Schuchardtbc196812019-02-15 23:12:50 +010090UNICODE_TEST(unicode_test_u16_strdup);
Heinrich Schuchardtabb93cb2018-12-14 22:00:37 +010091
Heinrich Schuchardtbc196812019-02-15 23:12:50 +010092static int unicode_test_u16_strcpy(struct unit_test_state *uts)
Heinrich Schuchardtabb93cb2018-12-14 22:00:37 +010093{
94 u16 *r;
95 u16 copy[10];
96
97 r = u16_strcpy(copy, c1);
98 ut_assert(r == copy);
Simon Glassf91f3662020-05-10 12:52:45 -060099 ut_asserteq_mem(copy, c1, sizeof(c1));
100
Heinrich Schuchardtabb93cb2018-12-14 22:00:37 +0100101 return 0;
102}
Heinrich Schuchardtbc196812019-02-15 23:12:50 +0100103UNICODE_TEST(unicode_test_u16_strcpy);
Heinrich Schuchardtabb93cb2018-12-14 22:00:37 +0100104
Heinrich Schuchardtfbba2f62018-08-31 21:31:30 +0200105/* U-Boot uses UTF-16 strings in the EFI context only. */
106#if CONFIG_IS_ENABLED(EFI_LOADER) && !defined(API_BUILD)
Heinrich Schuchardtbc196812019-02-15 23:12:50 +0100107static int unicode_test_string16(struct unit_test_state *uts)
Heinrich Schuchardtfbba2f62018-08-31 21:31:30 +0200108{
109 char buf[20];
Heinrich Schuchardtc672dd72022-01-29 18:28:08 +0100110 int ret;
Heinrich Schuchardtfbba2f62018-08-31 21:31:30 +0200111
112 /* Test length and precision */
113 memset(buf, 0xff, sizeof(buf));
114 sprintf(buf, "%8.6ls", c2);
115 ut_asserteq(' ', buf[1]);
116 ut_assert(!strncmp(&buf[2], d2, 7));
117 ut_assert(!buf[9]);
118
119 memset(buf, 0xff, sizeof(buf));
120 sprintf(buf, "%8.6ls", c4);
121 ut_asserteq(' ', buf[4]);
122 ut_assert(!strncmp(&buf[5], d4, 12));
123 ut_assert(!buf[17]);
124
125 memset(buf, 0xff, sizeof(buf));
126 sprintf(buf, "%-8.2ls", c4);
127 ut_asserteq(' ', buf[8]);
128 ut_assert(!strncmp(buf, d4, 8));
129 ut_assert(!buf[14]);
130
131 /* Test handling of illegal utf-16 sequences */
132 memset(buf, 0xff, sizeof(buf));
133 sprintf(buf, "%ls", i1);
134 ut_asserteq_str("i1?l", buf);
135
136 memset(buf, 0xff, sizeof(buf));
137 sprintf(buf, "%ls", i2);
138 ut_asserteq_str("i2?l", buf);
139
140 memset(buf, 0xff, sizeof(buf));
141 sprintf(buf, "%ls", i3);
142 ut_asserteq_str("i3?", buf);
143
Heinrich Schuchardtc672dd72022-01-29 18:28:08 +0100144 memset(buf, 0xff, sizeof(buf));
145 ret = snprintf(buf, 4, "%ls", c1);
146 ut_asserteq(6, ret);
147 ut_asserteq_str("U-B", buf);
148
149 memset(buf, 0xff, sizeof(buf));
150 ret = snprintf(buf, 6, "%ls", c2);
151 ut_asserteq_str("kafb", buf);
152 ut_asserteq(9, ret);
153
154 memset(buf, 0xff, sizeof(buf));
155 ret = snprintf(buf, 7, "%ls", c2);
156 ut_asserteq_str("kafb\xC3\xA1", buf);
157 ut_asserteq(9, ret);
158
159 memset(buf, 0xff, sizeof(buf));
160 ret = snprintf(buf, 8, "%ls", c3);
161 ut_asserteq_str("\xE6\xBD\x9C\xE6\xB0\xB4", buf);
162 ut_asserteq(9, ret);
163
164 memset(buf, 0xff, sizeof(buf));
165 ret = snprintf(buf, 11, "%ls", c4);
166 ut_asserteq_str("\xF0\x90\x92\x8D\xF0\x90\x92\x96", buf);
167 ut_asserteq(12, ret);
168
169 memset(buf, 0xff, sizeof(buf));
170 ret = snprintf(buf, 4, "%ls", c4);
171 ut_asserteq_str("", buf);
172 ut_asserteq(12, ret);
173
Heinrich Schuchardtfbba2f62018-08-31 21:31:30 +0200174 return 0;
175}
Heinrich Schuchardtbc196812019-02-15 23:12:50 +0100176UNICODE_TEST(unicode_test_string16);
Heinrich Schuchardtfbba2f62018-08-31 21:31:30 +0200177#endif
178
Heinrich Schuchardtbc196812019-02-15 23:12:50 +0100179static int unicode_test_utf8_get(struct unit_test_state *uts)
Heinrich Schuchardtf11a1642018-08-31 21:31:28 +0200180{
181 const char *s;
182 s32 code;
183 int i;
184
185 /* Check characters less than 0x800 */
186 s = d2;
187 for (i = 0; i < 8; ++i) {
188 code = utf8_get((const char **)&s);
189 /* c2 is the utf-8 encoding of d2 */
190 ut_asserteq(c2[i], code);
191 if (!code)
192 break;
193 }
Marek Vasutfa847bb2023-03-10 04:33:13 +0100194 ut_asserteq_ptr(s, d2 + 9);
Heinrich Schuchardtf11a1642018-08-31 21:31:28 +0200195
196 /* Check characters less than 0x10000 */
197 s = d3;
198 for (i = 0; i < 4; ++i) {
199 code = utf8_get((const char **)&s);
200 /* c3 is the utf-8 encoding of d3 */
201 ut_asserteq(c3[i], code);
202 if (!code)
203 break;
204 }
Marek Vasutfa847bb2023-03-10 04:33:13 +0100205 ut_asserteq_ptr(s, d3 + 9);
Heinrich Schuchardtf11a1642018-08-31 21:31:28 +0200206
207 /* Check character greater 0xffff */
208 s = d4;
209 code = utf8_get((const char **)&s);
210 ut_asserteq(0x0001048d, code);
211 ut_asserteq_ptr(s, d4 + 4);
212
Heinrich Schuchardtddbaff52021-02-27 14:08:37 +0100213 /* Check illegal character */
214 s = j4;
215 code = utf8_get((const char **)&s);
216 ut_asserteq(-1, code);
217 ut_asserteq_ptr(j4 + 1, s);
218
Heinrich Schuchardtf11a1642018-08-31 21:31:28 +0200219 return 0;
220}
Heinrich Schuchardtbc196812019-02-15 23:12:50 +0100221UNICODE_TEST(unicode_test_utf8_get);
Heinrich Schuchardtf11a1642018-08-31 21:31:28 +0200222
/* Check writing single code points as utf-8 with utf8_put() */
static int unicode_test_utf8_put(struct unit_test_state *uts)
{
	/*
	 * Zero-initialized so that the string comparisons below find a
	 * terminator behind the bytes written by utf8_put().
	 */
	char buffer[8] = { 0 };
	char *pos = buffer;

	/* Commercial at, translates to one byte */
	ut_assert(!utf8_put('@', &pos));
	ut_asserteq(1, pos - buffer);
	ut_asserteq('@', buffer[0]);
	ut_assert(!buffer[1]);

	/* Latin letter G with acute, translates to two bytes */
	pos = buffer;
	ut_assert(!utf8_put(0x1f4, &pos));
	ut_asserteq(2, pos - buffer);
	ut_asserteq_str("\xc7\xb4", buffer);

	/* Tagalog letter i, translates to three bytes */
	pos = buffer;
	ut_assert(!utf8_put(0x1701, &pos));
	ut_asserteq(3, pos - buffer);
	ut_asserteq_str("\xe1\x9c\x81", buffer);

	/* Hamster face, translates to four bytes */
	pos = buffer;
	ut_assert(!utf8_put(0x1f439, &pos));
	ut_asserteq(4, pos - buffer);
	ut_asserteq_str("\xf0\x9f\x90\xb9", buffer);

	/* Illegal code (surrogate range) must be rejected */
	pos = buffer;
	ut_asserteq(-1, utf8_put(0xd888, &pos));

	return 0;
}
UNICODE_TEST(unicode_test_utf8_put);
Heinrich Schuchardtf11a1642018-08-31 21:31:28 +0200260
Heinrich Schuchardtbc196812019-02-15 23:12:50 +0100261static int unicode_test_utf8_utf16_strlen(struct unit_test_state *uts)
Heinrich Schuchardtf11a1642018-08-31 21:31:28 +0200262{
263 ut_asserteq(6, utf8_utf16_strlen(d1));
264 ut_asserteq(8, utf8_utf16_strlen(d2));
265 ut_asserteq(3, utf8_utf16_strlen(d3));
266 ut_asserteq(6, utf8_utf16_strlen(d4));
267
268 /* illegal utf-8 sequences */
269 ut_asserteq(4, utf8_utf16_strlen(j1));
Heinrich Schuchardt35cbb792018-09-12 00:05:32 +0200270 ut_asserteq(4, utf8_utf16_strlen(j2));
Heinrich Schuchardtf11a1642018-08-31 21:31:28 +0200271 ut_asserteq(3, utf8_utf16_strlen(j3));
272
273 return 0;
274}
Heinrich Schuchardtbc196812019-02-15 23:12:50 +0100275UNICODE_TEST(unicode_test_utf8_utf16_strlen);
Heinrich Schuchardtf11a1642018-08-31 21:31:28 +0200276
Heinrich Schuchardtbc196812019-02-15 23:12:50 +0100277static int unicode_test_utf8_utf16_strnlen(struct unit_test_state *uts)
Heinrich Schuchardtf11a1642018-08-31 21:31:28 +0200278{
279 ut_asserteq(3, utf8_utf16_strnlen(d1, 3));
280 ut_asserteq(6, utf8_utf16_strnlen(d1, 13));
281 ut_asserteq(6, utf8_utf16_strnlen(d2, 6));
282 ut_asserteq(2, utf8_utf16_strnlen(d3, 2));
283 ut_asserteq(4, utf8_utf16_strnlen(d4, 2));
284 ut_asserteq(6, utf8_utf16_strnlen(d4, 3));
285
286 /* illegal utf-8 sequences */
287 ut_asserteq(4, utf8_utf16_strnlen(j1, 16));
Heinrich Schuchardt35cbb792018-09-12 00:05:32 +0200288 ut_asserteq(4, utf8_utf16_strnlen(j2, 16));
Heinrich Schuchardtf11a1642018-08-31 21:31:28 +0200289 ut_asserteq(3, utf8_utf16_strnlen(j3, 16));
290
291 return 0;
292}
Heinrich Schuchardtbc196812019-02-15 23:12:50 +0100293UNICODE_TEST(unicode_test_utf8_utf16_strnlen);
Heinrich Schuchardtf11a1642018-08-31 21:31:28 +0200294
295/**
296 * ut_u16_strcmp() - Compare to u16 strings.
297 *
298 * @a1: first string
299 * @a2: second string
300 * @count: number of u16 to compare
301 * Return: -1 if a1 < a2, 0 if a1 == a2, 1 if a1 > a2
302 */
Heinrich Schuchardtbc196812019-02-15 23:12:50 +0100303static int unicode_test_u16_strcmp(const u16 *a1, const u16 *a2, size_t count)
Heinrich Schuchardtf11a1642018-08-31 21:31:28 +0200304{
305 for (; (*a1 || *a2) && count; ++a1, ++a2, --count) {
306 if (*a1 < *a2)
307 return -1;
308 if (*a1 > *a2)
309 return 1;
310 }
311 return 0;
312}
313
Heinrich Schuchardtbc196812019-02-15 23:12:50 +0100314static int unicode_test_utf8_utf16_strcpy(struct unit_test_state *uts)
Heinrich Schuchardtf11a1642018-08-31 21:31:28 +0200315{
316 u16 buf[16];
317 u16 *pos;
318
319 pos = buf;
320 utf8_utf16_strcpy(&pos, d1);
321 ut_asserteq(6, pos - buf);
Heinrich Schuchardtbc196812019-02-15 23:12:50 +0100322 ut_assert(!unicode_test_u16_strcmp(buf, c1, SIZE_MAX));
Heinrich Schuchardtf11a1642018-08-31 21:31:28 +0200323
324 pos = buf;
325 utf8_utf16_strcpy(&pos, d2);
326 ut_asserteq(8, pos - buf);
Heinrich Schuchardtbc196812019-02-15 23:12:50 +0100327 ut_assert(!unicode_test_u16_strcmp(buf, c2, SIZE_MAX));
Heinrich Schuchardtf11a1642018-08-31 21:31:28 +0200328
329 pos = buf;
330 utf8_utf16_strcpy(&pos, d3);
331 ut_asserteq(3, pos - buf);
Heinrich Schuchardtbc196812019-02-15 23:12:50 +0100332 ut_assert(!unicode_test_u16_strcmp(buf, c3, SIZE_MAX));
Heinrich Schuchardtf11a1642018-08-31 21:31:28 +0200333
334 pos = buf;
335 utf8_utf16_strcpy(&pos, d4);
336 ut_asserteq(6, pos - buf);
Heinrich Schuchardtbc196812019-02-15 23:12:50 +0100337 ut_assert(!unicode_test_u16_strcmp(buf, c4, SIZE_MAX));
Heinrich Schuchardtf11a1642018-08-31 21:31:28 +0200338
339 /* Illegal utf-8 strings */
340 pos = buf;
341 utf8_utf16_strcpy(&pos, j1);
342 ut_asserteq(4, pos - buf);
Simon Glass5b9a5b22022-01-23 12:55:14 -0700343 ut_assert(!unicode_test_u16_strcmp(buf, u"j1?l", SIZE_MAX));
Heinrich Schuchardtf11a1642018-08-31 21:31:28 +0200344
345 pos = buf;
346 utf8_utf16_strcpy(&pos, j2);
Heinrich Schuchardt35cbb792018-09-12 00:05:32 +0200347 ut_asserteq(4, pos - buf);
Simon Glass5b9a5b22022-01-23 12:55:14 -0700348 ut_assert(!unicode_test_u16_strcmp(buf, u"j2?l", SIZE_MAX));
Heinrich Schuchardtf11a1642018-08-31 21:31:28 +0200349
350 pos = buf;
351 utf8_utf16_strcpy(&pos, j3);
352 ut_asserteq(3, pos - buf);
Simon Glass5b9a5b22022-01-23 12:55:14 -0700353 ut_assert(!unicode_test_u16_strcmp(buf, u"j3?", SIZE_MAX));
Heinrich Schuchardtf11a1642018-08-31 21:31:28 +0200354
355 return 0;
356}
Heinrich Schuchardtbc196812019-02-15 23:12:50 +0100357UNICODE_TEST(unicode_test_utf8_utf16_strcpy);
Heinrich Schuchardtf11a1642018-08-31 21:31:28 +0200358
Heinrich Schuchardtbc196812019-02-15 23:12:50 +0100359static int unicode_test_utf8_utf16_strncpy(struct unit_test_state *uts)
Heinrich Schuchardtf11a1642018-08-31 21:31:28 +0200360{
361 u16 buf[16];
362 u16 *pos;
363
364 pos = buf;
365 memset(buf, 0, sizeof(buf));
366 utf8_utf16_strncpy(&pos, d1, 4);
367 ut_asserteq(4, pos - buf);
368 ut_assert(!buf[4]);
Heinrich Schuchardtbc196812019-02-15 23:12:50 +0100369 ut_assert(!unicode_test_u16_strcmp(buf, c1, 4));
Heinrich Schuchardtf11a1642018-08-31 21:31:28 +0200370
371 pos = buf;
372 memset(buf, 0, sizeof(buf));
373 utf8_utf16_strncpy(&pos, d2, 10);
374 ut_asserteq(8, pos - buf);
375 ut_assert(buf[4]);
Heinrich Schuchardtbc196812019-02-15 23:12:50 +0100376 ut_assert(!unicode_test_u16_strcmp(buf, c2, SIZE_MAX));
Heinrich Schuchardtf11a1642018-08-31 21:31:28 +0200377
378 pos = buf;
379 memset(buf, 0, sizeof(buf));
380 utf8_utf16_strncpy(&pos, d3, 2);
381 ut_asserteq(2, pos - buf);
382 ut_assert(!buf[2]);
Heinrich Schuchardtbc196812019-02-15 23:12:50 +0100383 ut_assert(!unicode_test_u16_strcmp(buf, c3, 2));
Heinrich Schuchardtf11a1642018-08-31 21:31:28 +0200384
385 pos = buf;
386 memset(buf, 0, sizeof(buf));
387 utf8_utf16_strncpy(&pos, d4, 2);
388 ut_asserteq(4, pos - buf);
389 ut_assert(!buf[4]);
Heinrich Schuchardtbc196812019-02-15 23:12:50 +0100390 ut_assert(!unicode_test_u16_strcmp(buf, c4, 4));
Heinrich Schuchardtf11a1642018-08-31 21:31:28 +0200391
392 pos = buf;
393 memset(buf, 0, sizeof(buf));
394 utf8_utf16_strncpy(&pos, d4, 10);
395 ut_asserteq(6, pos - buf);
396 ut_assert(buf[5]);
Heinrich Schuchardtbc196812019-02-15 23:12:50 +0100397 ut_assert(!unicode_test_u16_strcmp(buf, c4, SIZE_MAX));
Heinrich Schuchardtf11a1642018-08-31 21:31:28 +0200398
399 return 0;
400}
Heinrich Schuchardtbc196812019-02-15 23:12:50 +0100401UNICODE_TEST(unicode_test_utf8_utf16_strncpy);
Heinrich Schuchardtf11a1642018-08-31 21:31:28 +0200402
Heinrich Schuchardtbc196812019-02-15 23:12:50 +0100403static int unicode_test_utf16_get(struct unit_test_state *uts)
Heinrich Schuchardtf11a1642018-08-31 21:31:28 +0200404{
405 const u16 *s;
406 s32 code;
407 int i;
408
409 /* Check characters less than 0x10000 */
410 s = c2;
411 for (i = 0; i < 9; ++i) {
412 code = utf16_get((const u16 **)&s);
413 ut_asserteq(c2[i], code);
414 if (!code)
415 break;
416 }
417 ut_asserteq_ptr(c2 + 8, s);
418
419 /* Check character greater 0xffff */
420 s = c4;
421 code = utf16_get((const u16 **)&s);
422 ut_asserteq(0x0001048d, code);
423 ut_asserteq_ptr(c4 + 2, s);
424
425 return 0;
426}
Heinrich Schuchardtbc196812019-02-15 23:12:50 +0100427UNICODE_TEST(unicode_test_utf16_get);
Heinrich Schuchardtf11a1642018-08-31 21:31:28 +0200428
Heinrich Schuchardtbc196812019-02-15 23:12:50 +0100429static int unicode_test_utf16_put(struct unit_test_state *uts)
Heinrich Schuchardtf11a1642018-08-31 21:31:28 +0200430{
431 u16 buffer[4] = { 0, };
432 u16 *pos;
433
434 /* Commercial at, translates to one word */
435 pos = buffer;
436 ut_assert(!utf16_put('@', &pos));
437 ut_asserteq(1, pos - buffer);
438 ut_asserteq((u16)'@', buffer[0]);
439 ut_assert(!buffer[1]);
440
441 /* Hamster face, translates to two words */
442 pos = buffer;
443 ut_assert(!utf16_put(0x1f439, &pos));
444 ut_asserteq(2, pos - buffer);
445 ut_asserteq((u16)0xd83d, buffer[0]);
446 ut_asserteq((u16)0xdc39, buffer[1]);
447 ut_assert(!buffer[2]);
448
449 /* Illegal code */
450 pos = buffer;
451 ut_asserteq(-1, utf16_put(0xd888, &pos));
452
453 return 0;
454}
Heinrich Schuchardtbc196812019-02-15 23:12:50 +0100455UNICODE_TEST(unicode_test_utf16_put);
Heinrich Schuchardtf11a1642018-08-31 21:31:28 +0200456
Heinrich Schuchardtbc196812019-02-15 23:12:50 +0100457static int unicode_test_utf16_strnlen(struct unit_test_state *uts)
Heinrich Schuchardtf11a1642018-08-31 21:31:28 +0200458{
459 ut_asserteq(3, utf16_strnlen(c1, 3));
460 ut_asserteq(6, utf16_strnlen(c1, 13));
461 ut_asserteq(6, utf16_strnlen(c2, 6));
462 ut_asserteq(2, utf16_strnlen(c3, 2));
463 ut_asserteq(2, utf16_strnlen(c4, 2));
464 ut_asserteq(3, utf16_strnlen(c4, 3));
465
466 /* illegal utf-16 word sequences */
467 ut_asserteq(4, utf16_strnlen(i1, 16));
468 ut_asserteq(4, utf16_strnlen(i2, 16));
469 ut_asserteq(3, utf16_strnlen(i3, 16));
470
471 return 0;
472}
Heinrich Schuchardtbc196812019-02-15 23:12:50 +0100473UNICODE_TEST(unicode_test_utf16_strnlen);
Heinrich Schuchardtf11a1642018-08-31 21:31:28 +0200474
Heinrich Schuchardtbc196812019-02-15 23:12:50 +0100475static int unicode_test_utf16_utf8_strlen(struct unit_test_state *uts)
Heinrich Schuchardtf11a1642018-08-31 21:31:28 +0200476{
477 ut_asserteq(6, utf16_utf8_strlen(c1));
478 ut_asserteq(9, utf16_utf8_strlen(c2));
479 ut_asserteq(9, utf16_utf8_strlen(c3));
480 ut_asserteq(12, utf16_utf8_strlen(c4));
481
482 /* illegal utf-16 word sequences */
483 ut_asserteq(4, utf16_utf8_strlen(i1));
484 ut_asserteq(4, utf16_utf8_strlen(i2));
485 ut_asserteq(3, utf16_utf8_strlen(i3));
486
487 return 0;
488}
Heinrich Schuchardtbc196812019-02-15 23:12:50 +0100489UNICODE_TEST(unicode_test_utf16_utf8_strlen);
Heinrich Schuchardtf11a1642018-08-31 21:31:28 +0200490
Heinrich Schuchardtbc196812019-02-15 23:12:50 +0100491static int unicode_test_utf16_utf8_strnlen(struct unit_test_state *uts)
Heinrich Schuchardtf11a1642018-08-31 21:31:28 +0200492{
493 ut_asserteq(3, utf16_utf8_strnlen(c1, 3));
494 ut_asserteq(6, utf16_utf8_strnlen(c1, 13));
495 ut_asserteq(7, utf16_utf8_strnlen(c2, 6));
496 ut_asserteq(6, utf16_utf8_strnlen(c3, 2));
497 ut_asserteq(8, utf16_utf8_strnlen(c4, 2));
498 ut_asserteq(12, utf16_utf8_strnlen(c4, 3));
499 return 0;
500}
Heinrich Schuchardtbc196812019-02-15 23:12:50 +0100501UNICODE_TEST(unicode_test_utf16_utf8_strnlen);
Heinrich Schuchardtf11a1642018-08-31 21:31:28 +0200502
Heinrich Schuchardtbc196812019-02-15 23:12:50 +0100503static int unicode_test_utf16_utf8_strcpy(struct unit_test_state *uts)
Heinrich Schuchardtf11a1642018-08-31 21:31:28 +0200504{
505 char buf[16];
506 char *pos;
507
508 pos = buf;
509 utf16_utf8_strcpy(&pos, c1);
510 ut_asserteq(6, pos - buf);
511 ut_asserteq_str(d1, buf);
512
513 pos = buf;
514 utf16_utf8_strcpy(&pos, c2);
515 ut_asserteq(9, pos - buf);
516 ut_asserteq_str(d2, buf);
517
518 pos = buf;
519 utf16_utf8_strcpy(&pos, c3);
520 ut_asserteq(9, pos - buf);
521 ut_asserteq_str(d3, buf);
522
523 pos = buf;
524 utf16_utf8_strcpy(&pos, c4);
525 ut_asserteq(12, pos - buf);
526 ut_asserteq_str(d4, buf);
527
528 /* Illegal utf-16 strings */
529 pos = buf;
530 utf16_utf8_strcpy(&pos, i1);
531 ut_asserteq(4, pos - buf);
532 ut_asserteq_str("i1?l", buf);
533
534 pos = buf;
535 utf16_utf8_strcpy(&pos, i2);
536 ut_asserteq(4, pos - buf);
537 ut_asserteq_str("i2?l", buf);
538
539 pos = buf;
540 utf16_utf8_strcpy(&pos, i3);
541 ut_asserteq(3, pos - buf);
542 ut_asserteq_str("i3?", buf);
543
544 return 0;
545}
Heinrich Schuchardtbc196812019-02-15 23:12:50 +0100546UNICODE_TEST(unicode_test_utf16_utf8_strcpy);
Heinrich Schuchardtf11a1642018-08-31 21:31:28 +0200547
Heinrich Schuchardtbc196812019-02-15 23:12:50 +0100548static int unicode_test_utf16_utf8_strncpy(struct unit_test_state *uts)
Heinrich Schuchardtf11a1642018-08-31 21:31:28 +0200549{
550 char buf[16];
551 char *pos;
552
553 pos = buf;
554 memset(buf, 0, sizeof(buf));
555 utf16_utf8_strncpy(&pos, c1, 4);
556 ut_asserteq(4, pos - buf);
557 ut_assert(!buf[4]);
558 ut_assert(!strncmp(buf, d1, 4));
559
560 pos = buf;
561 memset(buf, 0, sizeof(buf));
562 utf16_utf8_strncpy(&pos, c2, 10);
563 ut_asserteq(9, pos - buf);
564 ut_assert(buf[4]);
565 ut_assert(!strncmp(buf, d2, SIZE_MAX));
566
567 pos = buf;
568 memset(buf, 0, sizeof(buf));
569 utf16_utf8_strncpy(&pos, c3, 2);
570 ut_asserteq(6, pos - buf);
571 ut_assert(!buf[6]);
572 ut_assert(!strncmp(buf, d3, 6));
573
574 pos = buf;
575 memset(buf, 0, sizeof(buf));
576 utf16_utf8_strncpy(&pos, c4, 2);
577 ut_asserteq(8, pos - buf);
578 ut_assert(!buf[8]);
579 ut_assert(!strncmp(buf, d4, 8));
580
581 pos = buf;
582 memset(buf, 0, sizeof(buf));
583 utf16_utf8_strncpy(&pos, c4, 10);
584 ut_asserteq(12, pos - buf);
585 ut_assert(buf[5]);
586 ut_assert(!strncmp(buf, d4, SIZE_MAX));
587
588 return 0;
589}
Heinrich Schuchardtbc196812019-02-15 23:12:50 +0100590UNICODE_TEST(unicode_test_utf16_utf8_strncpy);
Heinrich Schuchardtf11a1642018-08-31 21:31:28 +0200591
/* Check conversion of code points to lower case */
static int unicode_test_utf_to_lower(struct unit_test_state *uts)
{
	/* Both ends of the ASCII upper case range */
	ut_asserteq('a', utf_to_lower('A'));
	ut_asserteq('z', utf_to_lower('Z'));
	/* Neighbours of that range must not change */
	ut_asserteq('@', utf_to_lower('@'));
	ut_asserteq('[', utf_to_lower('['));
	/* A lower case letter stays as it is */
	ut_asserteq('m', utf_to_lower('m'));
	/* Latin letter O with diaeresis (umlaut) */
	ut_asserteq(0x00f6, utf_to_lower(0x00d6));
#ifdef CONFIG_EFI_UNICODE_CAPITALIZATION
	/* Cyrillic letter I */
	ut_asserteq(0x0438, utf_to_lower(0x0418));
#endif
	return 0;
}
UNICODE_TEST(unicode_test_utf_to_lower);
Heinrich Schuchardt1a1012a2018-09-04 19:34:57 +0200608
/* Check conversion of code points to upper case */
static int unicode_test_utf_to_upper(struct unit_test_state *uts)
{
	/* Both ends of the ASCII lower case range */
	ut_asserteq('A', utf_to_upper('a'));
	ut_asserteq('Z', utf_to_upper('z'));
	/* Neighbours of that range must not change */
	ut_asserteq('`', utf_to_upper('`'));
	ut_asserteq('{', utf_to_upper('{'));
	/* An upper case letter stays as it is */
	ut_asserteq('M', utf_to_upper('M'));
	/* Latin letter O with diaeresis (umlaut) */
	ut_asserteq(0x00d6, utf_to_upper(0x00f6));
#ifdef CONFIG_EFI_UNICODE_CAPITALIZATION
	/* Cyrillic letter I */
	ut_asserteq(0x0418, utf_to_upper(0x0438));
#endif
	return 0;
}
UNICODE_TEST(unicode_test_utf_to_upper);
Heinrich Schuchardt1a1012a2018-09-04 19:34:57 +0200625
Heinrich Schuchardt07355762022-12-29 14:44:04 +0100626static int unicode_test_u16_strcasecmp(struct unit_test_state *uts)
627{
628 ut_assert(u16_strcasecmp(u"abcd", u"abcd") == 0);
629 ut_assert(u16_strcasecmp(u"aBcd", u"abcd") == 0);
630 ut_assert(u16_strcasecmp(u"abcd", u"abCd") == 0);
631 ut_assert(u16_strcasecmp(u"abcdE", u"abcd") > 0);
632 ut_assert(u16_strcasecmp(u"abcd", u"abcdE") < 0);
633 ut_assert(u16_strcasecmp(u"abcE", u"abcd") > 0);
634 ut_assert(u16_strcasecmp(u"abcd", u"abcE") < 0);
635 ut_assert(u16_strcasecmp(u"abcd", u"abcd") == 0);
636 ut_assert(u16_strcasecmp(u"abcd", u"abcd") == 0);
637 if (CONFIG_IS_ENABLED(EFI_UNICODE_CAPITALIZATION)) {
638 /* Cyrillic letters */
639 ut_assert(u16_strcasecmp(u"\x043a\x043d\x0438\x0433\x0430",
640 u"\x041a\x041d\x0418\x0413\x0410") == 0);
641 ut_assert(u16_strcasecmp(u"\x043a\x043d\x0438\x0433\x0430",
642 u"\x041a\x041d\x0418\x0413\x0411") < 0);
643 ut_assert(u16_strcasecmp(u"\x043a\x043d\x0438\x0433\x0431",
644 u"\x041a\x041d\x0418\x0413\x0410") > 0);
645 }
646
647 return 0;
648}
649UNICODE_TEST(unicode_test_u16_strcasecmp);
650
/* Check u16_strncmp() and u16_strcmp() */
static int unicode_test_u16_strncmp(struct unit_test_state *uts)
{
	/* Length-limited comparison: only the first n words count */
	ut_assert(u16_strncmp(u"abc", u"abc", 3) == 0);
	ut_assert(u16_strncmp(u"abcdef", u"abcghi", 3) == 0);
	ut_assert(u16_strncmp(u"abcdef", u"abcghi", 6) < 0);
	ut_assert(u16_strncmp(u"abcghi", u"abcdef", 6) > 0);

	/* Unlimited comparison */
	ut_assert(u16_strcmp(u"abc", u"abc") == 0);
	ut_assert(u16_strcmp(u"abcdef", u"deghi") < 0);
	ut_assert(u16_strcmp(u"deghi", u"abcdef") > 0);

	return 0;
}
UNICODE_TEST(unicode_test_u16_strncmp);
663
Heinrich Schuchardtefe3b5c2020-05-09 09:16:49 +0200664static int unicode_test_u16_strsize(struct unit_test_state *uts)
665{
666 ut_asserteq_64(u16_strsize(c1), 14);
667 ut_asserteq_64(u16_strsize(c2), 18);
668 ut_asserteq_64(u16_strsize(c3), 8);
669 ut_asserteq_64(u16_strsize(c4), 14);
670 return 0;
671}
672UNICODE_TEST(unicode_test_u16_strsize);
673
Heinrich Schuchardt73bb90c2021-02-27 14:08:36 +0100674static int unicode_test_utf_to_cp(struct unit_test_state *uts)
675{
676 int ret;
677 s32 c;
678
679 c = '\n';
680 ret = utf_to_cp(&c, codepage_437);
681 ut_asserteq(0, ret);
682 ut_asserteq('\n', c);
683
684 c = 'a';
685 ret = utf_to_cp(&c, codepage_437);
686 ut_asserteq(0, ret);
687 ut_asserteq('a', c);
688
689 c = 0x03c4; /* Greek small letter tau */
690 ret = utf_to_cp(&c, codepage_437);
691 ut_asserteq(0, ret);
692 ut_asserteq(0xe7, c);
693
694 c = 0x03a4; /* Greek capital letter tau */
695 ret = utf_to_cp(&c, codepage_437);
696 ut_asserteq(-ENOENT, ret);
697 ut_asserteq('?', c);
698
699 return 0;
700}
701UNICODE_TEST(unicode_test_utf_to_cp);
702
/**
 * utf8_to_cp437_stream_helper() - pass a string to utf8_to_cp437_stream()
 *
 * Feeds @in byte by byte to utf8_to_cp437_stream() and appends every
 * non-zero return value to @out. A terminating zero is written at the end.
 *
 * @in:		zero-terminated input string
 * @out:	output buffer, to be sized by the caller
 */
static void utf8_to_cp437_stream_helper(const char *in, char *out)
{
	/* Conversion state handed to every call; starts out empty */
	char state[5];
	int ch;

	state[0] = 0;
	while (*in) {
		ch = utf8_to_cp437_stream(*in, state);
		if (ch)
			*out++ = ch;
		++in;
	}
	*out = 0;
}
716
717static int unicode_test_utf8_to_cp437_stream(struct unit_test_state *uts)
718{
719 char buf[16];
720
721 utf8_to_cp437_stream_helper(d1, buf);
722 ut_asserteq_str("U-Boot", buf);
723 utf8_to_cp437_stream_helper(d2, buf);
724 ut_asserteq_str("kafb\xa0tur", buf);
725 utf8_to_cp437_stream_helper(d5, buf);
726 ut_asserteq_str("? is not B", buf);
727 utf8_to_cp437_stream_helper(j2, buf);
728 ut_asserteq_str("j2l", buf);
729
730 return 0;
731}
732UNICODE_TEST(unicode_test_utf8_to_cp437_stream);
733
734static void utf8_to_utf32_stream_helper(const char *in, s32 *out)
735{
736 char buffer[5];
737 int ret;
738
739 *buffer = 0;
740 for (; *in; ++in) {
741 ret = utf8_to_utf32_stream(*in, buffer);
742 if (ret)
743 *out++ = ret;
744 }
745 *out = 0;
746}
747
748static int unicode_test_utf8_to_utf32_stream(struct unit_test_state *uts)
749{
750 s32 buf[16];
751
752 const u32 u1[] = {0x55, 0x2D, 0x42, 0x6F, 0x6F, 0x74, 0x0000};
753 const u32 u2[] = {0x6B, 0x61, 0x66, 0x62, 0xE1, 0x74, 0x75, 0x72, 0x00};
Heinrich Schuchardtaeba3852024-01-18 18:57:12 +0100754 const u32 u3[] = {0x6f5c, 0x6c34, 0x8266};
Heinrich Schuchardte91789e2021-02-27 14:08:38 +0100755 const u32 u4[] = {0x6A, 0x32, 0x6C, 0x00};
Heinrich Schuchardtaeba3852024-01-18 18:57:12 +0100756 const u32 u5[] = {0x0392, 0x20, 0x69, 0x73, 0x20, 0x6E, 0x6F, 0x74,
757 0x20, 0x42, 0x00};
Heinrich Schuchardte91789e2021-02-27 14:08:38 +0100758
759 memset(buf, 0, sizeof(buf));
760 utf8_to_utf32_stream_helper(d1, buf);
761 ut_asserteq_mem(u1, buf, sizeof(u1));
762
763 memset(buf, 0, sizeof(buf));
764 utf8_to_utf32_stream_helper(d2, buf);
765 ut_asserteq_mem(u2, buf, sizeof(u2));
766
767 memset(buf, 0, sizeof(buf));
Heinrich Schuchardtaeba3852024-01-18 18:57:12 +0100768 utf8_to_utf32_stream_helper(d3, buf);
Heinrich Schuchardte91789e2021-02-27 14:08:38 +0100769 ut_asserteq_mem(u3, buf, sizeof(u3));
770
771 memset(buf, 0, sizeof(buf));
Heinrich Schuchardtaeba3852024-01-18 18:57:12 +0100772 utf8_to_utf32_stream_helper(d5, buf);
773 ut_asserteq_mem(u5, buf, sizeof(u5));
774
775 memset(buf, 0, sizeof(buf));
Heinrich Schuchardte91789e2021-02-27 14:08:38 +0100776 utf8_to_utf32_stream_helper(j2, buf);
777 ut_asserteq_mem(u4, buf, sizeof(u4));
778
779 return 0;
780}
781UNICODE_TEST(unicode_test_utf8_to_utf32_stream);
782
#ifdef CONFIG_EFI_LOADER
/* Check creation of a name consisting of a prefix and a hexadecimal index */
static int unicode_test_efi_create_indexed_name(struct unit_test_state *uts)
{
	const u16 expected[] = u"Capsule0AF9";
	u16 buf[16];
	u16 *end;

	/*
	 * Fill the buffer with a non-zero pattern: sizeof(expected) includes
	 * the terminating zero, so a missing terminator fails the comparison.
	 */
	memset(buf, 0xeb, sizeof(buf));
	end = efi_create_indexed_name(buf, sizeof(buf), "Capsule", 0x0af9);

	ut_asserteq_mem(expected, buf, sizeof(expected));
	/* The returned pointer must be the end of the created string */
	ut_asserteq(end - buf, u16_strnlen(buf, SIZE_MAX));

	return 0;
}
UNICODE_TEST(unicode_test_efi_create_indexed_name);
#endif
800
Masahisa Kojimab8cd1e72022-04-28 17:09:35 +0900801static int unicode_test_u16_strlcat(struct unit_test_state *uts)
802{
803 u16 buf[40];
804 u16 dest[] = {0x3053, 0x3093, 0x306b, 0x3061, 0x306f, 0};
805 u16 src[] = {0x03B1, 0x2172, 0x6F5C, 0x8247, 0};
806 u16 concat_str[] = {0x3053, 0x3093, 0x306b, 0x3061, 0x306f,
807 0x03B1, 0x2172, 0x6F5C, 0x8247, 0};
808 u16 null_src = u'\0';
809 size_t ret, expected;
810 int i;
811
812 /* dest and src are empty string */
813 memset(buf, 0, sizeof(buf));
Dan Carpenterbe5f9a72023-07-27 10:12:58 +0300814 ret = u16_strlcat(buf, &null_src, ARRAY_SIZE(buf));
Matthias Schiffer7c00b802023-07-14 13:24:51 +0200815 ut_asserteq(0, ret);
Masahisa Kojimab8cd1e72022-04-28 17:09:35 +0900816
817 /* dest is empty string */
818 memset(buf, 0, sizeof(buf));
Dan Carpenterbe5f9a72023-07-27 10:12:58 +0300819 ret = u16_strlcat(buf, src, ARRAY_SIZE(buf));
Matthias Schiffer7c00b802023-07-14 13:24:51 +0200820 ut_asserteq(4, ret);
Masahisa Kojimab8cd1e72022-04-28 17:09:35 +0900821 ut_assert(!unicode_test_u16_strcmp(buf, src, 40));
822
823 /* src is empty string */
824 memset(buf, 0xCD, (sizeof(buf) - sizeof(u16)));
825 buf[39] = 0;
826 memcpy(buf, dest, sizeof(dest));
Dan Carpenterbe5f9a72023-07-27 10:12:58 +0300827 ret = u16_strlcat(buf, &null_src, ARRAY_SIZE(buf));
Matthias Schiffer7c00b802023-07-14 13:24:51 +0200828 ut_asserteq(5, ret);
Masahisa Kojimab8cd1e72022-04-28 17:09:35 +0900829 ut_assert(!unicode_test_u16_strcmp(buf, dest, 40));
830
831 for (i = 0; i <= 40; i++) {
832 memset(buf, 0xCD, (sizeof(buf) - sizeof(u16)));
833 buf[39] = 0;
834 memcpy(buf, dest, sizeof(dest));
Matthias Schiffer7c00b802023-07-14 13:24:51 +0200835 expected = min(5, i) + 4;
Masahisa Kojimab8cd1e72022-04-28 17:09:35 +0900836 ret = u16_strlcat(buf, src, i);
837 ut_asserteq(expected, ret);
838 if (i <= 6) {
839 ut_assert(!unicode_test_u16_strcmp(buf, dest, 40));
840 } else if (i < 10) {
841 ut_assert(!unicode_test_u16_strcmp(buf, concat_str, i - 1));
842 } else {
843 ut_assert(!unicode_test_u16_strcmp(buf, concat_str, 40));
844 }
845 }
846
847 return 0;
848}
849UNICODE_TEST(unicode_test_u16_strlcat);
850
Simon Glass09140112020-05-10 11:40:03 -0600851int do_ut_unicode(struct cmd_tbl *cmdtp, int flag, int argc, char *const argv[])
Heinrich Schuchardtf11a1642018-08-31 21:31:28 +0200852{
Simon Glassa7a98752021-03-07 17:35:10 -0700853 struct unit_test *tests = UNIT_TEST_SUITE_START(unicode_test);
854 const int n_ents = UNIT_TEST_SUITE_COUNT(unicode_test);
Heinrich Schuchardtf11a1642018-08-31 21:31:28 +0200855
Philippe Reynes4ad4edf2019-12-17 19:07:04 +0100856 return cmd_ut_category("Unicode", "unicode_test_",
857 tests, n_ents, argc, argv);
Heinrich Schuchardtf11a1642018-08-31 21:31:28 +0200858}