TGUI  0.9.5
Loading...
Searching...
No Matches
Utf.hpp
1
2//
3// TGUI - Texus' Graphical User Interface
4// Copyright (C) 2012-2022 Bruno Van de Velde (vdv_b@tgui.eu)
5//
6// This software is provided 'as-is', without any express or implied warranty.
7// In no event will the authors be held liable for any damages arising from the use of this software.
8//
9// Permission is granted to anyone to use this software for any purpose,
10// including commercial applications, and to alter it and redistribute it freely,
11// subject to the following restrictions:
12//
13// 1. The origin of this software must not be misrepresented;
14// you must not claim that you wrote the original software.
15// If you use this software in a product, an acknowledgment
16// in the product documentation would be appreciated but is not required.
17//
18// 2. Altered source versions must be plainly marked as such,
19// and must not be misrepresented as being the original software.
20//
21// 3. This notice may not be removed or altered from any source distribution.
22//
24
25
26#ifndef TGUI_UTF_HPP
27#define TGUI_UTF_HPP
28
29#include <TGUI/Config.hpp>
30#include <cstdint>
31#include <string>
32#include <array>
33
35
36// Disable warning in Visual Studio about being able to use "if constexpr".
37// The code would use "if constexpr" if the compiler would just define __cpp_if_constexpr
38#if defined TGUI_SYSTEM_WINDOWS && defined _MSC_VER
39 #pragma warning(push)
40 #pragma warning(disable:4127)
41#endif
42
43namespace tgui
44{
45 namespace utf
46 {
/// @brief Encodes a single UTF-32 codepoint as UTF-8 and appends the resulting bytes to a string
///
/// @param input       Codepoint to encode
/// @param outStrUtf8  String to which the encoded bytes are appended
///
/// Nothing is appended when the codepoint is not a Unicode scalar value, i.e. when it is greater
/// than U+10FFFF or lies anywhere in the surrogate range U+D800..U+DFFF.
template <typename CharT> // CharT is either char or char8_t
void encodeCharUtf8(char32_t input, std::basic_string<CharT>& outStrUtf8)
{
    // ASCII characters are stored unchanged in a single byte
    if (input < 0x80)
    {
        outStrUtf8.push_back(static_cast<CharT>(input));
        return;
    }

    // Reject values that cannot be encoded. Both the high (D800-DBFF) and the low (DC00-DFFF)
    // surrogates are reserved for UTF-16 and must never appear in a UTF-8 stream.
    if ((input > 0x0010FFFF) || ((input >= 0xD800) && (input <= 0xDFFF)))
        return;

    // Get the number of bytes to write (the range check above guarantees at most 4)
    std::size_t bytesToWrite;
    if (input < 0x800)
        bytesToWrite = 2;
    else if (input < 0x10000)
        bytesToWrite = 3;
    else
        bytesToWrite = 4;

    // Marker bits of the leading byte, indexed by the total length of the sequence
    static const std::uint8_t firstByteMask[5] = { 0, 0, 0xC0, 0xE0, 0xF0 };

    // Fill the continuation bytes from last to first, each one takes the lowest 6 bits (10xxxxxx)
    std::array<CharT, 4> bytes{};
    for (std::size_t i = bytesToWrite - 1; i > 0; --i)
    {
        bytes[i] = static_cast<CharT>((input | 0x80) & 0xBF);
        input >>= 6;
    }

    // Whatever bits remain go in the leading byte together with the length marker
    bytes[0] = static_cast<CharT>(input | firstByteMask[bytesToWrite]);

    // Add them to the output
    outStrUtf8.append(bytes.data(), bytesToWrite);
}
88
89
/// @brief Decodes one UTF-8 encoded character and appends the codepoint to a UTF-32 string
///
/// @param inputCharIt  Iterator to the first byte of the character to decode
/// @param inputEndIt   Iterator to the end of the input
/// @param outStrUtf32  String to which the decoded codepoint is appended
///
/// @return Iterator to the byte following the decoded character, or inputEndIt when the range
///         was empty or ended in the middle of a multi-byte sequence (nothing is appended then)
///
/// The length of the sequence is derived from the leading byte only. Continuation bytes are not
/// validated, and a byte in the range 0x80..0xBF found in leading position is appended unchanged.
template <typename CharIt> // CharIt is an iterator for a string containing either char or char8_t
CharIt decodeCharUtf8(CharIt inputCharIt, CharIt inputEndIt, std::u32string& outStrUtf32)
{
    // Nothing to decode, the end iterator must never be dereferenced
    if (inputCharIt == inputEndIt)
        return inputEndIt;

    // ASCII characters map directly to their codepoint
    const std::uint8_t leadByte = static_cast<std::uint8_t>(*inputCharIt);
    if (leadByte < 128)
    {
        outStrUtf32.push_back(static_cast<char32_t>(leadByte));
        return ++inputCharIt;
    }

    // Value to subtract after accumulating the raw bytes, indexed by the amount of trailing bytes.
    // It removes the marker bits of the leading byte and of every continuation byte in one go.
    static const std::uint32_t offsetsMap[6] = { 0x00000000, 0x00003080, 0x000E2080, 0x03C82080, 0xFA082080, 0x82082080 };

    // Amount of bytes that follow a leading byte, indexed by (leading byte - 128)
    static const std::uint8_t trailingMap[128] =
    {
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5
    };

    // Decode the character
    const std::uint8_t trailingBytes = trailingMap[leadByte - 128];
    const std::uint32_t offset = offsetsMap[trailingBytes];
    const auto remainingBytes = std::distance(inputCharIt, inputEndIt) - 1;

    // Incomplete character: the input ends before the sequence does
    if (remainingBytes < static_cast<decltype(remainingBytes)>(trailingBytes))
        return inputEndIt;

    // Accumulate every byte except the last one, making room for 6 more bits each time
    char32_t outputChar = 0;
    for (std::uint8_t i = 0; i < trailingBytes; ++i)
    {
        outputChar += static_cast<char32_t>(static_cast<std::uint8_t>(*inputCharIt++));
        outputChar <<= 6;
    }

    // The last byte is added without shifting, after which the marker bits are stripped
    outputChar += static_cast<char32_t>(static_cast<std::uint8_t>(*inputCharIt++));
    outputChar -= offset;
    outStrUtf32.push_back(outputChar);

    return inputCharIt;
}
138
139
140#if defined(__cpp_lib_char8_t) && (__cpp_lib_char8_t >= 201811L)
146 inline std::u8string convertUtf32toUtf8(const std::u32string& strUtf32)
147 {
148 std::u8string outStrUtf8;
149 outStrUtf8.reserve(strUtf32.length() + 1);
150 for (const char32_t& codepoint : strUtf32)
151 encodeCharUtf8(codepoint, outStrUtf8);
152
153 return outStrUtf8;
154 }
155#endif
156
/// @brief Converts a range of UTF-8 encoded bytes into a UTF-32 string
///
/// @param inputBegin  Iterator to the first byte of the input
/// @param inputEnd    Iterator to the end of the input
///
/// @return String containing the codepoints that decodeCharUtf8 appended
///
/// The iterators must support subtraction and comparison with operator<.
template <typename CharIt>
std::u32string convertUtf8toUtf32(CharIt inputBegin, CharIt inputEnd)
{
    std::u32string decoded;

    // There can never be more codepoints than bytes
    decoded.reserve((inputEnd - inputBegin) + 1);

    // decodeCharUtf8 returns where the next character starts
    for (CharIt cursor = inputBegin; cursor < inputEnd; )
        cursor = decodeCharUtf8(cursor, inputEnd, decoded);

    return decoded;
}
175
176
/// @brief Converts a range of UTF-16 code units into a UTF-32 string
///
/// @param inputBegin  Iterator to the first code unit of the input
/// @param inputEnd    Iterator to the end of the input
///
/// @return String containing the decoded codepoints
///
/// Code units outside the surrogate range are copied, a high surrogate directly followed by a
/// low surrogate is combined into one codepoint. Unpaired surrogates are invalid and are skipped
/// without affecting the code units around them.
///
/// The iterators must support subtraction and comparison with operator<.
template <typename U16CharIt>
std::u32string convertUtf16toUtf32(U16CharIt inputBegin, U16CharIt inputEnd)
{
    std::u32string outStrUtf32;
    outStrUtf32.reserve((inputEnd - inputBegin) + 1);

    auto it = inputBegin;
    while (it < inputEnd)
    {
        const char16_t first = *it++;

        // Copy the character if it isn't part of a surrogate pair
        if ((first < 0xD800) || (first > 0xDFFF))
        {
            outStrUtf32.push_back(static_cast<char32_t>(first));
            continue;
        }

        // A low surrogate that doesn't follow a high surrogate is not a valid character
        if (first >= 0xDC00)
            continue;

        // We found a high surrogate, which is only valid when another code unit follows
        if (it == inputEnd)
            break;

        // Only consume the next code unit when it completes the pair. Otherwise the high
        // surrogate is dropped and the next code unit is handled by the next iteration.
        const char16_t second = *it;
        if ((second < 0xDC00) || (second > 0xDFFF))
            continue;

        ++it;
        outStrUtf32.push_back(((static_cast<char32_t>(first) - 0xD800) << 10) + (static_cast<char32_t>(second) - 0xDC00) + 0x0010000);
    }

    return outStrUtf32;
}
213
214
/// @brief Converts a range of wide characters into a UTF-32 string
///
/// @param inputBegin  Iterator to the first wide character of the input
/// @param inputEnd    Iterator to the end of the input
///
/// @return String in which every input element was cast to char32_t
///
/// Every element is converted on its own, no decoding takes place. The iterators must support
/// subtraction.
template <typename WCharIt>
std::u32string convertWidetoUtf32(WCharIt inputBegin, WCharIt inputEnd)
{
    std::u32string widened;
    widened.reserve((inputEnd - inputBegin) + 1);

    // std::wstring is treated as UCS-2 on Windows and UCS-4 on unix, so each element can be cast directly
    WCharIt cursor = inputBegin;
    while (cursor != inputEnd)
    {
        widened.push_back(static_cast<char32_t>(*cursor));
        ++cursor;
    }

    return widened;
}
233
234
/// @brief Converts a UTF-32 string into a Latin-1 (one byte per character) string
///
/// @param strUtf32  String of UTF-32 codepoints
///
/// @return String containing one char for every codepoint below 256, other codepoints are skipped
inline std::string convertUtf32toLatin1(const std::u32string& strUtf32)
{
    std::string latin1;
    latin1.reserve(strUtf32.length() + 1);

    for (auto codepointIt = strUtf32.begin(); codepointIt != strUtf32.end(); ++codepointIt)
    {
        // Characters that don't fit in a single byte can't be represented
        if (*codepointIt >= 256)
            continue;

        latin1.push_back(static_cast<char>(*codepointIt));
    }

    return latin1;
}
252
253
259 inline std::string convertUtf32toStdStringUtf8(const std::u32string& strUtf32)
260 {
261 std::string outStrUtf8;
262 outStrUtf8.reserve(strUtf32.length() + 1);
263 for (const char32_t codepoint : strUtf32)
264 encodeCharUtf8(codepoint, outStrUtf8);
265
266 return outStrUtf8;
267 }
268
269
/// @brief Converts a UTF-32 string into a wide string
///
/// @param strUtf32  String of UTF-32 codepoints
///
/// @return Wide string with the converted characters
///
/// When wchar_t is 4 bytes wide every codepoint is cast to wchar_t unchanged. Otherwise only
/// codepoints up to U+FFFF that are not surrogates are kept, all other codepoints are dropped.
inline std::wstring convertUtf32toWide(const std::u32string& strUtf32)
{
    std::wstring wide;
    wide.reserve(strUtf32.length() + 1);

#if defined(__cpp_if_constexpr) && (__cpp_if_constexpr >= 201606L)
    if constexpr (sizeof(wchar_t) != 4)
#else
    if (sizeof(wchar_t) != 4)
#endif
    {
        // On Windows, wide characters are treated as UCS-2. Characters that don't fit within a single wide character are dropped.
        for (const char32_t cp : strUtf32)
        {
            const bool isSurrogate = (cp >= 0xD800) && (cp <= 0xDFFF);
            if (!isSurrogate && (cp <= 0xFFFF))
                wide.push_back(static_cast<wchar_t>(cp));
        }
    }
    else
    {
        // On Unix, wide characters are UCS-4 and the characters can simply be copied
        for (const char32_t cp : strUtf32)
            wide.push_back(static_cast<wchar_t>(cp));
    }

    return wide;
}
302
303
/// @brief Converts a UTF-32 string into a UTF-16 string
///
/// @param strUtf32  String of UTF-32 codepoints
///
/// @return String of UTF-16 code units
///
/// Codepoints up to U+FFFF are copied as a single code unit, codepoints above it are split into
/// a surrogate pair. Codepoints inside the surrogate range U+D800..U+DFFF and codepoints greater
/// than U+10FFFF are dropped.
inline std::u16string convertUtf32toUtf16(const std::u32string& strUtf32)
{
    std::u16string utf16;
    utf16.reserve(strUtf32.length() + 1);

    for (const char32_t cp : strUtf32)
    {
        // Invalid character (greater than the maximum Unicode value)
        if (cp > 0x0010FFFF)
            continue;

        // The character needs to be converted to two UTF-16 elements when it doesn't fit in one
        if (cp > 0xFFFF)
        {
            const char32_t shifted = cp - 0x0010000;
            utf16.push_back(static_cast<char16_t>((shifted >> 10) + 0xD800));
            utf16.push_back(static_cast<char16_t>((shifted & 0x3FFUL) + 0xDC00));
            continue;
        }

        // The codepoint fits inside 2 bytes, copy it unless it lies in the range reserved for surrogates
        const bool isSurrogate = (cp >= 0xD800) && (cp <= 0xDFFF);
        if (!isSurrogate)
            utf16.push_back(static_cast<char16_t>(cp));
    }

    return utf16;
}
334
336 }
337}
338
339#if defined TGUI_SYSTEM_WINDOWS && defined _MSC_VER
340 #pragma warning(pop)
341#endif
342
344
345#endif // TGUI_UTF_HPP
Namespace that contains all TGUI functions and classes.
Definition AbsoluteOrRelativeValue.hpp:36