OpenShot Library | libopenshot-audio 0.2.0
juce_CharPointer_UTF16.h
1
2/** @weakgroup juce_core-text
3 * @{
4 */
5/*
6 ==============================================================================
7
8 This file is part of the JUCE library.
9 Copyright (c) 2017 - ROLI Ltd.
10
11 JUCE is an open source library subject to commercial or open-source
12 licensing.
13
14 The code included in this file is provided under the terms of the ISC license
15 http://www.isc.org/downloads/software-support-policy/isc-license. Permission
16 To use, copy, modify, and/or distribute this software for any purpose with or
17 without fee is hereby granted provided that the above copyright notice and
18 this permission notice appear in all copies.
19
20 JUCE IS PROVIDED "AS IS" WITHOUT ANY WARRANTY, AND ALL WARRANTIES, WHETHER
21 EXPRESSED OR IMPLIED, INCLUDING MERCHANTABILITY AND FITNESS FOR PURPOSE, ARE
22 DISCLAIMED.
23
24 ==============================================================================
25*/
26
27namespace juce
28{
29
30//==============================================================================
31/**
32 Wraps a pointer to a null-terminated UTF-16 character string, and provides
33 various methods to operate on the data.
34 @see CharPointer_UTF8, CharPointer_UTF32
35
36 @tags{Core}
37*/
39{
40public:
41 #if JUCE_NATIVE_WCHAR_IS_UTF16
42 using CharType = wchar_t;
43 #else
44 using CharType = int16;
45 #endif
46
47 inline explicit CharPointer_UTF16 (const CharType* rawPointer) noexcept
48 : data (const_cast<CharType*> (rawPointer))
49 {
50 }
51
52 inline CharPointer_UTF16 (const CharPointer_UTF16& other) = default;
53
55 {
56 data = other.data;
57 return *this;
58 }
59
60 inline CharPointer_UTF16 operator= (const CharType* text) noexcept
61 {
62 data = const_cast<CharType*> (text);
63 return *this;
64 }
65
66 /** This is a pointer comparison, it doesn't compare the actual text. */
67 inline bool operator== (CharPointer_UTF16 other) const noexcept { return data == other.data; }
68 inline bool operator!= (CharPointer_UTF16 other) const noexcept { return data != other.data; }
69 inline bool operator<= (CharPointer_UTF16 other) const noexcept { return data <= other.data; }
70 inline bool operator< (CharPointer_UTF16 other) const noexcept { return data < other.data; }
71 inline bool operator>= (CharPointer_UTF16 other) const noexcept { return data >= other.data; }
72 inline bool operator> (CharPointer_UTF16 other) const noexcept { return data > other.data; }
73
74 /** Returns the address that this pointer is pointing to. */
75 inline CharType* getAddress() const noexcept { return data; }
76
77 /** Returns the address that this pointer is pointing to. */
78 inline operator const CharType*() const noexcept { return data; }
79
80 /** Returns true if this pointer is pointing to a null character. */
81 inline bool isEmpty() const noexcept { return *data == 0; }
82
83 /** Returns true if this pointer is not pointing to a null character. */
84 inline bool isNotEmpty() const noexcept { return *data != 0; }
85
86 /** Returns the unicode character that this pointer is pointing to. */
88 {
89 auto n = (uint32) (uint16) *data;
90
91 if (n >= 0xd800 && n <= 0xdfff && ((uint32) (uint16) data[1]) >= 0xdc00)
92 n = 0x10000 + (((n - 0xd800) << 10) | (((uint32) (uint16) data[1]) - 0xdc00));
93
94 return (juce_wchar) n;
95 }
96
97 /** Moves this pointer along to the next character in the string. */
99 {
100 auto n = (uint32) (uint16) *data++;
101
102 if (n >= 0xd800 && n <= 0xdfff && ((uint32) (uint16) *data) >= 0xdc00)
103 ++data;
104
105 return *this;
106 }
107
108 /** Moves this pointer back to the previous character in the string. */
110 {
111 auto n = (uint32) (uint16) (*--data);
112
113 if (n >= 0xdc00 && n <= 0xdfff)
114 --data;
115
116 return *this;
117 }
118
119 /** Returns the character that this pointer is currently pointing to, and then
120 advances the pointer to point to the next character. */
122 {
123 auto n = (uint32) (uint16) *data++;
124
125 if (n >= 0xd800 && n <= 0xdfff && ((uint32) (uint16) *data) >= 0xdc00)
126 n = 0x10000 + ((((n - 0xd800) << 10) | (((uint32) (uint16) *data++) - 0xdc00)));
127
128 return (juce_wchar) n;
129 }
130
131 /** Moves this pointer along to the next character in the string. */
133 {
134 auto temp (*this);
135 ++*this;
136 return temp;
137 }
138
139 /** Moves this pointer forwards by the specified number of characters. */
140 void operator+= (int numToSkip) noexcept
141 {
142 if (numToSkip < 0)
143 {
144 while (++numToSkip <= 0)
145 --*this;
146 }
147 else
148 {
149 while (--numToSkip >= 0)
150 ++*this;
151 }
152 }
153
154 /** Moves this pointer backwards by the specified number of characters. */
155 void operator-= (int numToSkip) noexcept
156 {
158 }
159
160 /** Returns the character at a given character index from the start of the string. */
162 {
163 auto p (*this);
164 p += characterIndex;
165 return *p;
166 }
167
168 /** Returns a pointer which is moved forwards from this one by the specified number of characters. */
170 {
171 auto p (*this);
172 p += numToSkip;
173 return p;
174 }
175
176 /** Returns a pointer which is moved backwards from this one by the specified number of characters. */
178 {
179 auto p (*this);
180 p += -numToSkip;
181 return p;
182 }
183
184 /** Writes a unicode character to this string, and advances this pointer to point to the next position. */
186 {
187 if (charToWrite >= 0x10000)
188 {
189 charToWrite -= 0x10000;
190 *data++ = (CharType) (0xd800 + (charToWrite >> 10));
191 *data++ = (CharType) (0xdc00 + (charToWrite & 0x3ff));
192 }
193 else
194 {
195 *data++ = (CharType) charToWrite;
196 }
197 }
198
199 /** Writes a null character to this string (leaving the pointer's position unchanged). */
201 {
202 *data = 0;
203 }
204
205 /** Returns the number of characters in this string. */
207 {
208 auto* d = data;
209 size_t count = 0;
210
211 for (;;)
212 {
213 auto n = (uint32) (uint16) *d++;
214
215 if (n >= 0xd800 && n <= 0xdfff)
216 {
217 if (*d++ == 0)
218 break;
219 }
220 else if (n == 0)
221 break;
222
223 ++count;
224 }
225
226 return count;
227 }
228
229 /** Returns the number of characters in this string, or the given value, whichever is lower. */
230 size_t lengthUpTo (size_t maxCharsToCount) const noexcept
231 {
233 }
234
235 /** Returns the number of characters in this string, or up to the given end pointer, whichever is lower. */
236 size_t lengthUpTo (CharPointer_UTF16 end) const noexcept
237 {
238 return CharacterFunctions::lengthUpTo (*this, end);
239 }
240
241 /** Returns the number of bytes that are used to represent this string.
242 This includes the terminating null character.
243 */
245 {
246 return sizeof (CharType) * (findNullIndex (data) + 1);
247 }
248
249 /** Returns the number of bytes that would be needed to represent the given
250 unicode character in this encoding format.
251 */
253 {
254 return (charToWrite >= 0x10000) ? (sizeof (CharType) * 2) : sizeof (CharType);
255 }
256
257 /** Returns the number of bytes that would be needed to represent the given
258 string in this encoding format.
259 The value returned does NOT include the terminating null character.
260 */
261 template <class CharPointer>
262 static size_t getBytesRequiredFor (CharPointer text) noexcept
263 {
264 size_t count = 0;
266
267 while ((n = text.getAndAdvance()) != 0)
268 count += getBytesRequiredFor (n);
269
270 return count;
271 }
272
273 /** Returns a pointer to the null character that terminates this string. */
275 {
276 auto* t = data;
277
278 while (*t != 0)
279 ++t;
280
281 return CharPointer_UTF16 (t);
282 }
283
284 /** Copies a source string to this pointer, advancing this pointer as it goes. */
285 template <typename CharPointer>
286 void writeAll (CharPointer src) noexcept
287 {
289 }
290
291 /** Copies a source string to this pointer, advancing this pointer as it goes. */
293 {
294 auto* s = src.data;
295
296 while ((*data = *s) != 0)
297 {
298 ++data;
299 ++s;
300 }
301 }
302
303 /** Copies a source string to this pointer, advancing this pointer as it goes.
304 The maxDestBytes parameter specifies the maximum number of bytes that can be written
305 to the destination buffer before stopping.
306 */
307 template <typename CharPointer>
312
313 /** Copies a source string to this pointer, advancing this pointer as it goes.
314 The maxChars parameter specifies the maximum number of characters that can be
315 written to the destination buffer before stopping (including the terminating null).
316 */
317 template <typename CharPointer>
322
323 /** Compares this string with another one. */
324 template <typename CharPointer>
325 int compare (CharPointer other) const noexcept
326 {
327 return CharacterFunctions::compare (*this, other);
328 }
329
330 /** Compares this string with another one, up to a specified number of characters. */
331 template <typename CharPointer>
332 int compareUpTo (CharPointer other, int maxChars) const noexcept
333 {
335 }
336
337 /** Compares this string with another one. */
338 template <typename CharPointer>
340 {
342 }
343
344 /** Compares this string with another one, up to a specified number of characters. */
345 template <typename CharPointer>
350
#if JUCE_MSVC && ! DOXYGEN
/** MSVC-only overload: hands both raw strings straight to _wcsicmp. */
int compareIgnoreCase (CharPointer_UTF16 other) const noexcept
{
    const auto result = _wcsicmp (data, other.data);
    return result;
}

/** MSVC-only overload: hands both raw strings and the limit straight to _wcsnicmp. */
int compareIgnoreCaseUpTo (CharPointer_UTF16 other, int maxChars) const noexcept
{
    const auto limit = (size_t) maxChars;
    return _wcsnicmp (data, other.data, limit);
}

/** MSVC-only overload: searches with wcsstr, returning -1 when nothing is found. */
int indexOf (CharPointer_UTF16 stringToFind) const noexcept
{
    const CharType* const match = wcsstr (data, stringToFind.getAddress());

    if (match == nullptr)
        return -1;

    // The offset is a raw pointer difference, i.e. it is measured in CharType units.
    return (int) (match - data);
}
#endif
368
369 /** Returns the character index of a substring, or -1 if it isn't found. */
370 template <typename CharPointer>
371 int indexOf (CharPointer stringToFind) const noexcept
372 {
374 }
375
376 /** Returns the character index of a unicode character, or -1 if it isn't found. */
377 int indexOf (juce_wchar charToFind) const noexcept
378 {
380 }
381
382 /** Returns the character index of a unicode character, or -1 if it isn't found. */
383 int indexOf (juce_wchar charToFind, bool ignoreCase) const noexcept
384 {
385 return ignoreCase ? CharacterFunctions::indexOfCharIgnoreCase (*this, charToFind)
387 }
388
389 /** Returns true if the first character of this string is whitespace. */
390 bool isWhitespace() const noexcept { return CharacterFunctions::isWhitespace (operator*()) != 0; }
391 /** Returns true if the first character of this string is a digit. */
392 bool isDigit() const noexcept { return CharacterFunctions::isDigit (operator*()) != 0; }
393 /** Returns true if the first character of this string is a letter. */
394 bool isLetter() const noexcept { return CharacterFunctions::isLetter (operator*()) != 0; }
395 /** Returns true if the first character of this string is a letter or digit. */
397 /** Returns true if the first character of this string is upper-case. */
398 bool isUpperCase() const noexcept { return CharacterFunctions::isUpperCase (operator*()) != 0; }
399 /** Returns true if the first character of this string is lower-case. */
400 bool isLowerCase() const noexcept { return CharacterFunctions::isLowerCase (operator*()) != 0; }
401
402 /** Returns an upper-case version of the first character of this string. */
404 /** Returns a lower-case version of the first character of this string. */
406
407 /** Parses this string as a 32-bit integer. */
409 {
410 #if JUCE_MSVC
411 return _wtoi (data);
412 #else
413 return CharacterFunctions::getIntValue<int, CharPointer_UTF16> (*this);
414 #endif
415 }
416
417 /** Parses this string as a 64-bit integer. */
419 {
420 #if JUCE_MSVC
421 return _wtoi64 (data);
422 #else
423 return CharacterFunctions::getIntValue<int64, CharPointer_UTF16> (*this);
424 #endif
425 }
426
427 /** Parses this string as a floating point double. */
429
430 /** Returns the first non-whitespace character in the string. */
432
433 /** Returns true if the given unicode character can be represented in this encoding. */
434 static bool canRepresent (juce_wchar character) noexcept
435 {
436 auto n = (uint32) character;
437 return n < 0x10ffff && (n < 0xd800 || n > 0xdfff);
438 }
439
440 /** Returns true if this data contains a valid string in this encoding. */
441 static bool isValidString (const CharType* dataToTest, int maxBytesToRead)
442 {
443 maxBytesToRead /= (int) sizeof (CharType);
444
445 while (--maxBytesToRead >= 0 && *dataToTest != 0)
446 {
447 auto n = (uint32) (uint16) *dataToTest++;
448
449 if (n >= 0xd800)
450 {
451 if (n > 0x10ffff)
452 return false;
453
454 if (n <= 0xdfff)
455 {
456 if (n > 0xdc00)
457 return false;
458
459 auto nextChar = (uint32) (uint16) *dataToTest++;
460
462 return false;
463 }
464 }
465 }
466
467 return true;
468 }
469
470 /** Atomically swaps this pointer for a new value, returning the previous value. */
472 {
473 return CharPointer_UTF16 (reinterpret_cast<Atomic<CharType*>&> (data).exchange (newValue.data));
474 }
475
/** These values are the byte-order-mark (BOM) values for a UTF-16 stream. */
enum
{
    // the two bytes checked, in order, for a big-endian mark
    byteOrderMarkBE1 = 0xfe,
    byteOrderMarkBE2 = 0xff,

    // the two bytes checked, in order, for a little-endian mark
    byteOrderMarkLE1 = 0xff,
    byteOrderMarkLE2 = 0xfe
};
484
485 /** Returns true if the first pair of bytes in this pointer are the UTF16 byte-order mark (big endian).
486 The pointer must not be null, and must contain at least two valid bytes.
487 */
488 static bool isByteOrderMarkBigEndian (const void* possibleByteOrder) noexcept
489 {
490 jassert (possibleByteOrder != nullptr);
491 auto c = static_cast<const uint8*> (possibleByteOrder);
492
493 return c[0] == (uint8) byteOrderMarkBE1
494 && c[1] == (uint8) byteOrderMarkBE2;
495 }
496
497 /** Returns true if the first pair of bytes in this pointer are the UTF16 byte-order mark (little endian).
498 The pointer must not be null, and must contain at least two valid bytes.
499 */
500 static bool isByteOrderMarkLittleEndian (const void* possibleByteOrder) noexcept
501 {
502 jassert (possibleByteOrder != nullptr);
503 auto c = static_cast<const uint8*> (possibleByteOrder);
504
505 return c[0] == (uint8) byteOrderMarkLE1
506 && c[1] == (uint8) byteOrderMarkLE2;
507 }
508
509private:
510 CharType* data;
511
512 static unsigned int findNullIndex (const CharType* t) noexcept
513 {
514 unsigned int n = 0;
515
516 while (t[n] != 0)
517 ++n;
518
519 return n;
520 }
521};
522
523} // namespace juce
524
525/** @}*/
Holds a resizable array of primitive or copy-by-value objects.
Definition juce_Array.h:60
ElementType * data() const noexcept
Returns a pointer to the first element in the array.
Definition juce_Array.h:325
Wraps a pointer to a null-terminated UTF-16 character string, and provides various methods to operate...
juce_wchar toUpperCase() const noexcept
Returns an upper-case version of the first character of this string.
int getIntValue32() const noexcept
Parses this string as a 32-bit integer.
juce_wchar operator*() const noexcept
Returns the unicode character that this pointer is pointing to.
CharPointer_UTF16 operator-(int numToSkip) const noexcept
Returns a pointer which is moved backwards from this one by the specified number of characters.
bool isNotEmpty() const noexcept
Returns true if this pointer is not pointing to a null character.
CharPointer_UTF16 operator++() noexcept
Moves this pointer along to the next character in the string.
size_t writeWithDestByteLimit(CharPointer src, size_t maxDestBytes) noexcept
Copies a source string to this pointer, advancing this pointer as it goes.
static size_t getBytesRequiredFor(juce_wchar charToWrite) noexcept
Returns the number of bytes that would be needed to represent the given unicode character in this enc...
void writeAll(CharPointer src) noexcept
Copies a source string to this pointer, advancing this pointer as it goes.
static bool canRepresent(juce_wchar character) noexcept
Returns true if the given unicode character can be represented in this encoding.
bool operator==(CharPointer_UTF16 other) const noexcept
This is a pointer comparison, it doesn't compare the actual text.
void writeNull() const noexcept
Writes a null character to this string (leaving the pointer's position unchanged).
juce_wchar toLowerCase() const noexcept
Returns a lower-case version of the first character of this string.
bool isUpperCase() const noexcept
Returns true if the first character of this string is upper-case.
int compareIgnoreCaseUpTo(CharPointer other, int maxChars) const noexcept
Compares this string with another one, up to a specified number of characters.
int64 getIntValue64() const noexcept
Parses this string as a 64-bit integer.
int compare(CharPointer other) const noexcept
Compares this string with another one.
bool isLetter() const noexcept
Returns true if the first character of this string is a letter.
size_t lengthUpTo(size_t maxCharsToCount) const noexcept
Returns the number of characters in this string, or the given value, whichever is lower.
CharPointer_UTF16 atomicSwap(CharPointer_UTF16 newValue)
Atomically swaps this pointer for a new value, returning the previous value.
static size_t getBytesRequiredFor(CharPointer text) noexcept
Returns the number of bytes that would be needed to represent the given string in this encoding forma...
CharPointer_UTF16 operator--() noexcept
Moves this pointer back to the previous character in the string.
juce_wchar operator[](int characterIndex) const noexcept
Returns the character at a given character index from the start of the string.
double getDoubleValue() const noexcept
Parses this string as a floating point double.
size_t length() const noexcept
Returns the number of characters in this string.
CharPointer_UTF16 findTerminatingNull() const noexcept
Returns a pointer to the null character that terminates this string.
size_t lengthUpTo(CharPointer_UTF16 end) const noexcept
Returns the number of characters in this string, or up to the given end pointer, whichever is lower.
int compareIgnoreCase(CharPointer other) const noexcept
Compares this string with another one.
CharType * getAddress() const noexcept
Returns the address that this pointer is pointing to.
size_t sizeInBytes() const noexcept
Returns the number of bytes that are used to represent this string.
static bool isByteOrderMarkBigEndian(const void *possibleByteOrder) noexcept
Returns true if the first pair of bytes in this pointer are the UTF16 byte-order mark (big endian).
CharPointer_UTF16 operator+(int numToSkip) const noexcept
Returns a pointer which is moved forwards from this one by the specified number of characters.
CharPointer_UTF16 findEndOfWhitespace() const noexcept
Returns the first non-whitespace character in the string.
juce_wchar getAndAdvance() noexcept
Returns the character that this pointer is currently pointing to, and then advances the pointer to po...
bool isDigit() const noexcept
Returns true if the first character of this string is a digit.
int compareUpTo(CharPointer other, int maxChars) const noexcept
Compares this string with another one, up to a specified number of characters.
void writeAll(CharPointer_UTF16 src) noexcept
Copies a source string to this pointer, advancing this pointer as it goes.
bool isLowerCase() const noexcept
Returns true if the first character of this string is lower-case.
bool isEmpty() const noexcept
Returns true if this pointer is pointing to a null character.
int indexOf(juce_wchar charToFind) const noexcept
Returns the character index of a unicode character, or -1 if it isn't found.
void writeWithCharLimit(CharPointer src, int maxChars) noexcept
Copies a source string to this pointer, advancing this pointer as it goes.
bool isLetterOrDigit() const noexcept
Returns true if the first character of this string is a letter or digit.
int indexOf(CharPointer stringToFind) const noexcept
Returns the character index of a substring, or -1 if it isn't found.
void write(juce_wchar charToWrite) noexcept
Writes a unicode character to this string, and advances this pointer to point to the next position.
static bool isValidString(const CharType *dataToTest, int maxBytesToRead)
Returns true if this data contains a valid string in this encoding.
bool isWhitespace() const noexcept
Returns true if the first character of this string is whitespace.
void operator-=(int numToSkip) noexcept
Moves this pointer backwards by the specified number of characters.
static bool isByteOrderMarkLittleEndian(const void *possibleByteOrder) noexcept
Returns true if the first pair of bytes in this pointer are the UTF16 byte-order mark (little endian)...
void operator+=(int numToSkip) noexcept
Moves this pointer forwards by the specified number of characters.
int indexOf(juce_wchar charToFind, bool ignoreCase) const noexcept
Returns the character index of a unicode character, or -1 if it isn't found.
static int compare(juce_wchar char1, juce_wchar char2) noexcept
Compares two characters.
static juce_wchar toLowerCase(juce_wchar character) noexcept
Converts a character to lower-case.
static size_t copyWithDestByteLimit(DestCharPointerType &dest, SrcCharPointerType src, size_t maxBytesToWrite) noexcept
Copies characters from one string to another, up to a null terminator or a given byte size limit.
static int indexOfCharIgnoreCase(Type text, juce_wchar charToFind) noexcept
Finds the character index of a given character in another string, using a case-independent match.
static bool isDigit(char character) noexcept
Checks whether a character is a digit.
static int compareIgnoreCaseUpTo(CharPointerType1 s1, CharPointerType2 s2, int maxChars) noexcept
Compares two null-terminated character strings, using a case-independent match.
static int indexOfChar(Type text, const juce_wchar charToFind) noexcept
Finds the character index of a given character in another string.
static int compareIgnoreCase(juce_wchar char1, juce_wchar char2) noexcept
Compares two characters, using a case-independent match.
static bool isLowerCase(juce_wchar character) noexcept
Checks whether a unicode character is lower-case.
static bool isLetter(char character) noexcept
Checks whether a character is alphabetic.
static int indexOf(CharPointerType1 textToSearch, const CharPointerType2 substringToLookFor) noexcept
Finds the character index of a given substring in another string.
static size_t lengthUpTo(CharPointerType text, const size_t maxCharsToCount) noexcept
Counts the number of characters in a given string, stopping if the count exceeds a specified limit.
static Type findEndOfWhitespace(Type text) noexcept
Returns a pointer to the first non-whitespace character in a string.
static void copyWithCharLimit(DestCharPointerType &dest, SrcCharPointerType src, int maxChars) noexcept
Copies characters from one string to another, up to a null terminator or a given maximum number of ch...
static bool isWhitespace(char character) noexcept
Checks whether a character is whitespace.
static bool isLetterOrDigit(char character) noexcept
Checks whether a character is alphabetic or numeric.
static juce_wchar toUpperCase(juce_wchar character) noexcept
Converts a character to upper-case.
static bool isUpperCase(juce_wchar character) noexcept
Checks whether a unicode character is upper-case.
static double getDoubleValue(CharPointerType text) noexcept
Parses a character string, to read a floating-point value.
static void copyAll(DestCharPointerType &dest, SrcCharPointerType src) noexcept
Copies null-terminated characters from one string to another.
static int compareUpTo(CharPointerType1 s1, CharPointerType2 s2, int maxChars) noexcept
Compares two null-terminated character strings, up to a given number of characters.