Update ICU to 78.1

This commit is contained in:
Pāvels Nadtočajevs 2025-11-05 09:02:44 +02:00
parent 08e6cd181f
commit 1ca8f1d7f6
No known key found for this signature in database
GPG Key ID: 8413210218EF35D2
86 changed files with 11845 additions and 7479 deletions

View File

@ -1,4 +1,5 @@
const π = PI
@warning_ignore("confusable_identifier")
var = π
func test():

View File

@ -272,6 +272,7 @@ if env["builtin_icu4c"]:
"common/errorcode.cpp",
"common/filteredbrk.cpp",
"common/filterednormalizer2.cpp",
"common/fixedstring.cpp",
"common/icudataver.cpp",
"common/icuplug.cpp",
"common/loadednormalizer2impl.cpp",

View File

@ -515,6 +515,7 @@ thirdparty_icu_sources = [
"common/errorcode.cpp",
"common/filteredbrk.cpp",
"common/filterednormalizer2.cpp",
"common/fixedstring.cpp",
"common/icudataver.cpp",
"common/icuplug.cpp",
"common/loadednormalizer2impl.cpp",

View File

@ -480,7 +480,7 @@ The files of hidapi are stored in `thirdparty/sdl/hidapi/` folder.
## icu4c
- Upstream: https://github.com/unicode-org/icu
- Version: 77.1 (457157a92aa053e632cc7fcfd0e12f8a943b2d11, 2025)
- Version: 78.1 (049e0d6a420629ac7db77256987d083a563287b5, 2025)
- License: Unicode
Files extracted from upstream source:

View File

@ -540,3 +540,29 @@ publicity pertaining to distribution of the software without specific,
written prior permission. M.I.T. makes no representations about the
suitability of this software for any purpose. It is provided "as is"
without express or implied warranty.
----------------------------------------------------------------------
File: sorttable.js (only for ICU4J)
The MIT Licence, for code from kryogenix.org
Code downloaded from the Browser Experiments section of kryogenix.org is
licenced under the so-called MIT licence. The licence is below.
Copyright (c) 1997-date Stuart Langridge
Permission is hereby granted, free of charge, to any person obtaining a
copy of this software and associated documentation files (the "Software"),
to deal in the Software without restriction, including without limitation
the rights to use, copy, modify, merge, publish, distribute, sublicense,
and/or sell copies of the Software, and to permit persons to whom the
Software is furnished to do so, subject to the following conditions:
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

View File

@ -121,11 +121,9 @@ BreakIterator::buildInstance(const Locale& loc, const char *type, UErrorCode &st
// If there is a result, set the valid locale and actual locale, and the kind
if (U_SUCCESS(status) && result != nullptr) {
U_LOCALE_BASED(locBased, *(BreakIterator*)result);
locBased.setLocaleIDs(ures_getLocaleByType(b, ULOC_VALID_LOCALE, &status),
actual.data(), status);
LocaleBased::setLocaleID(loc.getName(), result->requestLocale, status);
result->actualLocale = Locale(actual.data());
result->validLocale = Locale(ures_getLocaleByType(b, ULOC_VALID_LOCALE, &status));
result->requestLocale = loc;
}
ures_close(b);
@ -204,33 +202,28 @@ BreakIterator::getAvailableLocales(int32_t& count)
//-------------------------------------------
BreakIterator::BreakIterator()
: actualLocale(Locale::getRoot()), validLocale(Locale::getRoot()), requestLocale(Locale::getRoot())
{
}
BreakIterator::BreakIterator(const BreakIterator &other) : UObject(other) {
UErrorCode status = U_ZERO_ERROR;
U_LOCALE_BASED(locBased, *this);
locBased.setLocaleIDs(other.validLocale, other.actualLocale, status);
LocaleBased::setLocaleID(other.requestLocale, requestLocale, status);
U_ASSERT(U_SUCCESS(status));
BreakIterator::BreakIterator(const BreakIterator &other)
: UObject(other),
actualLocale(other.actualLocale),
validLocale(other.validLocale),
requestLocale(other.requestLocale) {
}
BreakIterator &BreakIterator::operator =(const BreakIterator &other) {
if (this != &other) {
UErrorCode status = U_ZERO_ERROR;
U_LOCALE_BASED(locBased, *this);
locBased.setLocaleIDs(other.validLocale, other.actualLocale, status);
LocaleBased::setLocaleID(other.requestLocale, requestLocale, status);
U_ASSERT(U_SUCCESS(status));
actualLocale = other.actualLocale;
validLocale = other.validLocale;
requestLocale = other.requestLocale;
}
return *this;
}
BreakIterator::~BreakIterator()
{
delete validLocale;
delete actualLocale;
delete requestLocale;
}
// ------------------------------------------
@ -398,8 +391,8 @@ BreakIterator::createInstance(const Locale& loc, int32_t kind, UErrorCode& statu
// THIS LONG is a sign of bad code -- so the action item is to
// revisit this in ICU 3.0 and clean it up/fix it/remove it.
if (U_SUCCESS(status) && (result != nullptr) && *actualLoc.getName() != 0) {
U_LOCALE_BASED(locBased, *result);
locBased.setLocaleIDs(actualLoc.getName(), actualLoc.getName(), status);
result->actualLocale = actualLoc;
result->validLocale = actualLoc;
}
return result;
}
@ -506,8 +499,7 @@ BreakIterator::getLocale(ULocDataLocaleType type, UErrorCode& status) const {
return Locale::getRoot();
}
if (type == ULOC_REQUESTED_LOCALE) {
return requestLocale == nullptr ?
Locale::getRoot() : Locale(requestLocale->data());
return requestLocale;
}
return LocaleBased::getLocale(validLocale, actualLocale, type, status);
}
@ -518,7 +510,7 @@ BreakIterator::getLocaleID(ULocDataLocaleType type, UErrorCode& status) const {
return nullptr;
}
if (type == ULOC_REQUESTED_LOCALE) {
return requestLocale == nullptr ? "" : requestLocale->data();
return requestLocale.getName();
}
return LocaleBased::getLocaleID(validLocale, actualLocale, type, status);
}
@ -546,11 +538,8 @@ int32_t BreakIterator::getRuleStatusVec(int32_t *fillInVec, int32_t capacity, UE
return 1;
}
BreakIterator::BreakIterator (const Locale& valid, const Locale& actual) {
UErrorCode status = U_ZERO_ERROR;
U_LOCALE_BASED(locBased, (*this));
locBased.setLocaleIDs(valid.getName(), actual.getName(), status);
U_ASSERT(U_SUCCESS(status));
BreakIterator::BreakIterator(const Locale& valid, const Locale& actual)
: actualLocale(actual), validLocale(valid), requestLocale(Locale::getRoot()) {
}
U_NAMESPACE_END

View File

@ -21,12 +21,6 @@
U_NAMESPACE_BEGIN
// Windows needs us to DLL-export the MaybeStackArray template specialization,
// but MacOS X cannot handle it. Same as in digitlst.h.
#if !U_PLATFORM_IS_DARWIN_BASED
template class U_COMMON_API MaybeStackArray<char, 40>;
#endif
/**
* ICU-internal char * string class.
* This class does not assume or enforce any particular character encoding.
@ -38,34 +32,34 @@ template class U_COMMON_API MaybeStackArray<char, 40>;
* For example:
* cs.data()[5]='a'; // no need for setCharAt(5, 'a')
*/
class U_COMMON_API CharString : public UMemory {
class U_COMMON_API_CLASS CharString : public UMemory {
public:
CharString() : len(0) { buffer[0]=0; }
CharString(StringPiece s, UErrorCode &errorCode) : len(0) {
U_COMMON_API CharString() : len(0) { buffer[0]=0; }
U_COMMON_API CharString(StringPiece s, UErrorCode &errorCode) : len(0) {
buffer[0]=0;
append(s, errorCode);
}
CharString(const CharString &s, UErrorCode &errorCode) : len(0) {
U_COMMON_API CharString(const CharString &s, UErrorCode &errorCode) : len(0) {
buffer[0]=0;
append(s, errorCode);
}
CharString(const char *s, int32_t sLength, UErrorCode &errorCode) : len(0) {
U_COMMON_API CharString(const char *s, int32_t sLength, UErrorCode &errorCode) : len(0) {
buffer[0]=0;
append(s, sLength, errorCode);
}
~CharString() {}
U_COMMON_API ~CharString() {}
/**
* Move constructor; might leave src in an undefined state.
* This string will have the same contents and state that the source string had.
*/
CharString(CharString &&src) noexcept;
U_COMMON_API CharString(CharString &&src) noexcept;
/**
* Move assignment operator; might leave src in an undefined state.
* This string will have the same contents and state that the source string had.
* The behavior is undefined if *this and src are the same object.
*/
CharString &operator=(CharString &&src) noexcept;
U_COMMON_API CharString &operator=(CharString &&src) noexcept;
/**
* Replaces this string's contents with the other string's contents.
@ -73,21 +67,21 @@ public:
* the assignment operator, to make copies explicit and to
* use a UErrorCode where memory allocations might be needed.
*/
CharString &copyFrom(const CharString &other, UErrorCode &errorCode);
CharString &copyFrom(StringPiece s, UErrorCode &errorCode);
U_COMMON_API CharString &copyFrom(const CharString &other, UErrorCode &errorCode);
U_COMMON_API CharString &copyFrom(StringPiece s, UErrorCode &errorCode);
UBool isEmpty() const { return len==0; }
int32_t length() const { return len; }
char operator[](int32_t index) const { return buffer[index]; }
StringPiece toStringPiece() const { return StringPiece(buffer.getAlias(), len); }
U_COMMON_API UBool isEmpty() const { return len==0; }
U_COMMON_API int32_t length() const { return len; }
U_COMMON_API char operator[](int32_t index) const { return buffer[index]; }
U_COMMON_API StringPiece toStringPiece() const { return StringPiece(buffer.getAlias(), len); }
const char *data() const { return buffer.getAlias(); }
char *data() { return buffer.getAlias(); }
U_COMMON_API const char *data() const { return buffer.getAlias(); }
U_COMMON_API char *data() { return buffer.getAlias(); }
/**
* Allocates length()+1 chars and copies the NUL-terminated data().
* The caller must uprv_free() the result.
*/
char *cloneData(UErrorCode &errorCode) const;
U_COMMON_API char *cloneData(UErrorCode &errorCode) const;
/**
* Copies the contents of the string into dest.
* Checks if there is enough space in dest, extracts the entire string if possible,
@ -103,40 +97,40 @@ public:
* @param errorCode ICU error code.
* @return length()
*/
int32_t extract(char *dest, int32_t capacity, UErrorCode &errorCode) const;
U_COMMON_API int32_t extract(char *dest, int32_t capacity, UErrorCode &errorCode) const;
bool operator==(const CharString& other) const {
U_COMMON_API bool operator==(const CharString& other) const {
return len == other.length() && (len == 0 || uprv_memcmp(data(), other.data(), len) == 0);
}
bool operator!=(const CharString& other) const {
U_COMMON_API bool operator!=(const CharString& other) const {
return !operator==(other);
}
bool operator==(StringPiece other) const {
U_COMMON_API bool operator==(StringPiece other) const {
return len == other.length() && (len == 0 || uprv_memcmp(data(), other.data(), len) == 0);
}
bool operator!=(StringPiece other) const {
U_COMMON_API bool operator!=(StringPiece other) const {
return !operator==(other);
}
/** @return last index of c, or -1 if c is not in this string */
int32_t lastIndexOf(char c) const;
U_COMMON_API int32_t lastIndexOf(char c) const;
bool contains(StringPiece s) const;
U_COMMON_API bool contains(StringPiece s) const;
CharString &clear() { len=0; buffer[0]=0; return *this; }
CharString &truncate(int32_t newLength);
U_COMMON_API CharString &clear() { len=0; buffer[0]=0; return *this; }
U_COMMON_API CharString &truncate(int32_t newLength);
CharString &append(char c, UErrorCode &errorCode);
CharString &append(StringPiece s, UErrorCode &errorCode) {
U_COMMON_API CharString &append(char c, UErrorCode &errorCode);
U_COMMON_API CharString &append(StringPiece s, UErrorCode &errorCode) {
return append(s.data(), s.length(), errorCode);
}
CharString &append(const CharString &s, UErrorCode &errorCode) {
U_COMMON_API CharString &append(const CharString &s, UErrorCode &errorCode) {
return append(s.data(), s.length(), errorCode);
}
CharString &append(const char *s, int32_t sLength, UErrorCode &status);
U_COMMON_API CharString &append(const char *s, int32_t sLength, UErrorCode &status);
CharString &appendNumber(int64_t number, UErrorCode &status);
U_COMMON_API CharString &appendNumber(int64_t number, UErrorCode &status);
/**
* Returns a writable buffer for appending and writes the buffer's capacity to
@ -158,26 +152,28 @@ public:
* @param errorCode in/out error code
* @return a buffer with resultCapacity>=min_capacity
*/
char *getAppendBuffer(int32_t minCapacity,
U_COMMON_API char *getAppendBuffer(int32_t minCapacity,
int32_t desiredCapacityHint,
int32_t &resultCapacity,
UErrorCode &errorCode);
CharString &appendInvariantChars(const UnicodeString &s, UErrorCode &errorCode);
CharString &appendInvariantChars(const char16_t* uchars, int32_t ucharsLen, UErrorCode& errorCode);
U_COMMON_API CharString &appendInvariantChars(const UnicodeString &s, UErrorCode &errorCode);
U_COMMON_API CharString &appendInvariantChars(const char16_t* uchars,
int32_t ucharsLen,
UErrorCode& errorCode);
/**
* Appends a filename/path part, e.g., a directory name.
* First appends a U_FILE_SEP_CHAR or U_FILE_ALT_SEP_CHAR if necessary.
* Does nothing if s is empty.
*/
CharString &appendPathPart(StringPiece s, UErrorCode &errorCode);
U_COMMON_API CharString &appendPathPart(StringPiece s, UErrorCode &errorCode);
/**
* Appends a U_FILE_SEP_CHAR or U_FILE_ALT_SEP_CHAR if this string is not empty
* and does not already end with a U_FILE_SEP_CHAR or U_FILE_ALT_SEP_CHAR.
*/
CharString &ensureEndsWithFileSeparator(UErrorCode &errorCode);
U_COMMON_API CharString &ensureEndsWithFileSeparator(UErrorCode &errorCode);
private:
MaybeStackArray<char, 40> buffer;

View File

@ -334,9 +334,7 @@ public:
// No heap allocation. Use only on the stack.
static void* U_EXPORT2 operator new(size_t) noexcept = delete;
static void* U_EXPORT2 operator new[](size_t) noexcept = delete;
#if U_HAVE_PLACEMENT_NEW
static void* U_EXPORT2 operator new(size_t, void*) noexcept = delete;
#endif
/**
* Default constructor initializes with internal T[stackCapacity] buffer.
@ -570,9 +568,7 @@ public:
// No heap allocation. Use only on the stack.
static void* U_EXPORT2 operator new(size_t) noexcept = delete;
static void* U_EXPORT2 operator new[](size_t) noexcept = delete;
#if U_HAVE_PLACEMENT_NEW
static void* U_EXPORT2 operator new(size_t, void*) noexcept = delete;
#endif
/**
* Default constructor initializes with internal H+T[stackCapacity] buffer.

View File

@ -43,11 +43,11 @@
U_NAMESPACE_BEGIN
class U_COMMON_API CStr : public UMemory {
class U_COMMON_API_CLASS CStr : public UMemory {
public:
CStr(const UnicodeString &in);
~CStr();
const char * operator ()() const;
U_COMMON_API CStr(const UnicodeString &in);
U_COMMON_API ~CStr();
U_COMMON_API const char * operator ()() const;
private:
CharString s;

29
thirdparty/icu4c/common/fixedstring.cpp vendored Normal file
View File

@ -0,0 +1,29 @@
// © 2025 and later: Unicode, Inc. and others.
// License & terms of use: https://www.unicode.org/copyright.html
#include "fixedstring.h"
#include "unicode/unistr.h"
#include "unicode/utypes.h"
U_NAMESPACE_BEGIN
/**
 * Replaces the contents of dst with the characters of src, extracted with the
 * US_INV (invariant-character) conversion.
 *
 * @param src    The string to copy from.
 * @param dst    The destination; cleared when src is empty, otherwise
 *               re-allocated to hold src.length() chars plus a terminator.
 * @param status In/out error code. Nothing happens if it already indicates a
 *               failure; set to U_MEMORY_ALLOCATION_ERROR if dst cannot
 *               reserve the required storage.
 */
U_EXPORT void copyInvariantChars(const UnicodeString& src, FixedString& dst, UErrorCode& status) {
    if (U_SUCCESS(status)) {
        if (src.isEmpty()) {
            // FixedString represents the empty string without any storage.
            dst.clear();
        } else {
            const int32_t length = src.length();
            const int32_t capacity = length + 1;  // room for the terminator
            if (dst.reserve(capacity)) {
                src.extract(0, length, dst.getAlias(), capacity, US_INV);
            } else {
                status = U_MEMORY_ALLOCATION_ERROR;
            }
        }
    }
}
U_NAMESPACE_END

104
thirdparty/icu4c/common/fixedstring.h vendored Normal file
View File

@ -0,0 +1,104 @@
// © 2025 and later: Unicode, Inc. and others.
// License & terms of use: https://www.unicode.org/copyright.html
#ifndef FIXEDSTRING_H
#define FIXEDSTRING_H
#include <string_view>
#include <utility>
#include "unicode/uobject.h"
#include "unicode/utypes.h"
#include "cmemory.h"
U_NAMESPACE_BEGIN
class UnicodeString;
/**
* ICU-internal fixed-length char* string class.
* This is a complement to CharString to store fixed-length strings efficiently
* (not allocating any unnecessary storage for future additions to the string).
*
* A terminating NUL is always stored, but the length of the string isn't.
* An empty string is stored as nullptr, allocating no storage at all.
*
* This class wants to be convenient but is also deliberately minimalist.
* Please do not add methods if they only add minor convenience.
*/
class FixedString : public UMemory {
public:
    /** Constructs an empty string (no storage is allocated). */
    FixedString() = default;

    ~FixedString() { operator delete[](ptr); }

    /** Copy constructor; the result is empty if other is empty or allocation fails. */
    FixedString(const FixedString& other) : FixedString(other.data()) {}

    /**
     * Constructs a NUL-terminated copy of init.
     * The result is empty if init is empty or if allocation fails.
     * Deliberately not explicit so that a const char* can be used directly.
     */
    FixedString(std::string_view init) {
        size_t size = init.size();
        if (size > 0 && reserve(size + 1)) {
            uprv_memcpy(ptr, init.data(), size);
            ptr[size] = '\0';
        }
    }

    /**
     * Copy assignment. Self-assignment is a no-op; without the guard the
     * source buffer would be released before it is read.
     */
    FixedString& operator=(const FixedString& other) {
        if (this != &other) {
            *this = std::string_view(other.data());
        }
        return *this;
    }

    /**
     * Replaces the contents with a NUL-terminated copy of init.
     * The new buffer is filled before the old one is released, so init may
     * refer to (part of) this object's current contents.
     * The string ends up empty if init is empty or if allocation fails.
     */
    FixedString& operator=(std::string_view init) {
        char* replacement = nullptr;
        if (!init.empty()) {
            size_t size = init.size();
            replacement = static_cast<char*>(operator new[](size + 1));
            if (replacement != nullptr) {
                uprv_memcpy(replacement, init.data(), size);
                replacement[size] = '\0';
            }
        }
        operator delete[](ptr);
        ptr = replacement;
        return *this;
    }

    /** Move constructor; leaves other empty. */
    FixedString(FixedString&& other) noexcept : ptr(std::exchange(other.ptr, nullptr)) {}

    /** Move assignment; leaves other empty. Self-move leaves the string unchanged. */
    FixedString& operator=(FixedString&& other) noexcept {
        if (this != &other) {
            operator delete[](ptr);
            ptr = std::exchange(other.ptr, nullptr);
        }
        return *this;
    }

    /** Releases the storage and makes this string empty. */
    void clear() {
        operator delete[](ptr);
        ptr = nullptr;
    }

    /** @return the NUL-terminated contents; "" (never nullptr) when empty. */
    const char* data() const {
        return isEmpty() ? "" : ptr;
    }

    /** @return the writable buffer, or nullptr when empty. */
    char* getAlias() {
        return ptr;
    }

    /** @return true if no storage is held. */
    bool isEmpty() const {
        return ptr == nullptr;
    }

    /**
     * Allocate storage for a new string, without initializing it.
     * The previous contents are always discarded, even when allocation fails.
     * @param size number of chars to allocate, including the terminating NUL
     * @return true if the allocation succeeded
     */
    bool reserve(size_t size) {
        operator delete[](ptr);
        ptr = static_cast<char*>(operator new[](size));
        return ptr != nullptr;
    }

private:
    char* ptr = nullptr;
};
U_COMMON_API void copyInvariantChars(const UnicodeString& src, FixedString& dst, UErrorCode& status);
U_NAMESPACE_END
#endif

View File

@ -8,6 +8,7 @@
#include "bytesinkutil.h" // StringByteSink<CharString>
#include "charstr.h"
#include "cstring.h"
#include "fixedstring.h"
#include "ulocimp.h"
#include "unicode/localebuilder.h"
#include "unicode/locid.h"
@ -131,14 +132,13 @@ LocaleBuilder& LocaleBuilder::setVariant(StringPiece variant)
variant_ = nullptr;
return *this;
}
CharString* new_variant = new CharString(variant, status_);
if (U_FAILURE(status_)) { return *this; }
if (new_variant == nullptr) {
FixedString* new_variant = new FixedString(variant);
if (new_variant == nullptr || new_variant->isEmpty()) {
status_ = U_MEMORY_ALLOCATION_ERROR;
return *this;
}
transform(new_variant->data(), new_variant->length());
if (!ultag_isVariantSubtags(new_variant->data(), new_variant->length())) {
transform(new_variant->getAlias(), variant.length());
if (!ultag_isVariantSubtags(new_variant->data(), variant.length())) {
delete new_variant;
status_ = U_ILLEGAL_ARGUMENT_ERROR;
return *this;

File diff suppressed because it is too large Load Diff

View File

@ -11,85 +11,36 @@
**********************************************************************
*/
#include "locbased.h"
#include "cstring.h"
#include "charstr.h"
#include "uresimp.h"
U_NAMESPACE_BEGIN
Locale LocaleBased::getLocale(const CharString* valid, const CharString* actual,
ULocDataLocaleType type, UErrorCode& status) {
const char* id = getLocaleID(valid, actual, type, status);
return Locale(id != nullptr ? id : "");
}
const char* LocaleBased::getLocaleID(const CharString* valid, const CharString* actual,
const Locale& LocaleBased::getLocale(const Locale& valid, const Locale& actual,
ULocDataLocaleType type, UErrorCode& status) {
if (U_FAILURE(status)) {
return nullptr;
return Locale::getRoot();
}
switch(type) {
case ULOC_VALID_LOCALE:
return valid == nullptr ? "" : valid->data();
return valid;
case ULOC_ACTUAL_LOCALE:
return actual == nullptr ? "" : actual->data();
return actual;
default:
status = U_ILLEGAL_ARGUMENT_ERROR;
return Locale::getRoot();
}
}
const char* LocaleBased::getLocaleID(const Locale& valid, const Locale& actual,
ULocDataLocaleType type, UErrorCode& status) {
const Locale& locale = getLocale(valid, actual, type, status);
if (U_FAILURE(status)) {
return nullptr;
}
}
void LocaleBased::setLocaleIDs(const CharString* validID, const CharString* actualID, UErrorCode& status) {
setValidLocaleID(validID, status);
setActualLocaleID(actualID,status);
}
void LocaleBased::setLocaleIDs(const char* validID, const char* actualID, UErrorCode& status) {
setValidLocaleID(validID, status);
setActualLocaleID(actualID,status);
}
void LocaleBased::setLocaleID(const char* id, CharString*& dest, UErrorCode& status) {
if (U_FAILURE(status)) { return; }
if (id == nullptr || *id == 0) {
delete dest;
dest = nullptr;
} else {
if (dest == nullptr) {
dest = new CharString(id, status);
if (dest == nullptr) {
status = U_MEMORY_ALLOCATION_ERROR;
return;
}
} else {
dest->copyFrom(id, status);
}
}
}
void LocaleBased::setLocaleID(const CharString* id, CharString*& dest, UErrorCode& status) {
if (U_FAILURE(status)) { return; }
if (id == nullptr || id->isEmpty()) {
delete dest;
dest = nullptr;
} else {
if (dest == nullptr) {
dest = new CharString(*id, status);
if (dest == nullptr) {
status = U_MEMORY_ALLOCATION_ERROR;
return;
}
} else {
dest->copyFrom(*id, status);
}
}
}
bool LocaleBased::equalIDs(const CharString* left, const CharString* right) {
// true if both are nullptr
if (left == nullptr && right == nullptr) return true;
// false if only one is nullptr
if (left == nullptr || right == nullptr) return false;
return *left == *right;
return locale == Locale::getRoot() ? kRootLocaleName : locale.getName();
}
U_NAMESPACE_END

View File

@ -16,17 +16,8 @@
#include "unicode/locid.h"
#include "unicode/uobject.h"
/**
* Macro to declare a locale LocaleBased wrapper object for the given
* object, which must have two members named `validLocale' and
* `actualLocale' of which are pointers to the internal icu::CharString.
*/
#define U_LOCALE_BASED(varname, objname) \
LocaleBased varname((objname).validLocale, (objname).actualLocale)
U_NAMESPACE_BEGIN
class CharString;
/**
* A utility class that unifies the implementation of getLocale() by
* various ICU services. This class is likely to be removed in the
@ -38,12 +29,6 @@ class U_COMMON_API LocaleBased : public UMemory {
public:
/**
* Construct a LocaleBased wrapper around the two pointers. These
* will be aliased for the lifetime of this object.
*/
inline LocaleBased(CharString*& validAlias, CharString*& actualAlias);
/**
* Return locale meta-data for the service object wrapped by this
* object. Either the valid or the actual locale may be
@ -54,8 +39,8 @@ class U_COMMON_API LocaleBased : public UMemory {
* @param status input-output error code
* @return the indicated locale
*/
static Locale getLocale(
const CharString* valid, const CharString* actual,
static const Locale& getLocale(
const Locale& valid, const Locale& actual,
ULocDataLocaleType type, UErrorCode& status);
/**
@ -69,51 +54,11 @@ class U_COMMON_API LocaleBased : public UMemory {
* @return the indicated locale ID
*/
static const char* getLocaleID(
const CharString* valid, const CharString* actual,
const Locale& valid, const Locale& actual,
ULocDataLocaleType type, UErrorCode& status);
/**
* Set the locale meta-data for the service object wrapped by this
* object. If either parameter is zero, it is ignored.
* @param valid the ID of the valid locale
* @param actual the ID of the actual locale
*/
void setLocaleIDs(const char* valid, const char* actual, UErrorCode& status);
void setLocaleIDs(const CharString* valid, const CharString* actual, UErrorCode& status);
static void setLocaleID(const char* id, CharString*& dest, UErrorCode& status);
static void setLocaleID(const CharString* id, CharString*& dest, UErrorCode& status);
static bool equalIDs(const CharString* left, const CharString* right);
private:
void setValidLocaleID(const CharString* id, UErrorCode& status);
void setActualLocaleID(const CharString* id, UErrorCode& status);
void setValidLocaleID(const char* id, UErrorCode& status);
void setActualLocaleID(const char* id, UErrorCode& status);
CharString*& valid;
CharString*& actual;
};
inline LocaleBased::LocaleBased(CharString*& validAlias, CharString*& actualAlias) :
valid(validAlias), actual(actualAlias) {
}
inline void LocaleBased::setValidLocaleID(const CharString* id, UErrorCode& status) {
setLocaleID(id, valid, status);
}
inline void LocaleBased::setActualLocaleID(const CharString* id, UErrorCode& status) {
setLocaleID(id, actual, status);
}
inline void LocaleBased::setValidLocaleID(const char* id, UErrorCode& status) {
setLocaleID(id, valid, status);
}
inline void LocaleBased::setActualLocaleID(const char* id, UErrorCode& status) {
setLocaleID(id, actual, status);
}
U_NAMESPACE_END
#endif

View File

@ -66,7 +66,7 @@ Locale::getDisplayLanguage(const Locale &displayLocale,
return result;
}
length=uloc_getDisplayLanguage(fullName, displayLocale.fullName,
length=uloc_getDisplayLanguage(getName(), displayLocale.getName(),
buffer, result.getCapacity(),
&errorCode);
result.releaseBuffer(U_SUCCESS(errorCode) ? length : 0);
@ -78,7 +78,7 @@ Locale::getDisplayLanguage(const Locale &displayLocale,
return result;
}
errorCode=U_ZERO_ERROR;
length=uloc_getDisplayLanguage(fullName, displayLocale.fullName,
length=uloc_getDisplayLanguage(getName(), displayLocale.getName(),
buffer, result.getCapacity(),
&errorCode);
result.releaseBuffer(U_SUCCESS(errorCode) ? length : 0);
@ -106,7 +106,7 @@ Locale::getDisplayScript(const Locale &displayLocale,
return result;
}
length=uloc_getDisplayScript(fullName, displayLocale.fullName,
length=uloc_getDisplayScript(getName(), displayLocale.getName(),
buffer, result.getCapacity(),
&errorCode);
result.releaseBuffer(U_SUCCESS(errorCode) ? length : 0);
@ -118,7 +118,7 @@ Locale::getDisplayScript(const Locale &displayLocale,
return result;
}
errorCode=U_ZERO_ERROR;
length=uloc_getDisplayScript(fullName, displayLocale.fullName,
length=uloc_getDisplayScript(getName(), displayLocale.getName(),
buffer, result.getCapacity(),
&errorCode);
result.releaseBuffer(U_SUCCESS(errorCode) ? length : 0);
@ -146,7 +146,7 @@ Locale::getDisplayCountry(const Locale &displayLocale,
return result;
}
length=uloc_getDisplayCountry(fullName, displayLocale.fullName,
length=uloc_getDisplayCountry(getName(), displayLocale.getName(),
buffer, result.getCapacity(),
&errorCode);
result.releaseBuffer(U_SUCCESS(errorCode) ? length : 0);
@ -158,7 +158,7 @@ Locale::getDisplayCountry(const Locale &displayLocale,
return result;
}
errorCode=U_ZERO_ERROR;
length=uloc_getDisplayCountry(fullName, displayLocale.fullName,
length=uloc_getDisplayCountry(getName(), displayLocale.getName(),
buffer, result.getCapacity(),
&errorCode);
result.releaseBuffer(U_SUCCESS(errorCode) ? length : 0);
@ -186,7 +186,7 @@ Locale::getDisplayVariant(const Locale &displayLocale,
return result;
}
length=uloc_getDisplayVariant(fullName, displayLocale.fullName,
length=uloc_getDisplayVariant(getName(), displayLocale.getName(),
buffer, result.getCapacity(),
&errorCode);
result.releaseBuffer(U_SUCCESS(errorCode) ? length : 0);
@ -198,7 +198,7 @@ Locale::getDisplayVariant(const Locale &displayLocale,
return result;
}
errorCode=U_ZERO_ERROR;
length=uloc_getDisplayVariant(fullName, displayLocale.fullName,
length=uloc_getDisplayVariant(getName(), displayLocale.getName(),
buffer, result.getCapacity(),
&errorCode);
result.releaseBuffer(U_SUCCESS(errorCode) ? length : 0);
@ -226,7 +226,7 @@ Locale::getDisplayName(const Locale &displayLocale,
return result;
}
length=uloc_getDisplayName(fullName, displayLocale.fullName,
length=uloc_getDisplayName(getName(), displayLocale.getName(),
buffer, result.getCapacity(),
&errorCode);
result.releaseBuffer(U_SUCCESS(errorCode) ? length : 0);
@ -238,7 +238,7 @@ Locale::getDisplayName(const Locale &displayLocale,
return result;
}
errorCode=U_ZERO_ERROR;
length=uloc_getDisplayName(fullName, displayLocale.fullName,
length=uloc_getDisplayName(getName(), displayLocale.getName(),
buffer, result.getCapacity(),
&errorCode);
result.releaseBuffer(U_SUCCESS(errorCode) ? length : 0);

View File

@ -31,13 +31,16 @@
******************************************************************************
*/
#include <cstddef>
#include <optional>
#include <string_view>
#include <type_traits>
#include <utility>
#include "unicode/bytestream.h"
#include "unicode/locid.h"
#include "unicode/localebuilder.h"
#include "unicode/localpointer.h"
#include "unicode/strenum.h"
#include "unicode/stringpiece.h"
#include "unicode/uloc.h"
@ -48,6 +51,7 @@
#include "charstrmap.h"
#include "cmemory.h"
#include "cstring.h"
#include "fixedstring.h"
#include "mutex.h"
#include "putilimp.h"
#include "uassert.h"
@ -232,9 +236,214 @@ locale_get_default()
return Locale::getDefault().getName();
}
namespace {

/**
 * Copies sv into the fixed-size char array member selected by FIELD on *that
 * and NUL-terminates it.
 *
 * @tparam FIELD pointer-to-member naming a char array member of T
 * @param sv     the characters to store; must be shorter than the array
 *               (checked with U_ASSERT only when sv is non-empty)
 * @param that   the object whose member is written
 */
template <auto FIELD, typename T>
void copyToArray(std::string_view sv, T* that) {
    auto& dest = that->*FIELD;
    using Array = std::remove_reference_t<decltype(dest)>;
    constexpr size_t capacity = std::extent_v<Array>;
    static_assert(capacity > 0);

    const size_t count = sv.size();
    if (count != 0) {
        U_ASSERT(count < capacity);
        uprv_memcpy(dest, sv.data(), count);
    }
    dest[count] = '\0';
}

}  // namespace
U_NAMESPACE_BEGIN
/**
 * Fills in the subtag arrays and the variant offset of this Nest.
 * Each subtag is stored NUL-terminated via copyToArray().
 */
void Locale::Nest::init(std::string_view language,
                        std::string_view script,
                        std::string_view region,
                        uint8_t variantBegin) {
    // The parameter shadows the member, hence the explicit this->.
    this->variantBegin = variantBegin;

    copyToArray<&Nest::language>(language, this);
    copyToArray<&Nest::script>(script, this);
    copyToArray<&Nest::region>(region, this);
}
// Builds a Nest in place from a Heap that occupies the very same storage
// (see the U_ASSERT below), keeping language/script/region and starting
// with an empty baseName.
Locale::Nest::Nest(Heap&& heap, uint8_t variantBegin) {
// When moving from Heap to Nest the language field can be left untouched
// (as it has the same offset in both) and only the script and region fields
// need to be copied to their new locations, which is safe to do because the
// new locations come before the old locations in memory and don't overlap.
static_assert(offsetof(Nest, region) <= offsetof(Heap, script));
static_assert(offsetof(Nest, variantBegin) <= offsetof(Heap, region));
U_ASSERT(this == reinterpret_cast<Nest*>(&heap));
// NOTE(review): script is relocated before region; keep this order, since
// the asserts above only bound the new region slot by the old region slot,
// so it may overlap the old script slot -- confirm against the struct layouts.
copyToArray<&Nest::script>(heap.script, this);
copyToArray<&Nest::region>(heap.region, this);
this->variantBegin = variantBegin;
// Make baseName the empty string.
*this->baseName = '\0';
}
/**
 * Separately allocated part of a Heap payload: the full name, the base name
 * (stored only when it cannot be served from fullName, see getBaseName())
 * and the offset of the variant within the base name.
 */
struct Locale::Heap::Alloc : public UMemory {
    FixedString fullName;
    FixedString baseName;
    int32_t variantBegin;

    /** @return the variant part of the base name, or "" if variantBegin is 0. */
    const char* getVariant() const {
        if (variantBegin == 0) {
            return "";
        }
        return getBaseName() + variantBegin;
    }

    const char* getFullName() const { return fullName.data(); }

    /**
     * @return the stored base name if there is one; otherwise the full name,
     *         unless the full name starts with '@', in which case the (empty)
     *         base name is returned.
     */
    const char* getBaseName() const {
        if (!baseName.isEmpty()) {
            return baseName.data();
        }
        const char* name = fullName.data();
        return *name != '@' ? name : baseName.data();
    }

    Alloc(int32_t variantBegin) : fullName(), baseName(), variantBegin(variantBegin) {}

    /**
     * Copying constructor with error reporting. A FixedString that is still
     * empty after being assigned a non-empty source is reported as
     * U_MEMORY_ALLOCATION_ERROR. baseName is only copied when fullName was
     * non-empty and copied successfully.
     */
    Alloc(const Alloc& other, UErrorCode& status)
        : fullName(), baseName(), variantBegin(other.variantBegin) {
        if (U_FAILURE(status) || other.fullName.isEmpty()) {
            return;
        }
        fullName = other.fullName;
        if (fullName.isEmpty()) {
            status = U_MEMORY_ALLOCATION_ERROR;
            return;
        }
        if (other.baseName.isEmpty()) {
            return;
        }
        baseName = other.baseName;
        if (baseName.isEmpty()) {
            status = U_MEMORY_ALLOCATION_ERROR;
        }
    }

    // Move should be done on the owner of the pointer to this object.
    Alloc(Alloc&&) noexcept = delete;

    ~Alloc() = default;
};
// The Heap accessors simply forward to the Alloc object behind ptr.
const char* Locale::Heap::getVariant() const {
    return ptr->getVariant();
}

const char* Locale::Heap::getFullName() const {
    return ptr->getFullName();
}

const char* Locale::Heap::getBaseName() const {
    return ptr->getBaseName();
}
/**
 * Creates a Heap payload with a freshly allocated Alloc.
 * If the allocation fails the object is marked eBOGUS and the subtag
 * arrays are not written.
 */
Locale::Heap::Heap(std::string_view language,
                   std::string_view script,
                   std::string_view region,
                   int32_t variantBegin) {
    ptr = new Alloc(variantBegin);
    if (ptr == nullptr) {
        type = eBOGUS;
        return;
    }
    type = eHEAP;
    copyToArray<&Heap::language>(language, this);
    copyToArray<&Heap::script>(script, this);
    copyToArray<&Heap::region>(region, this);
}
// Releases the Alloc behind ptr. Must only run on an object whose type is
// eHEAP (asserted below).
Locale::Heap::~Heap() {
U_ASSERT(type == eHEAP);
delete ptr;
}
/**
 * Copy assignment into an object that currently is eBOGUS (asserted).
 * On success this becomes eHEAP with its own copy of other's Alloc and
 * subtags; on failure the partial copy is deleted and type stays eBOGUS.
 */
Locale::Heap& Locale::Heap::operator=(const Heap& other) {
    U_ASSERT(type == eBOGUS);
    UErrorCode status = U_ZERO_ERROR;
    ptr = new Alloc(*other.ptr, status);

    const bool copied = ptr != nullptr && U_SUCCESS(status);
    if (!copied) {
        // type is left as eBOGUS, so ptr is not meant to be used afterwards.
        delete ptr;
        return *this;
    }

    type = eHEAP;
    uprv_memcpy(language, other.language, sizeof language);
    uprv_memcpy(script, other.script, sizeof script);
    uprv_memcpy(region, other.region, sizeof region);
    return *this;
}
/**
 * Move assignment into an object that currently is eBOGUS (asserted).
 * Takes over other's Alloc pointer and subtags; other is marked eBOGUS.
 */
Locale::Heap& Locale::Heap::operator=(Heap&& other) noexcept {
    U_ASSERT(type == eBOGUS);

    // The subtag arrays are plain char arrays and are simply copied.
    uprv_memcpy(language, other.language, sizeof language);
    uprv_memcpy(script, other.script, sizeof script);
    uprv_memcpy(region, other.region, sizeof region);

    // Transfer ownership: other keeps its pointer value but, being eBOGUS,
    // is no longer destroyed as a Heap by Payload::~Payload().
    ptr = other.ptr;
    type = eHEAP;
    other.type = eBOGUS;
    return *this;
}
// Dispatches on the payload tag and calls the matching callable:
//   eBOGUS -> bogusFn(args...)
//   eNEST  -> nestFn(nest, args...)
//   eHEAP  -> heapFn(heap, args...)
// Any other tag value reaches UPRV_UNREACHABLE_EXIT.
template <typename BogusFn, typename NestFn, typename HeapFn, typename... Args>
auto Locale::Payload::visit(BogusFn bogusFn, NestFn nestFn, HeapFn heapFn, Args... args) const {
    if (type == eBOGUS) {
        return bogusFn(args...);
    }
    if (type == eNEST) {
        return nestFn(nest, args...);
    }
    if (type == eHEAP) {
        return heapFn(heap, args...);
    }
    UPRV_UNREACHABLE_EXIT;
}
void Locale::Payload::copy(const Payload& other) {
other.visit([](Payload*) {},
[](const Nest& nest, Payload* dst) { dst->nest = nest; },
[](const Heap& heap, Payload* dst) { dst->heap = heap; },
this);
}
// Transfers the active alternative of `other` into this payload.
// A bogus source transfers nothing, a Nest source is copied, and a Heap
// source is move-assigned.
void Locale::Payload::move(Payload&& other) noexcept {
    const auto fromBogus = [](Payload*) {};
    const auto fromNest = [](const Nest& src, Payload* dst) { dst->nest = src; };
    // visit() passes the Heap as const&, so the const is cast away to allow
    // the move assignment.
    const auto fromHeap = [](const Heap& src, Payload* dst) {
        dst->heap = std::move(const_cast<Heap&>(src));
    };
    other.visit(fromBogus, fromNest, fromHeap, this);
}
// Only the Heap alternative needs explicit destruction.
Locale::Payload::~Payload() {
    if (type != eHEAP) {
        return;
    }
    heap.~Heap();
}
// Copy construction: start out bogus, then copy other's active alternative.
Locale::Payload::Payload(const Payload& other) : type{eBOGUS} {
    copy(other);
}
// Move construction: start out bogus, then take over other's active alternative.
Locale::Payload::Payload(Payload&& other) noexcept : type{eBOGUS} {
    move(std::move(other));
}
// Copy assignment: self-assignment is a no-op; otherwise the current
// contents are dropped before other's active alternative is copied in.
Locale::Payload& Locale::Payload::operator=(const Payload& other) {
    if (this == &other) {
        return *this;
    }
    setToBogus();
    copy(other);
    return *this;
}
// Move assignment: self-assignment is a no-op; otherwise the current
// contents are dropped before other's active alternative is taken over.
Locale::Payload& Locale::Payload::operator=(Payload&& other) noexcept {
    if (this == &other) {
        return *this;
    }
    setToBogus();
    move(std::move(other));
    return *this;
}
// Drops whatever the payload currently holds and tags it as bogus.
void Locale::Payload::setToBogus() {
this->~Payload();  // explicit destructor call: destroys the Heap alternative if it is active
type = eBOGUS;     // set the tag afterwards, once the old contents are gone
}
// Constructs a new alternative of type T in place, forwarding `args` to T's
// constructor, and returns a reference to it.
//
//  - T == Nest: the current payload is destroyed first, then a Nest is
//    constructed in the nest member.
//  - T == Heap: the payload must not already hold a Heap (asserted); a Heap
//    is constructed in the heap member.
//
// Any other T is rejected at compile time. Without the static_assert such an
// instantiation would compile and then fall off the end of a non-void function.
template <typename T, typename... Args> T& Locale::Payload::emplace(Args&&... args) {
    static_assert(std::is_same_v<T, Nest> || std::is_same_v<T, Heap>,
                  "Locale::Payload::emplace supports only Nest and Heap");
    if constexpr (std::is_same_v<T, Nest>) {
        this->~Payload();
        ::new (&nest) Nest(std::forward<Args>(args)...);
        return nest;
    } else {
        U_ASSERT(type != eHEAP);
        ::new (&heap) Heap(std::forward<Args>(args)...);
        return heap;
    }
}
// Returns the Nest alternative, or nullptr when the payload is not tagged eNEST.
template <> Locale::Nest* Locale::Payload::get() {
    if (type != eNEST) {
        return nullptr;
    }
    return &nest;
}
// Returns the Heap alternative, or nullptr when the payload is not tagged eHEAP.
template <> Locale::Heap* Locale::Payload::get() {
    if (type != eHEAP) {
        return nullptr;
    }
    return &heap;
}
UOBJECT_DEFINE_RTTI_IMPLEMENTATION(Locale)
/*Character separating the posix id fields*/
@ -243,22 +452,10 @@ UOBJECT_DEFINE_RTTI_IMPLEMENTATION(Locale)
#define SEP_CHAR '_'
#define NULL_CHAR '\0'
Locale::~Locale()
{
if ((baseName != fullName) && (baseName != fullNameBuffer)) {
uprv_free(baseName);
}
baseName = nullptr;
/*if fullName is on the heap, we free it*/
if (fullName != fullNameBuffer)
{
uprv_free(fullName);
fullName = nullptr;
}
}
Locale::~Locale() = default;
Locale::Locale()
: UObject(), fullName(fullNameBuffer), baseName(nullptr)
: UObject(), payload()
{
init(nullptr, false);
}
@ -269,9 +466,8 @@ Locale::Locale()
* the default locale.)
*/
Locale::Locale(Locale::ELocaleType)
: UObject(), fullName(fullNameBuffer), baseName(nullptr)
: UObject(), payload()
{
setToBogus();
}
@ -279,7 +475,7 @@ Locale::Locale( const char * newLanguage,
const char * newCountry,
const char * newVariant,
const char * newKeywords)
: UObject(), fullName(fullNameBuffer), baseName(nullptr)
: UObject(), payload()
{
if( (newLanguage==nullptr) && (newCountry == nullptr) && (newVariant == nullptr) )
{
@ -300,7 +496,6 @@ Locale::Locale( const char * newLanguage,
{
lsize = static_cast<int32_t>(uprv_strlen(newLanguage));
if ( lsize < 0 || lsize > ULOC_STRING_LIMIT ) { // int32 wrap
setToBogus();
return;
}
}
@ -312,7 +507,6 @@ Locale::Locale( const char * newLanguage,
{
csize = static_cast<int32_t>(uprv_strlen(newCountry));
if ( csize < 0 || csize > ULOC_STRING_LIMIT ) { // int32 wrap
setToBogus();
return;
}
}
@ -329,7 +523,6 @@ Locale::Locale( const char * newLanguage,
// remove trailing _'s
vsize = static_cast<int32_t>(uprv_strlen(newVariant));
if ( vsize < 0 || vsize > ULOC_STRING_LIMIT ) { // int32 wrap
setToBogus();
return;
}
while( (vsize>1) && (newVariant[vsize-1] == SEP_CHAR) )
@ -342,7 +535,6 @@ Locale::Locale( const char * newLanguage,
{
ksize = static_cast<int32_t>(uprv_strlen(newKeywords));
if ( ksize < 0 || ksize > ULOC_STRING_LIMIT ) {
setToBogus();
return;
}
}
@ -383,7 +575,6 @@ Locale::Locale( const char * newLanguage,
if (U_FAILURE(status)) {
// Something went wrong with appending, etc.
setToBogus();
return;
}
// Parse it, because for example 'language' might really be a complete
@ -392,82 +583,11 @@ Locale::Locale( const char * newLanguage,
}
}
Locale::Locale(const Locale &other)
: UObject(other), fullName(fullNameBuffer), baseName(nullptr)
{
*this = other;
}
Locale::Locale(const Locale&) = default;
Locale::Locale(Locale&&) noexcept = default;
Locale::Locale(Locale&& other) noexcept
: UObject(other), fullName(fullNameBuffer), baseName(fullName) {
*this = std::move(other);
}
Locale& Locale::operator=(const Locale& other) {
if (this == &other) {
return *this;
}
setToBogus();
if (other.fullName == other.fullNameBuffer) {
uprv_strcpy(fullNameBuffer, other.fullNameBuffer);
} else if (other.fullName == nullptr) {
fullName = nullptr;
} else {
fullName = uprv_strdup(other.fullName);
if (fullName == nullptr) return *this;
}
if (other.baseName == other.fullName) {
baseName = fullName;
} else if (other.baseName != nullptr) {
baseName = uprv_strdup(other.baseName);
if (baseName == nullptr) return *this;
}
uprv_strcpy(language, other.language);
uprv_strcpy(script, other.script);
uprv_strcpy(country, other.country);
variantBegin = other.variantBegin;
fIsBogus = other.fIsBogus;
return *this;
}
Locale& Locale::operator=(Locale&& other) noexcept {
if ((baseName != fullName) && (baseName != fullNameBuffer)) uprv_free(baseName);
if (fullName != fullNameBuffer) uprv_free(fullName);
if (other.fullName == other.fullNameBuffer || other.baseName == other.fullNameBuffer) {
uprv_strcpy(fullNameBuffer, other.fullNameBuffer);
}
if (other.fullName == other.fullNameBuffer) {
fullName = fullNameBuffer;
} else {
fullName = other.fullName;
}
if (other.baseName == other.fullNameBuffer) {
baseName = fullNameBuffer;
} else if (other.baseName == other.fullName) {
baseName = fullName;
} else {
baseName = other.baseName;
}
uprv_strcpy(language, other.language);
uprv_strcpy(script, other.script);
uprv_strcpy(country, other.country);
variantBegin = other.variantBegin;
fIsBogus = other.fIsBogus;
other.baseName = other.fullName = other.fullNameBuffer;
return *this;
}
Locale& Locale::operator=(const Locale&) = default;
Locale& Locale::operator=(Locale&&) noexcept = default;
Locale *
Locale::clone() const {
@ -477,7 +597,7 @@ Locale::clone() const {
bool
Locale::operator==( const Locale& other) const
{
return (uprv_strcmp(other.fullName, fullName) == 0);
return uprv_strcmp(other.getName(), getName()) == 0;
}
namespace {
@ -1073,7 +1193,7 @@ public:
}
// Check the fields inside locale, if need to replace fields,
// place the the replaced locale ID in out and return true.
// place the replaced locale ID in out and return true.
// Otherwise return false for no replacement or error.
bool replace(
const Locale& locale, CharString& out, UErrorCode& status);
@ -1836,16 +1956,8 @@ Locale& Locale::init(const char* localeID, UBool canonicalize)
/*This function initializes a Locale from a C locale ID*/
Locale& Locale::init(StringPiece localeID, UBool canonicalize)
{
fIsBogus = false;
/* Free our current storage */
if ((baseName != fullName) && (baseName != fullNameBuffer)) {
uprv_free(baseName);
}
baseName = nullptr;
if(fullName != fullNameBuffer) {
uprv_free(fullName);
fullName = fullNameBuffer;
}
Nest& nest = payload.emplace<Nest>();
// not a loop:
// just an easy way to have a common error-exit
@ -1859,9 +1971,6 @@ Locale& Locale::init(StringPiece localeID, UBool canonicalize)
int32_t length;
UErrorCode err;
/* preset all fields to empty */
language[0] = script[0] = country[0] = 0;
const auto parse = [canonicalize](std::string_view localeID,
char* name,
int32_t nameCapacity,
@ -1879,17 +1988,17 @@ Locale& Locale::init(StringPiece localeID, UBool canonicalize)
};
// "canonicalize" the locale ID to ICU/Java format
char* fullName = nest.baseName;
err = U_ZERO_ERROR;
length = parse(localeID, fullName, sizeof fullNameBuffer, err);
length = parse(localeID, fullName, sizeof Nest::baseName, err);
if (err == U_BUFFER_OVERFLOW_ERROR || length >= static_cast<int32_t>(sizeof(fullNameBuffer))) {
U_ASSERT(baseName == nullptr);
FixedString fullNameBuffer;
if (err == U_BUFFER_OVERFLOW_ERROR || length >= static_cast<int32_t>(sizeof Nest::baseName)) {
/*Go to heap for the fullName if necessary*/
char* newFullName = static_cast<char*>(uprv_malloc(sizeof(char) * (length + 1)));
if (newFullName == nullptr) {
if (!fullNameBuffer.reserve(length + 1)) {
break; // error: out of memory
}
fullName = newFullName;
fullName = fullNameBuffer.getAlias();
err = U_ZERO_ERROR;
length = parse(localeID, fullName, length + 1, err);
}
@ -1898,7 +2007,10 @@ Locale& Locale::init(StringPiece localeID, UBool canonicalize)
break;
}
variantBegin = length;
std::string_view language;
std::string_view script;
std::string_view region;
int32_t variantBegin = length;
/* after uloc_getName/canonicalize() we know that only '_' are separators */
/* But _ could also appear in a timezone such as "en@timezone=America/Los_Angeles" */
@ -1923,8 +2035,9 @@ Locale& Locale::init(StringPiece localeID, UBool canonicalize)
} else {
fieldLen[fieldIdx - 1] = length - static_cast<int32_t>(field[fieldIdx - 1] - fullName);
}
bool hasKeywords = at != nullptr && uprv_strchr(at + 1, '=') != nullptr;
if (fieldLen[0] >= static_cast<int32_t>(sizeof(language)))
if (fieldLen[0] >= ULOC_LANG_CAPACITY)
{
break; // error: the language field is too long
}
@ -1932,22 +2045,19 @@ Locale& Locale::init(StringPiece localeID, UBool canonicalize)
variantField = 1; /* Usually the 2nd one, except when a script or country is also used. */
if (fieldLen[0] > 0) {
/* We have a language */
uprv_memcpy(language, fullName, fieldLen[0]);
language[fieldLen[0]] = 0;
language = {fullName, static_cast<std::string_view::size_type>(fieldLen[0])};
}
if (fieldLen[1] == 4 && uprv_isASCIILetter(field[1][0]) &&
uprv_isASCIILetter(field[1][1]) && uprv_isASCIILetter(field[1][2]) &&
uprv_isASCIILetter(field[1][3])) {
/* We have at least a script */
uprv_memcpy(script, field[1], fieldLen[1]);
script[fieldLen[1]] = 0;
script = {field[1], static_cast<std::string_view::size_type>(fieldLen[1])};
variantField++;
}
if (fieldLen[variantField] == 2 || fieldLen[variantField] == 3) {
/* We have a country */
uprv_memcpy(country, field[variantField], fieldLen[variantField]);
country[fieldLen[variantField]] = 0;
region = {field[variantField], static_cast<std::string_view::size_type>(fieldLen[variantField])};
variantField++;
} else if (fieldLen[variantField] == 0) {
variantField++; /* script or country empty but variant in next field (i.e. en__POSIX) */
@ -1956,16 +2066,52 @@ Locale& Locale::init(StringPiece localeID, UBool canonicalize)
if (fieldLen[variantField] > 0) {
/* We have a variant */
variantBegin = static_cast<int32_t>(field[variantField] - fullName);
} else if (hasKeywords) {
// The original computation of variantBegin leaves it equal to the length
// of fullName if there is no variant. It should instead be
// the length of the baseName.
variantBegin = static_cast<int32_t>(at - fullName);
}
err = U_ZERO_ERROR;
initBaseName(err);
if (U_FAILURE(err)) {
break;
if (!hasKeywords && Nest::fits(length, language, script, region)) {
U_ASSERT(fullName == nest.baseName);
U_ASSERT(fullNameBuffer.isEmpty());
nest.init(language, script, region, variantBegin);
} else {
if (fullName == nest.baseName) {
U_ASSERT(fullNameBuffer.isEmpty());
fullNameBuffer = {fullName, static_cast<std::string_view::size_type>(length)};
if (fullNameBuffer.isEmpty()) {
break; // error: out of memory
}
if (!language.empty()) {
language = {fullNameBuffer.data(), language.size()};
}
if (!script.empty()) {
script = {fullNameBuffer.data() + (script.data() - fullName), script.size()};
}
if (!region.empty()) {
region = {fullNameBuffer.data() + (region.data() - fullName), region.size()};
}
}
Heap& heap = payload.emplace<Heap>(language, script, region, variantBegin);
if (isBogus()) {
break; // error: out of memory
}
U_ASSERT(!fullNameBuffer.isEmpty());
heap.ptr->fullName = std::move(fullNameBuffer);
if (hasKeywords) {
if (std::string_view::size_type baseNameLength = at - fullName; baseNameLength > 0) {
heap.ptr->baseName = {heap.ptr->fullName.data(), baseNameLength};
if (heap.ptr->baseName.isEmpty()) {
break; // error: out of memory
}
}
}
}
if (canonicalize) {
if (!isKnownCanonicalizedLocale(fullName, err)) {
if (!isKnownCanonicalizedLocale(getName(), err)) {
CharString replaced;
// Not sure it is already canonicalized
if (canonicalizeLocale(*this, replaced, err)) {
@ -1989,67 +2135,16 @@ Locale& Locale::init(StringPiece localeID, UBool canonicalize)
return *this;
}
/*
* Set up the base name.
* If there are no key words, it's exactly the full name.
* If key words exist, it's the full name truncated at the '@' character.
* Need to set up both at init() and after setting a keyword.
*/
void
Locale::initBaseName(UErrorCode &status) {
if (U_FAILURE(status)) {
return;
}
U_ASSERT(baseName==nullptr || baseName==fullName);
const char *atPtr = uprv_strchr(fullName, '@');
const char *eqPtr = uprv_strchr(fullName, '=');
if (atPtr && eqPtr && atPtr < eqPtr) {
// Key words exist.
int32_t baseNameLength = static_cast<int32_t>(atPtr - fullName);
char* newBaseName = static_cast<char*>(uprv_malloc(baseNameLength + 1));
if (newBaseName == nullptr) {
status = U_MEMORY_ALLOCATION_ERROR;
return;
}
baseName = newBaseName;
uprv_strncpy(baseName, fullName, baseNameLength);
baseName[baseNameLength] = 0;
// The original computation of variantBegin leaves it equal to the length
// of fullName if there is no variant. It should instead be
// the length of the baseName.
if (variantBegin > baseNameLength) {
variantBegin = baseNameLength;
}
} else {
baseName = fullName;
}
}
int32_t
Locale::hashCode() const
{
return ustr_hashCharsN(fullName, static_cast<int32_t>(uprv_strlen(fullName)));
return ustr_hashCharsN(getName(), static_cast<int32_t>(uprv_strlen(getName())));
}
void
Locale::setToBogus() {
/* Free our current storage */
if((baseName != fullName) && (baseName != fullNameBuffer)) {
uprv_free(baseName);
}
baseName = nullptr;
if(fullName != fullNameBuffer) {
uprv_free(fullName);
fullName = fullNameBuffer;
}
*fullNameBuffer = 0;
*language = 0;
*script = 0;
*country = 0;
fIsBogus = true;
variantBegin = 0;
payload.setToBogus();
}
const Locale& U_EXPORT2
@ -2088,9 +2183,12 @@ Locale::addLikelySubtags(UErrorCode& status) {
return;
}
CharString maximizedLocaleID = ulocimp_addLikelySubtags(fullName, status);
CharString maximizedLocaleID = ulocimp_addLikelySubtags(getName(), status);
if (U_FAILURE(status)) {
if (status == U_MEMORY_ALLOCATION_ERROR) {
setToBogus();
}
return;
}
@ -2110,9 +2208,12 @@ Locale::minimizeSubtags(bool favorScript, UErrorCode& status) {
return;
}
CharString minimizedLocaleID = ulocimp_minimizeSubtags(fullName, favorScript, status);
CharString minimizedLocaleID = ulocimp_minimizeSubtags(getName(), favorScript, status);
if (U_FAILURE(status)) {
if (status == U_MEMORY_ALLOCATION_ERROR) {
setToBogus();
}
return;
}
@ -2131,8 +2232,11 @@ Locale::canonicalize(UErrorCode& status) {
status = U_ILLEGAL_ARGUMENT_ERROR;
return;
}
CharString uncanonicalized(fullName, status);
CharString uncanonicalized(getName(), status);
if (U_FAILURE(status)) {
if (status == U_MEMORY_ALLOCATION_ERROR) {
setToBogus();
}
return;
}
init(uncanonicalized.data(), /*canonicalize=*/true);
@ -2191,12 +2295,12 @@ Locale::toLanguageTag(ByteSink& sink, UErrorCode& status) const
return;
}
if (fIsBogus) {
if (isBogus()) {
status = U_ILLEGAL_ARGUMENT_ERROR;
return;
}
ulocimp_toLanguageTag(fullName, sink, /*strict=*/false, status);
ulocimp_toLanguageTag(getName(), sink, /*strict=*/false, status);
}
Locale U_EXPORT2
@ -2229,14 +2333,14 @@ Locale::createCanonical(const char* name) {
const char *
Locale::getISO3Language() const
{
return uloc_getISO3Language(fullName);
return uloc_getISO3Language(getName());
}
const char *
Locale::getISO3Country() const
{
return uloc_getISO3Country(fullName);
return uloc_getISO3Country(getName());
}
/**
@ -2249,7 +2353,7 @@ Locale::getISO3Country() const
uint32_t
Locale::getLCID() const
{
return uloc_getLCID(fullName);
return uloc_getLCID(getName());
}
const char* const* U_EXPORT2 Locale::getISOCountries()
@ -2428,8 +2532,9 @@ Locale::getLocaleCache()
class KeywordEnumeration : public StringEnumeration {
protected:
CharString keywords;
FixedString keywords;
private:
int32_t length;
const char *current;
static const char fgClassID;
@ -2438,16 +2543,20 @@ public:
virtual UClassID getDynamicClassID() const override { return getStaticClassID(); }
public:
KeywordEnumeration(const char *keys, int32_t keywordLen, int32_t currentIndex, UErrorCode &status)
: keywords(), current(keywords.data()) {
: keywords(), length(keywordLen), current(nullptr) {
if(U_SUCCESS(status) && keywordLen != 0) {
if(keys == nullptr || keywordLen < 0) {
status = U_ILLEGAL_ARGUMENT_ERROR;
} else {
keywords.append(keys, keywordLen, status);
keywords = {keys, static_cast<std::string_view::size_type>(length)};
if (keywords.isEmpty()) {
status = U_MEMORY_ALLOCATION_ERROR;
} else {
current = keywords.data() + currentIndex;
}
}
}
}
virtual ~KeywordEnumeration();
@ -2455,7 +2564,7 @@ public:
{
UErrorCode status = U_ZERO_ERROR;
return new KeywordEnumeration(
keywords.data(), keywords.length(),
keywords.data(), length,
static_cast<int32_t>(current - keywords.data()), status);
}
@ -2556,8 +2665,8 @@ Locale::createKeywords(UErrorCode &status) const
return result;
}
const char* variantStart = uprv_strchr(fullName, '@');
const char* assignment = uprv_strchr(fullName, '=');
const char* variantStart = uprv_strchr(getName(), '@');
const char* assignment = uprv_strchr(getName(), '=');
if(variantStart) {
if(assignment > variantStart) {
CharString keywords = ulocimp_getKeywords(variantStart + 1, '@', false, status);
@ -2583,8 +2692,8 @@ Locale::createUnicodeKeywords(UErrorCode &status) const
return result;
}
const char* variantStart = uprv_strchr(fullName, '@');
const char* assignment = uprv_strchr(fullName, '=');
const char* variantStart = uprv_strchr(getName(), '@');
const char* assignment = uprv_strchr(getName(), '=');
if(variantStart) {
if(assignment > variantStart) {
CharString keywords = ulocimp_getKeywords(variantStart + 1, '@', false, status);
@ -2604,7 +2713,7 @@ Locale::createUnicodeKeywords(UErrorCode &status) const
int32_t
Locale::getKeywordValue(const char* keywordName, char *buffer, int32_t bufLen, UErrorCode &status) const
{
return uloc_getKeywordValue(fullName, keywordName, buffer, bufLen, &status);
return uloc_getKeywordValue(getName(), keywordName, buffer, bufLen, &status);
}
void
@ -2613,12 +2722,12 @@ Locale::getKeywordValue(StringPiece keywordName, ByteSink& sink, UErrorCode& sta
return;
}
if (fIsBogus) {
if (isBogus()) {
status = U_ILLEGAL_ARGUMENT_ERROR;
return;
}
ulocimp_getKeywordValue(fullName, keywordName, sink, status);
ulocimp_getKeywordValue(getName(), keywordName, sink, status);
}
void
@ -2664,51 +2773,77 @@ Locale::setKeywordValue(StringPiece keywordName,
status = U_ZERO_ERROR;
}
int32_t length = static_cast<int32_t>(uprv_strlen(fullName));
int32_t capacity = fullName == fullNameBuffer ? ULOC_FULLNAME_CAPACITY : length + 1;
CharString localeID(getName(), -1, status);
ulocimp_setKeywordValue(keywordName, keywordValue, localeID, status);
if (U_FAILURE(status)) {
if (status == U_MEMORY_ALLOCATION_ERROR) {
setToBogus();
}
return;
}
const char* start = locale_getKeywordsStart(fullName);
int32_t offset = start == nullptr ? length : start - fullName;
const char* at = locale_getKeywordsStart(localeID.toStringPiece());
bool hasKeywords = at != nullptr && uprv_strchr(at + 1, '=') != nullptr;
for (;;) {
// Remove -1 from the capacity so that this function can guarantee NUL termination.
CheckedArrayByteSink sink(fullName + offset, capacity - offset - 1);
int32_t reslen = ulocimp_setKeywordValue(
{fullName + offset, static_cast<std::string_view::size_type>(length - offset)},
keywordName,
keywordValue,
sink,
status);
if (status == U_BUFFER_OVERFLOW_ERROR) {
capacity = reslen + offset + 1;
char* newFullName = static_cast<char*>(uprv_malloc(capacity));
if (newFullName == nullptr) {
Nest* nest = payload.get<Nest>();
if (!hasKeywords) {
if (nest == nullptr) {
// There are no longer any keywords left, so it might now be
// possible to move the payload from Heap to Nest.
Heap* heap = payload.get<Heap>();
U_ASSERT(heap != nullptr);
if (Nest::fits(localeID.length(), heap->language, heap->script, heap->region)) {
int32_t variantBegin = heap->ptr->variantBegin;
U_ASSERT(variantBegin >= 0);
U_ASSERT(static_cast<size_t>(variantBegin) < sizeof Nest::baseName);
nest = &payload.emplace<Nest>(std::move(*heap), static_cast<uint8_t>(variantBegin));
localeID.extract(nest->baseName, sizeof Nest::baseName, status);
} else {
heap->ptr->baseName.clear();
heap->ptr->fullName = localeID.toStringPiece();
if (heap->ptr->fullName.isEmpty()) {
status = U_MEMORY_ALLOCATION_ERROR;
setToBogus();
return;
}
}
}
} else {
Heap* heap = nullptr;
if (nest != nullptr) {
// A keyword has been added, so the payload now needs to be moved
// from Nest to Heap so that it can get a baseName.
Nest copy(*nest);
heap = &payload.emplace<Heap>(copy.language,
copy.script,
copy.region,
copy.variantBegin);
if (isBogus()) {
status = U_MEMORY_ALLOCATION_ERROR;
return;
}
uprv_memcpy(newFullName, fullName, length + 1);
if (fullName != fullNameBuffer) {
if (baseName == fullName) {
baseName = newFullName; // baseName should not point to freed memory.
} else {
heap = payload.get<Heap>();
}
// if fullName is already on the heap, need to free it.
uprv_free(fullName);
}
fullName = newFullName;
status = U_ZERO_ERROR;
continue;
U_ASSERT(heap != nullptr);
heap->ptr->fullName = localeID.toStringPiece();
if (heap->ptr->fullName.isEmpty()) {
status = U_MEMORY_ALLOCATION_ERROR;
setToBogus();
return;
}
if (U_FAILURE(status)) { return; }
u_terminateChars(fullName, capacity, reslen + offset, &status);
break;
if (heap->ptr->baseName.isEmpty()) {
// Has added the first keyword, meaning that the fullName is no longer also the baseName.
if (std::string_view::size_type baseNameLength = at - localeID.data(); baseNameLength > 0) {
heap->ptr->baseName = {heap->ptr->fullName.data(), baseNameLength};
if (heap->ptr->baseName.isEmpty()) {
status = U_MEMORY_ALLOCATION_ERROR;
setToBogus();
return;
}
}
}
if (baseName == fullName) {
// May have added the first keyword, meaning that the fullName is no longer also the baseName.
initBaseName(status);
}
}
@ -2741,9 +2876,42 @@ Locale::setUnicodeKeywordValue(StringPiece keywordName,
setKeywordValue(*legacy_key, value, status);
}
// Field accessors. Each one picks the matching Nest or Heap getter through
// getField(), which returns "" for a bogus locale.

// Region (country) subtag.
const char* Locale::getCountry() const {
    return getField<&Nest::getRegion, &Heap::getRegion>();
}

// Language subtag.
const char* Locale::getLanguage() const {
    return getField<&Nest::getLanguage, &Heap::getLanguage>();
}

// Script subtag.
const char* Locale::getScript() const {
    return getField<&Nest::getScript, &Heap::getScript>();
}

// Variant portion of the name.
const char* Locale::getVariant() const {
    return getField<&Nest::getVariant, &Heap::getVariant>();
}

// Full locale name: a Nest supplies its base name, a Heap its full name.
const char* Locale::getName() const {
    return getField<&Nest::getBaseName, &Heap::getFullName>();
}
const char*
Locale::getBaseName() const {
return baseName;
return getField<&Nest::getBaseName, &Heap::getBaseName>();
}
template <const char* (Locale::Nest::*const NEST)() const,
const char* (Locale::Heap::*const HEAP)() const>
const char* Locale::getField() const {
return payload.visit([] { return ""; },
[](const Nest& nest) { return (nest.*NEST)(); },
[](const Heap& heap) { return (heap.*HEAP)(); });
}
Locale::Iterator::~Iterator() = default;

View File

@ -495,7 +495,7 @@ bool RegionValidateMap::equals(const RegionValidateMap& that) const {
// This code transforms two letters a-z into an integer between -1 and 26x26.
// -1 indicates that the region is outside the range of two letters a-z;
// the remaining values are between 0 and 676 (= 26x26) and are used as an index into
// the the bigmap in map. The map is an array of 22 int32_t.
// the bigmap in map. The map is an array of 22 int32_t.
// Since 32x21 < 676 < 32x22, we store this 676-bit bitmap in 22 int32_t.
int32_t RegionValidateMap::value(const char* region) const {
if (uprv_isASCIILetter(region[0]) && uprv_isASCIILetter(region[1]) &&

View File

@ -715,13 +715,29 @@ LSR LikelySubtags::maximize(StringPiece language, StringPiece script, StringPiec
} else {
iter.resetToState64(state);
value = trieNext(iter, "", 0);
U_ASSERT(value != 0);
// For the case of und_Latn
if (value < 0) {
retainLanguage = !language.empty();
retainScript = !script.empty();
retainRegion = !region.empty();
// Fallback to und_$region =>
iter.resetToState64(trieUndState); // "und" ("*")
value = trieNext(iter, "", 0);
U_ASSERT(value == 0);
int64_t trieUndEmptyState = iter.getState64();
value = trieNext(iter, region, 0);
// Fallback to und =>
if (value < 0) {
iter.resetToState64(trieUndEmptyState);
value = trieNext(iter, "", 0);
U_ASSERT(value > 0);
}
}
}
}
}
U_ASSERT(value < lsrsLength);
const LSR &matched = lsrs[value];
if (returnInputIfUnmatch &&
(!(matchLanguage || matchScript || (matchRegion && language.empty())))) {
return LSR("", "", "", LSR::EXPLICIT_LSR, errorCode); // no matching.
@ -731,18 +747,23 @@ LSR LikelySubtags::maximize(StringPiece language, StringPiece script, StringPiec
}
if (!(retainLanguage || retainScript || retainRegion)) {
U_ASSERT(value >= 0);
// Quickly return a copy of the lookup-result LSR
// without new allocation of the subtags.
const LSR &matched = lsrs[value];
return LSR(matched.language, matched.script, matched.region, matched.flags);
}
if (!retainLanguage) {
language = matched.language;
U_ASSERT(value >= 0);
language = lsrs[value].language;
}
if (!retainScript) {
script = matched.script;
U_ASSERT(value >= 0);
script = lsrs[value].script;
}
if (!retainRegion) {
region = matched.region;
U_ASSERT(value >= 0);
region = lsrs[value].region;
}
int32_t retainMask = (retainLanguage ? 4 : 0) + (retainScript ? 2 : 0) + (retainRegion ? 1 : 0);
// retainOldMask flags = LSR explicit-subtag flags

View File

@ -809,7 +809,15 @@ U_CAPI const LSTMData* U_EXPORT2 CreateLSTMDataForScript(UScriptCode script, UEr
U_CAPI const LSTMData* U_EXPORT2 CreateLSTMData(UResourceBundle* rb, UErrorCode& status)
{
return new LSTMData(rb, status);
if (U_FAILURE(status)) {
return nullptr;
}
const LSTMData* result = new LSTMData(rb, status);
if (U_FAILURE(status)) {
delete result;
return nullptr;
}
return result;
}
U_CAPI const LanguageBreakEngine* U_EXPORT2

View File

@ -10,14 +10,14 @@
#ifdef INCLUDED_FROM_NORMALIZER2_CPP
static const UVersionInfo norm2_nfc_data_formatVersion={5,0,0,0};
static const UVersionInfo norm2_nfc_data_dataVersion={0x10,0,0,0};
static const UVersionInfo norm2_nfc_data_dataVersion={0x11,0,0,0};
static const int32_t norm2_nfc_data_indexes[Normalizer2Impl::IX_COUNT]={
0x58,0x4e84,0x8c60,0x8d60,0x8d60,0x8d60,0x8d60,0x8d60,0xc0,0x300,0xb0c,0x2a6a,0x3cf0,0xfbc4,0x12c2,0x3c26,
0x58,0x4eec,0x8cc8,0x8dc8,0x8dc8,0x8dc8,0x8dc8,0x8dc8,0xc0,0x300,0xb0c,0x2a6a,0x3cf0,0xfbc4,0x12c2,0x3c26,
0x3cbe,0x3cf0,0x300,0,0xfb10,0xfb9e
};
static const uint16_t norm2_nfc_data_trieIndex[1869]={
static const uint16_t norm2_nfc_data_trieIndex[1888]={
0,0x40,0x7b,0xbb,0xfb,0x13a,0x17a,0x1b2,0x1f2,0x226,0x254,0x226,0x294,0x2d4,0x313,0x353,
0x393,0x3d2,0x40f,0x44e,0x226,0x226,0x488,0x4c8,0x4f8,0x530,0x226,0x570,0x59f,0x5de,0x226,0x5f3,
0x631,0x65f,0x688,0x6be,0x6fe,0x73b,0x75b,0x79a,0x7d9,0x816,0x835,0x872,0x75b,0x8ab,0x8d9,0x918,
@ -82,7 +82,7 @@ static const uint16_t norm2_nfc_data_trieIndex[1869]={
0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,
0x226,0x226,0x226,0x226,0x1881,0x18c1,0x1901,0x1941,0x1981,0x19c1,0x1a01,0x1a41,0x1a64,0x1aa4,0x226,0x226,
0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x1ac4,0x226,0x226,0x226,0x226,0x226,0x226,0x226,
0x6cf,0x6df,0x6f7,0x716,0x72b,0x72b,0x72b,0x72f,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,
0x6e2,0x6f2,0x70a,0x729,0x73e,0x73e,0x73e,0x742,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,
0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,
0x226,0x226,0x226,0x226,0x226,0x226,0x226,0xc0c,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,
0x226,0x226,0x226,0x226,0x226,0x226,0x54f,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x40c,
@ -91,53 +91,54 @@ static const uint16_t norm2_nfc_data_trieIndex[1869]={
0x1b1a,0x226,0x226,0x1b2a,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0xdf8,0x226,
0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,
0x226,0x1b3a,0x226,0x226,0x226,0x1b42,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,
0x1608,0x226,0x226,0x226,0x226,0x66b,0x226,0x226,0x226,0x226,0x1b50,0x54f,0x226,0x226,0x1b60,0x226,
0x226,0x226,0x226,0x226,0x226,0x226,0x81d,0x226,0x226,0x1b70,0x226,0x1b80,0x1b8d,0x1b99,0x226,0x226,
0x226,0x226,0x414,0x226,0x1ba4,0x1bb4,0x226,0x226,0x226,0x812,0x226,0x226,0x226,0x226,0x1bc4,0x226,
0x226,0x226,0x1bcf,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x1bd6,0x226,0x226,
0x226,0x226,0x1be1,0x1bf0,0x928,0x1bfe,0x412,0x1c0c,0x1c1c,0x226,0x1c24,0x1c32,0x87f,0x226,0x226,0x226,
0x226,0x1c42,0x7ca,0x226,0x226,0x226,0x226,0x226,0x1c52,0x1c61,0x226,0x226,0x226,0x226,0x226,0x226,
0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x908,0x1c69,0x1c79,0x226,0x226,0x226,0x9ec,0x226,0x226,
0x226,0x226,0x226,0x226,0x226,0x1c83,0x226,0x226,0x226,0x226,0x226,0x226,0x818,0x226,0x226,0x226,
0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x1c80,0x226,0x226,0x226,0x226,0x226,
0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x1c93,0x812,0x226,0x226,0x226,0x226,
0x1608,0x226,0x226,0x226,0x226,0x1b50,0x226,0x226,0x226,0x226,0x1b60,0x54f,0x226,0x226,0x1b70,0x226,
0x226,0x226,0x226,0x226,0x226,0x226,0x81d,0x226,0x226,0x1b80,0x226,0x1b90,0x1b9d,0x1ba9,0x226,0x226,
0x226,0x226,0x414,0x226,0x1bb4,0x1bc4,0x226,0x226,0x226,0x812,0x226,0x226,0x226,0x226,0x1bd4,0x226,
0x226,0x226,0x1bdf,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x1be6,0x226,0x226,
0x226,0x226,0x1bf1,0x1c00,0x928,0x1c0e,0x412,0x1c1c,0x1c2c,0x226,0x1c34,0x1c42,0x87f,0x226,0x226,0x226,
0x226,0x1c52,0x7ca,0x226,0x226,0x226,0x226,0x226,0x1c62,0x1c71,0x226,0x226,0x226,0x226,0x226,0x226,
0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x908,0x1c79,0x1c89,0x226,0x226,0x226,0x9ec,0x226,0x226,
0x226,0x226,0x226,0x226,0x226,0x1c93,0x226,0x226,0x226,0x226,0x226,0x226,0x818,0x226,0x226,0x226,
0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x1c90,0x226,0x226,0x226,0x226,0x226,
0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x1ca3,0x812,0x226,0x226,0x226,0x226,
0x226,0x226,0x226,0x226,0x226,0x87f,0x226,0x226,0x226,0x81f,0x81c,0x226,0x226,0x226,0x226,0x81a,
0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,
0x226,0x226,0x226,0x226,0x226,0x226,0x9ec,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,
0x226,0x226,0x226,0x226,0x226,0x226,0x226,0xc06,0x226,0x226,0x226,0x226,0x81c,0x226,0x226,0x226,
0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,
0x226,0xc09,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,
0x226,0x226,0x226,0x1ca2,0x1cb1,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,
0x226,0x226,0x226,0x226,0x1cc1,0x226,0x226,0x226,0xf2d,0x226,0x226,0x226,0x226,0x226,0x226,0x226,
0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x1cce,
0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,
0x226,0x226,0x226,0x1cb2,0x1cc1,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,
0x226,0x226,0x226,0x226,0x1cd1,0x226,0x226,0x226,0xf2d,0x226,0x226,0x226,0x226,0x226,0x226,0x226,
0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x1cde,
0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x1ce0,0x226,0x226,0x226,0x226,0x226,0x226,
0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,
0x1cef,0x1cff,0x1d0d,0x1d1a,0x226,0x1d26,0x1d34,0x1d44,0x226,0x226,0x226,0x226,0xd1c,0x226,0x226,0x226,
0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x1cee,
0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x1cf0,0x226,0x226,0x226,0x226,0x226,0x226,
0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,
0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x1d54,0x1d5c,0x1d6a,0x226,0x226,0x226,0x226,0x226,
0x1cff,0x1d0f,0x1d1d,0x1d2a,0x226,0x1d36,0x1d44,0x1d54,0x226,0x226,0x226,0x226,0xd1c,0x226,0x226,0x226,
0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,
0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x1d64,0x1d6c,0x1d7a,0x226,0x226,0x226,0x226,0x226,
0x4f9,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0xf2d,0x226,0x226,0x226,0x226,
0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x7ca,0x226,0x226,0x226,0x4fc,0x226,0x226,0x226,
0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x1d75,0x226,
0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x1d85,0x226,
0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x5c1,0x226,
0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x1d85,0x226,0x226,0x226,
0x226,0x226,0x226,0x1d91,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x1da1,
0x1db1,0x1dc1,0x1dd1,0x1de1,0x1df1,0x1e01,0x1e11,0x1e21,0x1e31,0x1e41,0x1e51,0x1e61,0x1e71,0x1e81,0x1e91,0x1ea1,
0x1eb1,0x1ec1,0x1ed1,0x1ee1,0x1ef1,0x1f01,0x1f11,0x1f21,0x1f31,0x1f41,0x1f51,0x1f61,0x1f71,0x1f81,0x1f91,0x1fa1,
0x1fb1,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,
0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x408,
0x428,0x440,0xc4,0xc4,0x460,0x46f,0x486,0x4a2,0x4bf,0x4dd,0x4fa,0x517,0x536,0x553,0x56d,0xc4,
0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0x582,
0xc4,0xc4,0xc4,0xc4,0x595,0x5a9,0x5c0,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,
0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x1d95,0x7d3,0x226,
0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x1da5,
0x226,0x226,0x226,0x226,0x226,0x226,0x1db1,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,
0x226,0x226,0x1dc1,0x1dd1,0x1de1,0x1df1,0x1e01,0x1e11,0x1e21,0x1e31,0x1e41,0x1e51,0x1e61,0x1e71,0x1e81,0x1e91,
0x1ea1,0x1eb1,0x1ec1,0x1ed1,0x1ee1,0x1ef1,0x1f01,0x1f11,0x1f21,0x1f31,0x1f41,0x1f51,0x1f61,0x1f71,0x1f81,0x1f91,
0x1fa1,0x1fb1,0x1fc1,0x1fd1,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,
0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,
0x226,0x226,0x408,0x428,0x440,0xc4,0xc4,0x460,0x46f,0x486,0x4a2,0x4bf,0x4dd,0x4fa,0x517,0x536,
0x553,0x56d,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,
0xc4,0xc4,0x582,0xc4,0xc4,0xc4,0xc4,0x595,0x5a9,0x5c0,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,
0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,
0xc4,0xc4,0xc4,0xc4,0xc4,0x5e0,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0x5eb,0x608,
0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0x628,0x63e,0x650,0xc4,0x66f,0xc4,0xc4,0xc4,0xc4,0xc4,
0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0x5e0,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,
0xc4,0x5eb,0x608,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0x628,0x63e,0x650,0x66f,0x682,0xc4,0xc4,
0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,
0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0x68f,0x6af
0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0x6a2,0x6c2
};
static const uint16_t norm2_nfc_data_trieData[8129]={
static const uint16_t norm2_nfc_data_trieData[8162]={
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
@ -371,9 +372,9 @@ static const uint16_t norm2_nfc_data_trieData[8129]={
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffb8,0xffb8,0xffb8,0xffb8,0xffb8,
0xffb8,0xffcc,0xffcc,0xffb8,1,0xffb8,0xffcc,0xffcc,0xffb8,0xffb8,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffb8,
0xffcc,0xffcc,0xffcc,0xffcc,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,
0xffcc,0xffcc,0xffb8,1,1,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffb8,0xffcc,0xffcc,0xffcc,0xffcc,
0xffd4,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,0x8c4,0x1a65,0x8c8,0x1a6b,0x8cc,0x1a71,0x8d0,0x1a77,0x8d4,0x1a7d,1,
1,0x8d8,0x1a83,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
@ -575,6 +576,7 @@ static const uint16_t norm2_nfc_data_trieData[8129]={
1,1,1,1,1,1,1,0xffb8,1,0xffcc,1,1,1,1,1,1,
1,1,0xffcc,0xfe02,0xffb8,1,1,1,1,0xfe12,1,1,1,1,0xffcc,0xffcc,
0xffcc,0xffcc,1,1,1,1,1,1,1,1,1,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,
1,1,1,1,1,1,1,1,1,1,0xffb8,0xffb8,1,0xffb8,0xffb8,0xffb8,
1,1,1,1,1,1,0xffb8,0xffb8,0xffcc,0xffcc,0xffcc,0xffb8,0xffcc,0xffb8,0xffb8,0xffb8,
1,1,0xffcc,0xffb8,0xffcc,0xffb8,1,1,1,1,1,1,1,1,1,1,
0xfe12,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0xfe12,
@ -610,7 +612,8 @@ static const uint16_t norm2_nfc_data_trieData[8129]={
1,1,1,1,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,1,0xffcc,0xffcc,0xffcc,0xffcc,
0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,1,1,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,1,0xffcc,0xffcc,1,
0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,1,1,1,1,1,1,1,1,1,1,1,
1,0xffd0,0xffd0,0xffb8,0xffcc,0xffb8,0xffb8,0xffb8,0xffb8,0xffb8,0xffb8,0xffb8,1,1,1,1,
1,0xffd0,0xffd0,0xffb8,0xffcc,1,1,1,0xffcc,1,1,0xffcc,1,1,1,1,
1,1,1,0xffcc,0xffcc,0xffb8,0xffb8,0xffb8,0xffb8,0xffb8,0xffb8,0xffb8,1,1,1,1,
1,1,1,1,1,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xfe0e,1,1,1,1,
1,0x33e5,0x33e9,0x33ed,0x33f1,0x33f7,0x2fd7,0x33fb,0x33ff,0x3403,0x3407,0x2fdb,0x340b,0x340f,0x3413,0x2fdf,
0x3419,0x341d,0x3421,0x3425,0x342b,0x342f,0x3433,0x3437,0x343d,0x3441,0x3445,0x3449,0x30c9,0x344d,0x3453,0x3457,
@ -646,13 +649,13 @@ static const uint16_t norm2_nfc_data_trieData[8129]={
0x3b5f,0x3b63,0x3b67,0x3b6d,0x3b71,0x3b75,0x3b79,0x3b7d,0x3b83,0x3b89,0x3b8d,0x3b91,0x3b95,0x3b9b,0x3b9f,0x31d1,
0x31d1,0x3ba5,0x3ba9,0x3baf,0x3bb3,0x3bb7,0x3bbb,0x3bbf,0x3bc3,0x3bc7,0x3bcb,0x31d5,0x3bd1,0x3bd5,0x3bd9,0x3bdd,
0x3be1,0x3be5,0x3beb,0x3bef,0x3bf5,0x3bfb,0x3c01,0x3c05,0x3c09,0x3c0d,0x3c11,0x3c15,0x3c19,0x3c1d,0x3c21,1,
1
1,1
};
static const UCPTrie norm2_nfc_data_trie={
norm2_nfc_data_trieIndex,
{ norm2_nfc_data_trieData },
1869, 8129,
1888, 8162,
0x2fc00, 0x30,
0, 0,
0, 0,
@ -1160,7 +1163,7 @@ static const uint16_t norm2_nfc_data_extraData[7918]={
static const uint8_t norm2_nfc_data_smallFCD[256]={
0xc0,0xef,3,0x7f,0xdf,0x70,0xcf,0x87,0xd7,0xe6,0x66,0x46,0x66,0x46,0x66,0x5b,
0x12,0,0,4,0,0,0,0x43,0x20,2,0x69,0xae,0xc2,0xc0,0xff,0xff,
0x12,0,0,4,0,0,0,0x43,0x20,2,0xe9,0xae,0xc2,0xc0,0xff,0xff,
0xc0,0x72,0xbf,0,0,0,0,0,0,0,0x40,0,0x80,0x88,0,0,
0xfe,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,

View File

@ -243,32 +243,36 @@ private:
* this normalizer2impl.h and in the design doc at
* https://unicode-org.github.io/icu/design/normalization/custom.html
*/
class U_COMMON_API Normalizer2Impl : public UObject {
class U_COMMON_API_CLASS Normalizer2Impl : public UObject {
public:
Normalizer2Impl() : normTrie(nullptr), fCanonIterData(nullptr) {}
virtual ~Normalizer2Impl();
U_COMMON_API Normalizer2Impl() : normTrie(nullptr), fCanonIterData(nullptr) {}
U_COMMON_API virtual ~Normalizer2Impl();
void init(const int32_t *inIndexes, const UCPTrie *inTrie,
const uint16_t *inExtraData, const uint8_t *inSmallFCD);
U_COMMON_API void init(const int32_t* inIndexes,
const UCPTrie* inTrie,
const uint16_t* inExtraData,
const uint8_t* inSmallFCD);
void addLcccChars(UnicodeSet &set) const;
void addPropertyStarts(const USetAdder *sa, UErrorCode &errorCode) const;
void addCanonIterPropertyStarts(const USetAdder *sa, UErrorCode &errorCode) const;
U_COMMON_API void addLcccChars(UnicodeSet& set) const;
U_COMMON_API void addPropertyStarts(const USetAdder* sa, UErrorCode& errorCode) const;
U_COMMON_API void addCanonIterPropertyStarts(const USetAdder* sa, UErrorCode& errorCode) const;
// low-level properties ------------------------------------------------ ***
UBool ensureCanonIterData(UErrorCode &errorCode) const;
U_COMMON_API UBool ensureCanonIterData(UErrorCode& errorCode) const;
// The trie stores values for lead surrogate code *units*.
// Surrogate code *points* are inert.
uint16_t getNorm16(UChar32 c) const {
U_COMMON_API uint16_t getNorm16(UChar32 c) const {
return U_IS_LEAD(c) ?
static_cast<uint16_t>(INERT) :
UCPTRIE_FAST_GET(normTrie, UCPTRIE_16, c);
}
uint16_t getRawNorm16(UChar32 c) const { return UCPTRIE_FAST_GET(normTrie, UCPTRIE_16, c); }
U_COMMON_API uint16_t getRawNorm16(UChar32 c) const {
return UCPTRIE_FAST_GET(normTrie, UCPTRIE_16, c);
}
UNormalizationCheckResult getCompQuickCheck(uint16_t norm16) const {
U_COMMON_API UNormalizationCheckResult getCompQuickCheck(uint16_t norm16) const {
if(norm16<minNoNo || MIN_YES_YES_WITH_CC<=norm16) {
return UNORM_YES;
} else if(minMaybeNo<=norm16) {
@ -277,11 +281,17 @@ public:
return UNORM_NO;
}
}
UBool isAlgorithmicNoNo(uint16_t norm16) const { return limitNoNo<=norm16 && norm16<minMaybeNo; }
UBool isCompNo(uint16_t norm16) const { return minNoNo<=norm16 && norm16<minMaybeNo; }
UBool isDecompYes(uint16_t norm16) const { return norm16<minYesNo || minMaybeYes<=norm16; }
U_COMMON_API UBool isAlgorithmicNoNo(uint16_t norm16) const {
return limitNoNo <= norm16 && norm16 < minMaybeNo;
}
U_COMMON_API UBool isCompNo(uint16_t norm16) const {
return minNoNo <= norm16 && norm16 < minMaybeNo;
}
U_COMMON_API UBool isDecompYes(uint16_t norm16) const {
return norm16 < minYesNo || minMaybeYes <= norm16;
}
uint8_t getCC(uint16_t norm16) const {
U_COMMON_API uint8_t getCC(uint16_t norm16) const {
if(norm16>=MIN_NORMAL_MAYBE_YES) {
return getCCFromNormalYesOrMaybe(norm16);
}
@ -290,13 +300,13 @@ public:
}
return getCCFromNoNo(norm16);
}
static uint8_t getCCFromNormalYesOrMaybe(uint16_t norm16) {
U_COMMON_API static uint8_t getCCFromNormalYesOrMaybe(uint16_t norm16) {
return static_cast<uint8_t>(norm16 >> OFFSET_SHIFT);
}
static uint8_t getCCFromYesOrMaybeYes(uint16_t norm16) {
U_COMMON_API static uint8_t getCCFromYesOrMaybeYes(uint16_t norm16) {
return norm16>=MIN_NORMAL_MAYBE_YES ? getCCFromNormalYesOrMaybe(norm16) : 0;
}
uint8_t getCCFromYesOrMaybeYesCP(UChar32 c) const {
U_COMMON_API uint8_t getCCFromYesOrMaybeYesCP(UChar32 c) const {
if (c < minCompNoMaybeCP) { return 0; }
return getCCFromYesOrMaybeYes(getNorm16(c));
}
@ -306,7 +316,7 @@ public:
* @param c A Unicode code point.
* @return The lccc(c) in bits 15..8 and tccc(c) in bits 7..0.
*/
uint16_t getFCD16(UChar32 c) const {
U_COMMON_API uint16_t getFCD16(UChar32 c) const {
if(c<minDecompNoCP) {
return 0;
} else if(c<=0xffff) {
@ -322,7 +332,7 @@ public:
* @param limit The end of the string, or NULL.
* @return The lccc(c) in bits 15..8 and tccc(c) in bits 7..0.
*/
uint16_t nextFCD16(const char16_t *&s, const char16_t *limit) const {
U_COMMON_API uint16_t nextFCD16(const char16_t*& s, const char16_t* limit) const {
UChar32 c=*s++;
if(c<minDecompNoCP || !singleLeadMightHaveNonZeroFCD16(c)) {
return 0;
@ -340,7 +350,7 @@ public:
* @param s A valid pointer into a string. Requires start<s.
* @return The lccc(c) in bits 15..8 and tccc(c) in bits 7..0.
*/
uint16_t previousFCD16(const char16_t *start, const char16_t *&s) const {
U_COMMON_API uint16_t previousFCD16(const char16_t* start, const char16_t*& s) const {
UChar32 c=*--s;
if(c<minDecompNoCP) {
return 0;
@ -360,16 +370,16 @@ public:
}
/** Returns true if the single-or-lead code unit c might have non-zero FCD data. */
UBool singleLeadMightHaveNonZeroFCD16(UChar32 lead) const {
U_COMMON_API UBool singleLeadMightHaveNonZeroFCD16(UChar32 lead) const {
// 0<=lead<=0xffff
uint8_t bits=smallFCD[lead>>8];
if(bits==0) { return false; }
return (bits >> ((lead >> 5) & 7)) & 1;
}
/** Returns the FCD value from the regular normalization data. */
uint16_t getFCD16FromNormData(UChar32 c) const;
U_COMMON_API uint16_t getFCD16FromNormData(UChar32 c) const;
uint16_t getFCD16FromMaybeOrNonZeroCC(uint16_t norm16) const;
U_COMMON_API uint16_t getFCD16FromMaybeOrNonZeroCC(uint16_t norm16) const;
/**
* Gets the decomposition for one code point.
@ -378,7 +388,7 @@ public:
* @param length out-only, takes the length of the decomposition, if any
* @return pointer to the decomposition, or NULL if none
*/
const char16_t *getDecomposition(UChar32 c, char16_t buffer[4], int32_t &length) const;
U_COMMON_API const char16_t* getDecomposition(UChar32 c, char16_t buffer[4], int32_t& length) const;
/**
* Gets the raw decomposition for one code point.
@ -387,12 +397,14 @@ public:
* @param length out-only, takes the length of the decomposition, if any
* @return pointer to the decomposition, or NULL if none
*/
const char16_t *getRawDecomposition(UChar32 c, char16_t buffer[30], int32_t &length) const;
U_COMMON_API const char16_t* getRawDecomposition(UChar32 c,
char16_t buffer[30],
int32_t& length) const;
UChar32 composePair(UChar32 a, UChar32 b) const;
U_COMMON_API UChar32 composePair(UChar32 a, UChar32 b) const;
UBool isCanonSegmentStarter(UChar32 c) const;
UBool getCanonStartSet(UChar32 c, UnicodeSet &set) const;
U_COMMON_API UBool isCanonSegmentStarter(UChar32 c) const;
U_COMMON_API UBool getCanonStartSet(UChar32 c, UnicodeSet& set) const;
enum {
// Fixed norm16 values.
@ -481,39 +493,51 @@ public:
// higher-level functionality ------------------------------------------ ***
// NFD without an NFD Normalizer2 instance.
UnicodeString &decompose(const UnicodeString &src, UnicodeString &dest,
U_COMMON_API UnicodeString& decompose(const UnicodeString& src,
UnicodeString& dest,
UErrorCode& errorCode) const;
/**
* Decomposes [src, limit[ and writes the result to dest.
* limit can be NULL if src is NUL-terminated.
* destLengthEstimate is the initial dest buffer capacity and can be -1.
*/
void decompose(const char16_t *src, const char16_t *limit,
UnicodeString &dest, int32_t destLengthEstimate,
U_COMMON_API void decompose(const char16_t* src,
const char16_t* limit,
UnicodeString& dest,
int32_t destLengthEstimate,
UErrorCode& errorCode) const;
const char16_t *decompose(const char16_t *src, const char16_t *limit,
ReorderingBuffer *buffer, UErrorCode &errorCode) const;
void decomposeAndAppend(const char16_t *src, const char16_t *limit,
U_COMMON_API const char16_t* decompose(const char16_t* src,
const char16_t* limit,
ReorderingBuffer* buffer,
UErrorCode& errorCode) const;
U_COMMON_API void decomposeAndAppend(const char16_t* src,
const char16_t* limit,
UBool doDecompose,
UnicodeString& safeMiddle,
ReorderingBuffer& buffer,
UErrorCode& errorCode) const;
/** sink==nullptr: isNormalized()/spanQuickCheckYes() */
const uint8_t *decomposeUTF8(uint32_t options,
const uint8_t *src, const uint8_t *limit,
ByteSink *sink, Edits *edits, UErrorCode &errorCode) const;
U_COMMON_API const uint8_t* decomposeUTF8(uint32_t options,
const uint8_t* src,
const uint8_t* limit,
ByteSink* sink,
Edits* edits,
UErrorCode& errorCode) const;
UBool compose(const char16_t *src, const char16_t *limit,
U_COMMON_API UBool compose(const char16_t* src,
const char16_t* limit,
UBool onlyContiguous,
UBool doCompose,
ReorderingBuffer& buffer,
UErrorCode& errorCode) const;
const char16_t *composeQuickCheck(const char16_t *src, const char16_t *limit,
U_COMMON_API const char16_t* composeQuickCheck(const char16_t* src,
const char16_t* limit,
UBool onlyContiguous,
UNormalizationCheckResult* pQCResult) const;
void composeAndAppend(const char16_t *src, const char16_t *limit,
U_COMMON_API void composeAndAppend(const char16_t* src,
const char16_t* limit,
UBool doCompose,
UBool onlyContiguous,
UnicodeString& safeMiddle,
@ -521,31 +545,38 @@ public:
UErrorCode& errorCode) const;
/** sink==nullptr: isNormalized() */
UBool composeUTF8(uint32_t options, UBool onlyContiguous,
const uint8_t *src, const uint8_t *limit,
ByteSink *sink, icu::Edits *edits, UErrorCode &errorCode) const;
U_COMMON_API UBool composeUTF8(uint32_t options,
UBool onlyContiguous,
const uint8_t* src,
const uint8_t* limit,
ByteSink* sink,
icu::Edits* edits,
UErrorCode& errorCode) const;
const char16_t *makeFCD(const char16_t *src, const char16_t *limit,
ReorderingBuffer *buffer, UErrorCode &errorCode) const;
void makeFCDAndAppend(const char16_t *src, const char16_t *limit,
U_COMMON_API const char16_t* makeFCD(const char16_t* src,
const char16_t* limit,
ReorderingBuffer* buffer,
UErrorCode& errorCode) const;
U_COMMON_API void makeFCDAndAppend(const char16_t* src,
const char16_t* limit,
UBool doMakeFCD,
UnicodeString& safeMiddle,
ReorderingBuffer& buffer,
UErrorCode& errorCode) const;
UBool hasDecompBoundaryBefore(UChar32 c) const;
UBool norm16HasDecompBoundaryBefore(uint16_t norm16) const;
UBool hasDecompBoundaryAfter(UChar32 c) const;
UBool norm16HasDecompBoundaryAfter(uint16_t norm16) const;
UBool isDecompInert(UChar32 c) const { return isDecompYesAndZeroCC(getNorm16(c)); }
U_COMMON_API UBool hasDecompBoundaryBefore(UChar32 c) const;
U_COMMON_API UBool norm16HasDecompBoundaryBefore(uint16_t norm16) const;
U_COMMON_API UBool hasDecompBoundaryAfter(UChar32 c) const;
U_COMMON_API UBool norm16HasDecompBoundaryAfter(uint16_t norm16) const;
U_COMMON_API UBool isDecompInert(UChar32 c) const { return isDecompYesAndZeroCC(getNorm16(c)); }
UBool hasCompBoundaryBefore(UChar32 c) const {
U_COMMON_API UBool hasCompBoundaryBefore(UChar32 c) const {
return c<minCompNoMaybeCP || norm16HasCompBoundaryBefore(getNorm16(c));
}
UBool hasCompBoundaryAfter(UChar32 c, UBool onlyContiguous) const {
U_COMMON_API UBool hasCompBoundaryAfter(UChar32 c, UBool onlyContiguous) const {
return norm16HasCompBoundaryAfter(getNorm16(c), onlyContiguous);
}
UBool isCompInert(UChar32 c, UBool onlyContiguous) const {
U_COMMON_API UBool isCompInert(UChar32 c, UBool onlyContiguous) const {
uint16_t norm16=getNorm16(c);
return isCompYesAndZeroCC(norm16) &&
(norm16 & HAS_COMP_BOUNDARY_AFTER) != 0 &&
@ -553,9 +584,10 @@ public:
// The last check fetches the mapping's first unit and checks tccc<=1.
}
UBool hasFCDBoundaryBefore(UChar32 c) const { return hasDecompBoundaryBefore(c); }
UBool hasFCDBoundaryAfter(UChar32 c) const { return hasDecompBoundaryAfter(c); }
UBool isFCDInert(UChar32 c) const { return getFCD16(c)<=1; }
U_COMMON_API UBool hasFCDBoundaryBefore(UChar32 c) const { return hasDecompBoundaryBefore(c); }
U_COMMON_API UBool hasFCDBoundaryAfter(UChar32 c) const { return hasDecompBoundaryAfter(c); }
U_COMMON_API UBool isFCDInert(UChar32 c) const { return getFCD16(c) <= 1; }
private:
friend class InitCanonIterData;
friend class LcccContext;

File diff suppressed because it is too large Load Diff

View File

@ -135,7 +135,7 @@ struct RBBIStateTable {
uint32_t fNumStates; // Number of states.
uint32_t fRowLen; // Length of a state table row, in bytes.
uint32_t fDictCategoriesStart; // Char category number of the first dictionary
// char class, or the the largest category number + 1
// char class, or the largest category number + 1
// if there are no dictionary categories.
uint32_t fLookAheadResultsSize; // Size of run-time array required for holding
// look-ahead results. Indexed by row.fLookAhead.

View File

@ -1020,7 +1020,7 @@ void RBBIRuleScanner::parse() {
// Main loop for the rule parsing state machine.
// Runs once per state transition.
// Each time through optionally performs, depending on the state table,
// - an advance to the the next input char
// - an advance to the next input char
// - an action to be performed.
// - pushing or popping a state to/from the local state return stack.
//

View File

@ -328,9 +328,10 @@ int32_t RBBISetBuilder::getTrieSize() {
UCPTRIE_TYPE_FAST,
use8Bits ? UCPTRIE_VALUE_BITS_8 : UCPTRIE_VALUE_BITS_16,
fStatus);
fTrieSize = ucptrie_toBinary(fTrie, nullptr, 0, fStatus);
if (*fStatus == U_BUFFER_OVERFLOW_ERROR) {
*fStatus = U_ZERO_ERROR;
UErrorCode bufferStatus = *fStatus;
fTrieSize = ucptrie_toBinary(fTrie, nullptr, 0, &bufferStatus);
if (bufferStatus != U_BUFFER_OVERFLOW_ERROR && U_FAILURE(bufferStatus)) {
*fStatus = bufferStatus;
}
}
return fTrieSize;

View File

@ -46,7 +46,7 @@ public:
int32_t fNum {0}; // runtime-mapped input value for this range.
bool fIncludesDict {false}; // True if the range includes $dictionary.
bool fFirstInGroup {false}; // True if first range in a group with the same fNum.
UVector *fIncludesSets {nullptr}; // vector of the the original
UVector *fIncludesSets {nullptr}; // vector of the original
// Unicode sets that include this range.
// (Contains ptrs to uset nodes)
RangeDescriptor *fNext {nullptr}; // Next RangeDescriptor in the linked list.

View File

@ -1442,7 +1442,7 @@ void RBBITableBuilder::buildSafeReverseTable(UErrorCode &status) {
// 1. Identify pairs of character classes that are "safe." Safe means that boundaries
// following the pair do not depend on context or state before the pair. To test
// whether a pair is safe, run it through the main forward state table, starting
// from each state. If the the final state is the same, no matter what the starting state,
// from each state. If the final state is the same, no matter what the starting state,
// the pair is safe.
//
// 2. Build a state table that recognizes the safe pairs. It's similar to their

View File

@ -422,7 +422,7 @@ public:
******************************************************************
*/
class U_COMMON_API ICULocaleService : public ICUService
class U_COMMON_API_CLASS ICULocaleService : public ICUService
{
private:
Locale fallbackLocale;
@ -432,17 +432,17 @@ class U_COMMON_API ICULocaleService : public ICUService
/**
* Construct an ICULocaleService.
*/
ICULocaleService();
U_COMMON_API ICULocaleService();
/**
* Construct an ICULocaleService with a name (useful for debugging).
*/
ICULocaleService(const UnicodeString& name);
U_COMMON_API ICULocaleService(const UnicodeString& name);
/**
* Destructor.
*/
virtual ~ICULocaleService();
U_COMMON_API virtual ~ICULocaleService();
#if 0
// redeclare because of overload resolution rules?
@ -462,19 +462,19 @@ class U_COMMON_API ICULocaleService : public ICUService
* get(Locale, int, Locale[]) with KIND_ANY for kind and null for
* actualReturn.
*/
UObject* get(const Locale& locale, UErrorCode& status) const;
U_COMMON_API UObject* get(const Locale& locale, UErrorCode& status) const;
/**
* Convenience override for callers using locales. This calls
* get(Locale, int, Locale[]) with a null actualReturn.
*/
UObject* get(const Locale& locale, int32_t kind, UErrorCode& status) const;
U_COMMON_API UObject* get(const Locale& locale, int32_t kind, UErrorCode& status) const;
/**
* Convenience override for callers using locales. This calls
* get(Locale, String, Locale[]) with a null kind.
*/
UObject* get(const Locale& locale, Locale* actualReturn, UErrorCode& status) const;
U_COMMON_API UObject* get(const Locale& locale, Locale* actualReturn, UErrorCode& status) const;
/**
* Convenience override for callers using locales. This uses
@ -482,27 +482,27 @@ class U_COMMON_API ICULocaleService : public ICUService
* if actualReturn is not null, returns the actualResult from
* getKey (stripping any prefix) into a Locale.
*/
UObject* get(const Locale& locale, int32_t kind, Locale* actualReturn, UErrorCode& status) const;
U_COMMON_API UObject* get(const Locale& locale, int32_t kind, Locale* actualReturn, UErrorCode& status) const;
/**
* Convenience override for callers using locales. This calls
* registerObject(Object, Locale, int32_t kind, int coverage)
* passing KIND_ANY for the kind, and VISIBLE for the coverage.
*/
virtual URegistryKey registerInstance(UObject* objToAdopt, const Locale& locale, UErrorCode& status);
U_COMMON_API virtual URegistryKey registerInstance(UObject* objToAdopt, const Locale& locale, UErrorCode& status);
/**
* Convenience function for callers using locales. This calls
* registerObject(Object, Locale, int kind, int coverage)
* passing VISIBLE for the coverage.
*/
virtual URegistryKey registerInstance(UObject* objToAdopt, const Locale& locale, int32_t kind, UErrorCode& status);
U_COMMON_API virtual URegistryKey registerInstance(UObject* objToAdopt, const Locale& locale, int32_t kind, UErrorCode& status);
/**
* Convenience function for callers using locales. This instantiates
* a SimpleLocaleKeyFactory, and registers the factory.
*/
virtual URegistryKey registerInstance(UObject* objToAdopt, const Locale& locale, int32_t kind, int32_t coverage, UErrorCode& status);
U_COMMON_API virtual URegistryKey registerInstance(UObject* objToAdopt, const Locale& locale, int32_t kind, int32_t coverage, UErrorCode& status);
/**
@ -512,13 +512,13 @@ class U_COMMON_API ICULocaleService : public ICUService
* We really need a flag that is understood by all compilers that will suppress the warning about
* hidden overrides.
*/
virtual URegistryKey registerInstance(UObject* objToAdopt, const UnicodeString& locale, UBool visible, UErrorCode& status) override;
U_COMMON_API virtual URegistryKey registerInstance(UObject* objToAdopt, const UnicodeString& locale, UBool visible, UErrorCode& status) override;
/**
* Convenience method for callers using locales. This returns the standard
* service ID enumeration.
*/
virtual StringEnumeration* getAvailableLocales() const;
U_COMMON_API virtual StringEnumeration* getAvailableLocales() const;
protected:
@ -526,17 +526,17 @@ class U_COMMON_API ICULocaleService : public ICUService
* Return the name of the current fallback locale. If it has changed since this was
* last accessed, the service cache is cleared.
*/
const UnicodeString& validateFallbackLocale() const;
U_COMMON_API const UnicodeString& validateFallbackLocale() const;
/**
* Override superclass createKey method.
*/
virtual ICUServiceKey* createKey(const UnicodeString* id, UErrorCode& status) const override;
U_COMMON_API virtual ICUServiceKey* createKey(const UnicodeString* id, UErrorCode& status) const override;
/**
* Additional createKey that takes a kind.
*/
virtual ICUServiceKey* createKey(const UnicodeString* id, int32_t kind, UErrorCode& status) const;
U_COMMON_API virtual ICUServiceKey* createKey(const UnicodeString* id, int32_t kind, UErrorCode& status) const;
friend class ServiceEnumeration;
};

View File

@ -51,28 +51,28 @@ private:
* Either stack-allocate, use LocalPointer, or use addRef()/removeRef().
* Sharing requires reference-counting.
*/
class U_COMMON_API SharedObject : public UObject {
class U_COMMON_API_CLASS SharedObject : public UObject {
public:
/** Initializes totalRefCount, softRefCount to 0. */
SharedObject() :
U_COMMON_API SharedObject() :
softRefCount(0),
hardRefCount(0),
cachePtr(nullptr) {}
/** Initializes totalRefCount, softRefCount to 0. */
SharedObject(const SharedObject &other) :
U_COMMON_API SharedObject(const SharedObject &other) :
UObject(other),
softRefCount(0),
hardRefCount(0),
cachePtr(nullptr) {}
virtual ~SharedObject();
U_COMMON_API virtual ~SharedObject();
/**
* Increments the number of hard references to this object. Thread-safe.
* Not for use from within the Unified Cache implementation.
*/
void addRef() const;
U_COMMON_API void addRef() const;
/**
* Decrements the number of hard references to this object, and
@ -81,32 +81,32 @@ public:
*
* Not for use from within the UnifiedCache implementation.
*/
void removeRef() const;
U_COMMON_API void removeRef() const;
/**
* Returns the number of hard references for this object.
* Uses a memory barrier.
*/
int32_t getRefCount() const;
U_COMMON_API int32_t getRefCount() const;
/**
* If noHardReferences() == true then this object has no hard references.
* Must be called only from within the internals of UnifiedCache.
*/
inline UBool noHardReferences() const { return getRefCount() == 0; }
U_COMMON_API inline UBool noHardReferences() const { return getRefCount() == 0; }
/**
* If hasHardReferences() == true then this object has hard references.
* Must be called only from within the internals of UnifiedCache.
*/
inline UBool hasHardReferences() const { return getRefCount() != 0; }
U_COMMON_API inline UBool hasHardReferences() const { return getRefCount() != 0; }
/**
* Deletes this object if it has no references.
* Available for non-cached SharedObjects only. Ownership of cached objects
* is with the UnifiedCache, which is solely responsible for eviction and deletion.
*/
void deleteIfZeroRefCount() const;
U_COMMON_API void deleteIfZeroRefCount() const;
/**

View File

@ -187,7 +187,13 @@ void U_CALLCONV initNumberParseUniSets(UErrorCode& status) {
U_ASSERT(gUnicodeSets[PERCENT_SIGN] != nullptr);
U_ASSERT(gUnicodeSets[PERMILLE_SIGN] != nullptr);
// The following don't currently have parseLenients in data.
U_ASSERT(gUnicodeSets[INFINITY_SIGN] == nullptr);
gUnicodeSets[INFINITY_SIGN] = new UnicodeSet(u"[∞]", status);
U_ASSERT(gUnicodeSets[APPROXIMATELY_SIGN] == nullptr);
// This set of characters was manually curated from the
// values of the approximatelySign element of CLDR common/main/*.xml files.
gUnicodeSets[APPROXIMATELY_SIGN] = new UnicodeSet(u"[~≈≃約]", status);
if (U_FAILURE(status)) { return; }
U_ASSERT(gUnicodeSets[DOLLAR_SIGN] != nullptr);

View File

@ -56,6 +56,7 @@ enum Key {
PERCENT_SIGN,
PERMILLE_SIGN,
INFINITY_SIGN,
APPROXIMATELY_SIGN,
// Currency Symbols
DOLLAR_SIGN,

File diff suppressed because it is too large Load Diff

View File

@ -9,11 +9,11 @@
#ifdef INCLUDED_FROM_UCASE_CPP
static const UVersionInfo ucase_props_dataVersion={0x10,0,0,0};
static const UVersionInfo ucase_props_dataVersion={0x11,0,0,0};
static const int32_t ucase_props_indexes[UCASE_IX_TOP]={0x10,0x78bc,0x6888,0x688,0x172,0,0,0,0,0,0,0,0,0,0,3};
static const int32_t ucase_props_indexes[UCASE_IX_TOP]={0x10,0x7a14,0x69e0,0x688,0x172,0,0,0,0,0,0,0,0,0,0,3};
static const uint16_t ucase_props_trieIndex[13372]={
static const uint16_t ucase_props_trieIndex[13544]={
0x363,0x36b,0x373,0x37b,0x389,0x391,0x399,0x3a1,0x3a9,0x3b1,0x3b8,0x3c0,0x3c8,0x3d0,0x3d8,0x3e0,
0x3e6,0x3ee,0x3f6,0x3fe,0x406,0x40e,0x416,0x41e,0x426,0x42e,0x436,0x43e,0x446,0x44e,0x456,0x45e,
0x466,0x46e,0x476,0x47e,0x486,0x48e,0x496,0x49e,0x49a,0x4a2,0x4a7,0x4af,0x4b6,0x4be,0x4c6,0x4ce,
@ -27,18 +27,18 @@ static const uint16_t ucase_props_trieIndex[13372]={
0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,
0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x693,0x699,0x581,0x581,0x382,0x69f,0x6a7,0x382,
0x6af,0x382,0x6b7,0x382,0x6be,0x6c4,0x382,0x382,0x382,0x6cc,0x382,0x382,0x382,0x382,0x382,0x382,
0x6d3,0x382,0x6da,0x6e2,0x382,0x6ea,0x6f2,0x382,0x5b1,0x6f6,0x6fe,0x704,0x5f3,0x70c,0x382,0x713,
0x382,0x718,0x382,0x71e,0x726,0x72a,0x732,0x73a,0x742,0x747,0x74a,0x752,0x762,0x75a,0x772,0x76a,
0x3a9,0x77a,0x3a9,0x782,0x785,0x3a9,0x78d,0x3a9,0x795,0x79d,0x7a5,0x7ad,0x7b5,0x7bd,0x7c5,0x7cd,
0x7d5,0x7dc,0x382,0x7e4,0x7ec,0x382,0x7f4,0x7fc,0x804,0x80c,0x814,0x81c,0x824,0x382,0x382,0x382,
0x6d3,0x382,0x6da,0x6e2,0x382,0x6ea,0x6fa,0x6f2,0x5b1,0x702,0x70a,0x710,0x5f3,0x718,0x382,0x71f,
0x382,0x724,0x382,0x72a,0x732,0x736,0x73e,0x746,0x74e,0x753,0x756,0x75e,0x76e,0x766,0x77e,0x776,
0x3a9,0x786,0x3a9,0x78e,0x791,0x3a9,0x799,0x3a9,0x7a1,0x7a9,0x7b1,0x7b9,0x7c1,0x7c9,0x7d1,0x7d9,
0x7e1,0x7e8,0x382,0x7f0,0x7f8,0x382,0x800,0x808,0x810,0x818,0x820,0x828,0x830,0x382,0x382,0x382,
0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,
0x382,0x382,0x382,0x382,0x382,0x827,0x82d,0x833,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,
0x382,0x382,0x382,0x382,0x382,0x833,0x839,0x83f,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,
0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,
0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,
0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,
0x83b,0x83f,0x843,0x84b,0x3a9,0x3a9,0x3a9,0x853,0x85b,0x862,0x382,0x867,0x382,0x382,0x382,0x86f,
0x847,0x84b,0x84f,0x857,0x3a9,0x3a9,0x3a9,0x85f,0x867,0x86e,0x382,0x873,0x382,0x382,0x382,0x87b,
0x382,0x6b4,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,
0x573,0x877,0x382,0x382,0x87e,0x382,0x382,0x886,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,
0x573,0x883,0x382,0x382,0x88a,0x382,0x382,0x892,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,
0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,
0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,
0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,
@ -94,12 +94,12 @@ static const uint16_t ucase_props_trieIndex[13372]={
0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,
0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,
0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,
0x88e,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,
0x89a,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,
0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,
0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x71e,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,
0x894,0x382,0x89c,0x8a1,0x8a9,0x382,0x382,0x8b1,0x8b9,0x8c1,0x3a9,0x8c6,0x8ce,0x8d4,0x8db,0x8e3,
0x8eb,0x8f2,0x382,0x382,0x382,0x382,0x8f9,0x901,0x382,0x909,0x910,0x382,0x55e,0x915,0x91d,0x6be,
0x382,0x923,0x92b,0x92f,0x382,0x937,0x93f,0x947,0x382,0x94d,0x951,0x959,0x969,0x961,0x382,0x971,
0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x72a,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,
0x8a0,0x382,0x8a8,0x8ad,0x8b5,0x382,0x382,0x8bd,0x8c5,0x8cd,0x3a9,0x8d2,0x8da,0x8e0,0x8e7,0x8ef,
0x8f7,0x8fe,0x382,0x382,0x382,0x382,0x905,0x90d,0x382,0x915,0x91c,0x382,0x55e,0x921,0x929,0x6be,
0x382,0x92f,0x937,0x93b,0x382,0x943,0x94b,0x953,0x382,0x959,0x95d,0x965,0x975,0x96d,0x382,0x97d,
0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,
0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,
0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,
@ -139,9 +139,9 @@ static const uint16_t ucase_props_trieIndex[13372]={
0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,
0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,
0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,
0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x979,0x382,0x382,0x382,0x382,0x981,0x55e,0x382,
0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x985,0x382,0x382,0x382,0x382,0x98d,0x55e,0x382,
0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,
0x986,0x98e,0x992,0x382,0x382,0x382,0x382,0x365,0x36b,0x99a,0x9a2,0x9a9,0x519,0x382,0x382,0x9b1,
0x992,0x99a,0x99e,0x382,0x382,0x382,0x382,0x365,0x36b,0x9a6,0x9ae,0x9b5,0x519,0x382,0x382,0x9bd,
0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,
0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,
0xe0c,0xe0c,0xe24,0xe64,0xea4,0xee0,0xf20,0xf60,0xf98,0xfd8,0x1018,0x1058,0x1098,0x10d8,0x1118,0x1158,
@ -173,61 +173,61 @@ static const uint16_t ucase_props_trieIndex[13372]={
0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,
0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,
0xd4b,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,
0x9b8,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x9c0,0x382,0x382,0x382,0x9c3,0x382,0x382,0x382,
0x382,0x9cb,0x9d1,0x9d5,0x382,0x382,0x9d9,0x9dd,0x9e3,0x382,0x382,0x382,0x9ea,0x9ee,0x9f6,0x382,
0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0xa06,0x9fe,0x382,
0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0xa0e,
0xa12,0x382,0x382,0x382,0x382,0x382,0xa1a,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,
0x382,0x382,0x382,0xa22,0xa26,0xa2e,0xa32,0x382,0xa39,0xa3e,0xa45,0xa4c,0x382,0x382,0x382,0x382,
0x382,0x382,0x382,0x382,0xa52,0x382,0xa56,0x382,0x382,0xa5e,0x382,0xa66,0x382,0x382,0x382,0x574,
0xa68,0xa6f,0xa73,0x5f3,0xa7b,0xa83,0x382,0xa8b,0xa92,0x382,0xa98,0x5f3,0xa9d,0xaa5,0x382,0x382,
0xaaa,0x574,0x382,0x382,0x382,0x365,0xab2,0x5f3,0x5f5,0xaba,0xac1,0x382,0xa8d,0xac9,0x58d,0x382,
0xa68,0xad1,0x382,0x382,0xad9,0xae1,0x382,0x382,0x382,0x382,0x382,0x382,0xae5,0xaed,0x382,0x382,
0xaf5,0x4dd,0x382,0x382,0xafd,0x382,0x382,0xb03,0xb0b,0x382,0x382,0x382,0x382,0x382,0x382,0xb10,
0x382,0x382,0x382,0xb18,0xb20,0x382,0x382,0xb28,0xb30,0x382,0x382,0x382,0xb33,0x9c0,0xb3b,0xb3f,
0xb47,0x382,0xb4e,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0xb55,
0x382,0x382,0x981,0xb5d,0x382,0x382,0x382,0xb63,0xb6b,0x382,0xb6f,0x382,0x382,0x382,0x382,0x382,
0x382,0x382,0x382,0x382,0x382,0xb75,0x5f3,0xb7b,0xb83,0x382,0x382,0x382,0x382,0x382,0x382,0x382,
0x9c4,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x9cc,0x382,0x382,0x382,0x9cf,0x382,0x382,0x382,
0x382,0x9d7,0x9dd,0x9e1,0x382,0x382,0x9e5,0x9e9,0x9ef,0x382,0x382,0x382,0x9f6,0x9fa,0xa02,0x382,
0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0xa12,0xa0a,0x382,
0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0xa1a,
0xa1e,0x382,0x382,0x382,0x382,0x382,0xa26,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,
0x382,0x382,0x382,0xa2e,0xa32,0xa3a,0xa3e,0x382,0xa45,0xa4a,0xa51,0xa58,0x382,0x382,0x382,0x382,
0x382,0x382,0x382,0x382,0xa5e,0x573,0xa62,0x382,0x382,0xa6a,0x382,0xa72,0x382,0x382,0x382,0x574,
0xa74,0xa7b,0xa7f,0x5f3,0xa87,0xa8f,0x382,0xa97,0xa9e,0x382,0xaa4,0x5f3,0xaa9,0xab1,0x382,0x382,
0xab6,0x574,0x382,0x382,0x382,0x365,0xabe,0x5f3,0x5f5,0xac6,0xacd,0x382,0xa99,0xad5,0x58d,0x382,
0xa74,0xadd,0x382,0x382,0xae5,0xaed,0x382,0x382,0x382,0x382,0x382,0x382,0xaf1,0xaf9,0x382,0x382,
0xb01,0x4dd,0x382,0x382,0xb09,0x382,0x382,0xb0f,0xb17,0x382,0x382,0x382,0x382,0x382,0x382,0xb1c,
0x382,0x382,0x382,0xb24,0xb2c,0x382,0x382,0xb34,0xb3c,0x382,0x382,0x382,0xb3f,0x9cc,0xb47,0xb4b,
0xb53,0x382,0xb5a,0x382,0x382,0x382,0x382,0x382,0x382,0xb62,0x382,0x382,0x382,0x382,0x382,0xb66,
0x382,0x382,0x98d,0xb6e,0x382,0x382,0x382,0xb74,0xb7c,0x382,0xb80,0x382,0x94c,0x382,0x382,0x382,
0x382,0x382,0x382,0x382,0x382,0xb86,0x5f3,0xb8c,0xb94,0x382,0x382,0x382,0x382,0x382,0x382,0x382,
0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,
0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0xb8a,0xb92,0x382,0x382,0x382,0x382,
0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0xb9b,0xba3,0x382,0x382,0x382,0x382,
0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,
0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x519,0xb9a,0x382,0x382,0x382,0x382,0x382,
0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x519,0xbab,0x382,0x382,0x382,0x382,0x382,
0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,
0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,
0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,
0x382,0xb9e,0x382,0xba4,0x5b1,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,
0x382,0x382,0x382,0x382,0x55e,0xb77,0x382,0x382,0x382,0x382,0x382,0x382,0xb18,0xb20,0x382,0x382,
0x382,0x382,0x382,0x382,0x6b4,0x382,0xbaa,0x382,0x382,0xbb2,0x382,0x382,0x382,0x382,0x382,0x382,
0x382,0xbaf,0x382,0xbb5,0x5b1,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,
0x382,0x382,0x382,0x382,0x55e,0xb88,0x382,0x382,0x382,0x382,0x382,0x382,0xb24,0xb2c,0x382,0xbbd,
0xbc4,0x382,0x382,0x382,0x6b4,0x382,0xbc9,0x382,0x382,0xbd1,0x382,0x382,0x382,0x382,0x382,0x382,
0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,
0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,
0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,
0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0xbb7,0x382,0x382,0x382,0x382,0x382,0x382,
0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0xbd6,0x382,0x382,0x382,0x382,0x382,0x382,
0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,
0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0xbbf,0x5b1,
0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0xbde,0x5b1,
0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,
0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,
0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,
0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x8b9,0xbc7,0xbce,0x382,0x382,0x382,0x382,0x382,
0x382,0x382,0x382,0x382,0x382,0x382,0xbd5,0xbdd,0xbe3,0x382,0x382,0x382,0x382,0xbeb,0x382,0x382,
0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0xbf3,0xbfb,0xc00,0xc06,0xc0e,
0xc16,0xc1e,0xbf7,0xc26,0xc2e,0xc36,0xc3d,0xbf8,0xbf3,0xbfb,0xbf6,0xc06,0xbf9,0xbf4,0xc45,0xbf7,
0xc4d,0xc55,0xc5d,0xc64,0xc50,0xc58,0xc60,0xc67,0xc53,0xc6f,0x382,0x382,0x382,0x382,0x382,0x382,
0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x8b9,0xc77,0x8b9,0xc7e,0xc85,0xc8d,
0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x8c5,0xbe6,0xbed,0x382,0x382,0x382,0x382,0x382,
0x382,0x382,0x382,0x382,0x382,0x382,0xbf4,0xbfc,0xc02,0x382,0x382,0x382,0x382,0xc0a,0x382,0x382,
0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0xc12,0xc1a,0xc1f,0xc25,0xc2d,
0xc35,0xc3d,0xc16,0xc45,0xc4d,0xc55,0xc5c,0xc17,0xc12,0xc1a,0xc15,0xc25,0xc18,0xc13,0xc64,0xc16,
0xc6c,0xc74,0xc7c,0xc83,0xc6f,0xc77,0xc7f,0xc86,0xc72,0xc8e,0x382,0x382,0x382,0x382,0x382,0x382,
0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x8c5,0xc96,0x8c5,0xc9d,0xca4,0xcac,
0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,
0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,
0x382,0x382,0xc9d,0xca5,0x382,0x382,0x382,0x382,0x382,0x382,0xc95,0xcad,0xcc0,0xcb3,0xcb8,0x382,
0x382,0x382,0x382,0xcc8,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0xad5,
0x382,0xa37,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,
0x382,0xcd0,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0xcd5,0x382,0x382,0x382,0x382,0x382,0x382,
0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0xcd9,0x382,0xce1,0xce9,0xcf0,0x382,
0x382,0x382,0xcbc,0xcc4,0x382,0x382,0x382,0x382,0x382,0x382,0xcb4,0xccc,0xcdf,0xcd2,0xcd7,0x382,
0x382,0x382,0x382,0xce7,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0xae1,
0x382,0xa43,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,
0x382,0xcef,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0xcf4,0x382,0x382,0x382,0x382,0x382,0x382,
0x382,0xcfc,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0xd04,0x382,0xd0c,0xd14,0xd1b,0x382,
0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,
0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,
0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,
0x382,0x382,0x382,0x382,0xbef,0xcf8,0xcf8,0xcfe,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,
0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0xa8d,0x382,0x382,0x382,0x382,0x382,
0x382,0x382,0x382,0x382,0xc0e,0xd23,0xd23,0xd29,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,
0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0xa99,0x382,0x382,0x382,0x382,0x382,
0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,
0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x574,0x8b9,0x8b9,0x8b9,0x382,
0x382,0x382,0x382,0x8b9,0x8b9,0x8b9,0x8b9,0x8b9,0x8b9,0x8b9,0xd06,0x382,0x382,0x382,0x382,0x382,
0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x574,0x8c5,0x8c5,0x8c5,0x382,
0x382,0x382,0x382,0x8c5,0x8c5,0x8c5,0x8c5,0x8c5,0x8c5,0x8c5,0xd31,0x382,0x382,0x382,0x382,0x382,
0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,
0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,
0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x362,0,0,0,0,
@ -272,7 +272,7 @@ static const uint16_t ucase_props_trieIndex[13372]={
1,0x9b11,1,0x9a91,0x869,1,1,1,0x9991,0x889,1,0x9891,0x8a9,0x8c9,0x8e9,1,
0x97b1,0x9691,0x8e9,0x909,0x929,1,1,0x9691,1,0x949,0x9591,1,1,0x9511,1,1,
1,1,1,1,1,0x969,1,1,0x9311,1,0x989,0x9311,1,1,1,0x9a9,
0x9311,0xdd91,0x9391,0x9391,0xdc91,1,1,1,1,1,0x9291,1,0,1,1,1,
0x9311,0xdd91,0x9391,0x9391,0xdc91,1,1,1,1,1,0x9291,1,0,0,1,1,
1,1,1,1,1,0x9c9,0x9e9,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,5,5,0x25,5,5,5,5,5,
5,4,4,4,0x14,4,0x14,4,5,5,4,4,4,4,4,4,
@ -458,8 +458,11 @@ static const uint16_t ucase_props_trieIndex[13372]={
4,4,4,4,4,0,0,0,0,0,0,4,4,0x44,0x44,0x44,
0x44,0x44,0x44,0x44,0x44,0,0,0x64,0,0,0,0,0,0,0,4,
0,0,0,0,0,0,0,0,0x44,0x44,0x44,0x44,0x44,0x64,0x64,0x64,
0x64,0x64,0x64,0x44,0x44,0x64,4,0x64,0x64,0x44,0x44,0x64,0x64,0x44,0x44,0x44,
0x44,0x44,0x64,0x44,0x44,0x44,0x44,0,0,0,0,0,0,0,0,0,
0x64,0x64,0x64,0x44,0x44,0x64,4,0x64,0x44,0x44,0x44,0x44,0x44,0x44,0x64,0x44,
0x44,0x44,0x44,0x64,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0x64,0x44,0x44,0x64,0x64,0x44,0x44,0x44,
0x44,0x44,0x64,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,
0x44,0x44,0x44,0x44,0x44,0x64,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0x64,0,4,4,
4,4,4,0,4,0,0,0,0,0,4,0,0x60,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
@ -581,9 +584,9 @@ static const uint16_t ucase_props_trieIndex[13372]={
4,4,4,0x92,0xff91,0x513a,1,0,0x92,0xff91,0x92,0xff91,0x1811,1,0x92,0xff91,
0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x515a,0x517a,0x519a,0x51ba,0x515a,1,
0x51da,0x51fa,0x521a,0x523a,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,
0xe812,0x525a,0x527a,0x92,0xff91,0x92,0xff91,0x529a,0x92,0xff91,0,0,0x92,0xff91,0,1,
0,1,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x52ba,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,5,5,
0xe812,0x525a,0x527a,0x92,0xff91,0x92,0xff91,0x529a,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,
0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x52ba,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,5,5,5,
5,0x92,0xff91,0,5,5,1,0,0,0,0,0,0,0,4,0,
0,0,0x64,0,0,0,0,4,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,4,4,0,
@ -677,7 +680,7 @@ static const uint16_t ucase_props_trieIndex[13372]={
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0x44,0x44,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,4,0x64,0x64,0x64,0,0,0,0,0,0,0x64,0x64,
0,0,0x64,0x64,4,0x64,0x64,0x64,0,0,0,0,0,0,0x64,0x64,
0x44,0x44,0x44,0x64,0x44,0x64,0x64,0x64,0x64,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0x44,0x64,0x44,0x64,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
@ -739,117 +742,125 @@ static const uint16_t ucase_props_trieIndex[13372]={
0,0,0,0x64,0,0,0,0,0,0,0,0,0,4,4,4,
4,4,4,0,0,4,4,4,0,0,0,0,0,0,0,0,
0,0,4,4,4,4,4,4,4,4,4,4,4,4,4,0,
4,0x64,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,4,4,4,4,4,4,4,0,4,4,4,4,
4,4,0,0x64,4,4,4,4,4,4,4,4,0,0,4,4,
4,4,4,4,4,0,4,4,0,4,4,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,4,4,4,
4,4,4,0,0,0,4,0,4,4,0,4,4,4,0x64,4,
0x64,0x64,0,4,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,4,4,0,0,
0,4,0,0x64,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,4,4,0,0,0,0,0,0,0,
4,0x64,0,0,0,0,0,0,4,0,4,4,4,0,4,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,4,4,4,4,4,0,0,0,0,0,4,0x60,0x64,0,
0,0,0,0,0,0,0,0,4,4,4,4,4,4,4,0,
4,4,4,4,4,4,0,0x64,4,4,4,4,4,4,4,4,
0,0,4,4,4,4,4,4,4,0,4,4,0,4,4,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,4,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,4,4,4,4,4,4,4,4,
4,4,4,4,4,4,4,4,4,0,0,0,0,0,0,4,
4,4,4,4,4,4,4,4,4,4,4,4,4,4,0,0,
0,0,0,0,0,0,0,0,4,4,4,4,4,4,4,4,
4,4,0,0,0,4,4,0x64,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0x64,0x64,0x64,0x64,0x64,0,0,0,
0,4,4,4,4,4,4,0,0,0,4,0,4,4,0,4,
4,4,0x64,4,0x64,0x64,0,4,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0x44,0x44,0x44,0x44,0x44,0x44,0x44,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,4,4,4,4,4,4,4,4,4,
4,4,4,4,4,4,4,4,4,4,0,4,4,0,0,0,
0,0,0,0,0,0,0,0,0x60,0x60,0,0,0,0,0,0,
4,4,0,0,0,4,0,0x64,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,4,4,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,4,4,4,4,4,0,0,0,0,0,
4,0x60,0x64,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,4,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,4,4,4,4,
0,4,4,4,4,4,4,4,0,4,4,0,0,0,0,0,
4,4,4,4,4,4,4,4,4,4,4,4,4,0,0,0,
0,0,0,4,4,4,4,4,4,4,4,4,4,4,4,4,
4,4,0,0,0,0,0,0,0,0,0,0,4,4,4,4,
4,4,4,4,4,4,0,0,0,4,4,0x64,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0x64,0x64,0x64,0x64,
0x64,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0,0,0,0,0,
0,0,0,0,0xd92,0xd92,0xd92,0xd92,0xd92,0xd92,0xd92,0xd92,0xd92,0xd92,0xd92,0xd92,
0xd92,0xd92,0xd92,0xd92,0xd92,0xd92,0xd92,0xd92,0xd92,0xd92,0xd92,0xd92,0xd92,0,0,0xf291,
0xf291,0xf291,0xf291,0xf291,0xf291,0xf291,0xf291,0xf291,0xf291,0xf291,0xf291,0xf291,0xf291,0xf291,0xf291,0xf291,
0xf291,0xf291,0xf291,0xf291,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,4,4,4,4,4,4,4,4,4,4,4,4,4,
4,4,4,4,4,4,0,4,4,0,0,0,0,0,0,0,
0,0,0,0,0x60,0x60,4,4,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,4,4,4,4,0,4,4,4,
4,4,4,4,0,4,4,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,4,0x64,0,4,4,4,4,
4,4,4,4,4,4,4,4,4,4,0,0,4,4,4,4,
4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,0,
0,0,0,0,0,4,0x64,0,4,4,4,4,4,4,4,4,
4,4,4,4,4,4,0,0,4,4,4,4,4,4,4,4,
4,4,4,4,4,4,4,4,4,4,4,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0x60,0x60,0x64,0x64,0x64,0,0,
0,0x60,0x60,0x60,0x60,0x60,0x60,4,4,4,4,4,4,4,4,0x64,
0x64,0x64,0x64,0x64,0x64,0x64,0x64,0,0,0x44,0x44,0x44,0x44,0x44,0x64,0x64,
0,0,0,0,0,0x60,0x60,0x64,0x64,0x64,0,0,0,0x60,0x60,0x60,
0x60,0x60,0x60,4,4,4,4,4,4,4,4,0x64,0x64,0x64,0x64,0x64,
0x64,0x64,0x64,0,0,0x44,0x44,0x44,0x44,0x44,0x64,0x64,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0x44,0x44,0x44,0x44,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0x44,0x44,
0x44,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,2,
0,0,0x44,0x44,0x44,0x44,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0x44,0x44,0x44,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,2,2,2,2,2,2,2,2,
2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
2,2,1,1,1,1,1,1,1,1,0x21,0x21,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,2,2,2,2,
2,2,2,2,2,2,2,2,2,2,1,1,1,1,1,1,
1,0,0x21,0x21,1,1,1,1,1,1,1,1,2,2,2,2,
2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
2,2,2,2,2,2,1,1,1,1,1,1,1,1,0x21,0x21,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
2,2,2,2,2,2,2,2,2,2,2,2,2,2,1,1,
1,1,1,1,1,0,0x21,0x21,1,1,1,1,1,1,1,1,
2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
2,2,2,2,2,2,2,2,2,2,1,1,1,1,1,1,
1,1,0x21,0x21,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,2,0,2,2,0,0,2,0,0,2,2,0,
0,2,2,2,2,0,2,2,2,2,2,2,2,2,1,1,
1,1,0,1,0,1,0x21,0x21,1,1,1,1,0,1,1,1,
1,1,1,1,1,1,1,1,2,2,2,2,2,2,2,2,
2,2,2,2,2,2,2,2,1,1,1,1,2,2,0,2,
2,2,2,0,0,2,2,2,2,2,2,2,2,0,2,2,
2,2,2,2,2,0,1,1,1,1,1,1,1,1,0x21,0x21,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
2,2,0,2,2,2,2,0,2,2,2,2,2,0,2,0,
0,0,2,2,2,2,2,2,2,0,1,1,1,1,1,1,
1,1,0x21,0x21,1,1,1,1,1,1,1,1,1,1,1,1,
2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
2,2,2,2,1,1,1,1,1,1,1,1,1,1,1,1,
2,0,2,2,0,0,2,0,0,2,2,0,0,2,2,2,
2,0,2,2,2,2,2,2,2,2,1,1,1,1,0,1,
0,1,0x21,0x21,1,1,1,1,0,1,1,1,1,1,1,1,
1,1,1,1,2,2,2,2,2,2,2,2,2,2,2,2,
2,2,2,2,1,1,1,1,1,1,0,0,2,2,2,2,
2,2,2,2,1,1,1,1,2,2,0,2,2,2,2,0,
0,2,2,2,2,2,2,2,2,0,2,2,2,2,2,2,
2,0,1,1,1,1,1,1,1,1,0x21,0x21,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,2,2,0,2,
2,2,2,0,2,2,2,2,2,0,2,0,0,0,2,2,
2,2,2,2,2,0,1,1,1,1,1,1,1,1,0x21,0x21,
1,1,1,1,1,1,1,1,1,1,1,1,2,2,2,2,
2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
2,2,2,2,2,0,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,
1,1,1,1,1,1,2,2,2,2,2,2,2,2,2,2,
2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,0,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,0,1,1,1,1,1,1,2,2,2,2,
2,2,2,2,2,2,2,2,2,2,2,2,1,1,1,0,
1,1,1,1,1,1,2,1,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,4,4,4,4,
4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
4,4,4,0,0,0,0,4,4,4,4,4,4,4,4,4,
4,4,4,4,4,0,0,0,0,0,0,0,0,4,0,0,
0,0,0,0,0,0,0,0,4,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4,
4,4,4,4,0,4,4,4,4,4,4,4,4,4,4,4,
4,4,4,4,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0,0x44,0x44,0x44,0x44,
0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0,0,0x44,
0x44,0x44,0x44,0x44,1,1,1,1,1,1,1,1,1,1,0,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,0x21,1,
1,1,1,0,0,0,0,0,0,1,1,1,1,1,1,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0x44,0x44,0,0x44,0x44,0,0x44,0x44,0x44,0x44,0x44,0,
0,0,0,0,5,5,5,5,5,5,5,5,5,5,5,5,
5,5,5,5,0x25,5,5,5,5,5,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0x44,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
5,5,5,5,5,5,5,5,5,5,5,5,0x25,0x25,5,5,
5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0x44,0x44,0x44,0x44,0x44,0x44,0x44,4,4,4,4,4,4,4,0,0,
0,0,0,0,0,0,0,0,0,0,0,4,0x64,0x64,0x64,0x44,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0x44,0x64,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0x64,0x64,0x64,0x64,0x64,0x64,0x64,0,0,0,0,0,
0,0,0,0,0x1112,0x1112,0x1112,0x1112,0x1112,0x1112,0x1112,0x1112,0x1112,0x1112,0x1112,0x1112,
0x1112,0x1112,0x1112,0x1112,0x1112,0x1112,0x1112,0x1112,0x1112,0x1112,0x1112,0x1112,0x1112,0x1112,0x1112,0x1112,
0x1112,0x1112,0x1112,0x1112,0x1112,0x1112,0xef11,0xef11,0xef11,0xef11,0xef11,0xef11,0xef11,0xef11,0xef11,0xef11,
0xef11,0xef11,0xef11,0xef11,0xef11,0xef11,0xef11,0xef11,0xef11,0xef11,0xef11,0xef11,0xef11,0xef11,0xef11,0xef11,
0xef11,0xef11,0xef11,0xef11,0x44,0x44,0x44,0x44,0x44,0x44,0x64,4,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
2,2,2,2,2,2,2,2,2,2,0,0,0,0,0,0,
2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
1,1,1,1,1,1,0,0,2,2,2,2,2,2,2,2,
2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
2,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,
1,1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
2,2,2,2,2,2,2,2,2,2,2,0,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,0,1,1,1,1,1,1,2,2,2,2,2,2,2,2,
2,2,2,2,2,2,2,2,1,1,1,0,1,1,1,1,
1,1,2,1,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,4,4,4,4,4,4,4,4,
4,4,4,4,4,4,4,4,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0
4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,0,
0,0,0,4,4,4,4,4,4,4,4,4,4,4,4,4,
4,0,0,0,0,0,0,0,0,4,0,0,0,0,0,0,
0,0,0,0,4,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,4,4,4,4,4,
0,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0x44,0x44,0x44,0x44,0x44,0x44,0x44,0,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,
0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0,0,0x44,0x44,0x44,0x44,0x44,
1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,0x21,1,1,1,1,0,
0,0,0,0,0,1,1,1,1,1,1,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0x44,0x44,0,0x44,0x44,0,0x44,0x44,0x44,0x44,0x44,0,0,0,0,0,
5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
0x25,5,5,5,5,5,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0x44,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,5,5,5,5,
5,5,5,5,5,5,5,5,0x25,0x25,5,5,5,5,5,5,
5,5,5,5,5,5,5,5,5,5,5,5,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0x44,0x44,0x44,0x44,
0x44,0x44,0x44,4,4,4,4,4,4,4,0,0,0,0,0,0,
0,0,0,0,0,0,0,4,0x64,0x64,0x64,0x44,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0x44,0x64,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0x44,0,0,0x44,0,0,0,0,0,0,0,0x44,0x44,
0,0,0,0,0,0x44,0,0,0,0,0,0,0,0,0,4,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0x64,0x64,0x64,0x64,0x64,0x64,0x64,0,0,0,0,0,0,0,0,0,
0x1112,0x1112,0x1112,0x1112,0x1112,0x1112,0x1112,0x1112,0x1112,0x1112,0x1112,0x1112,0x1112,0x1112,0x1112,0x1112,
0x1112,0x1112,0x1112,0x1112,0x1112,0x1112,0x1112,0x1112,0x1112,0x1112,0x1112,0x1112,0x1112,0x1112,0x1112,0x1112,
0x1112,0x1112,0xef11,0xef11,0xef11,0xef11,0xef11,0xef11,0xef11,0xef11,0xef11,0xef11,0xef11,0xef11,0xef11,0xef11,
0xef11,0xef11,0xef11,0xef11,0xef11,0xef11,0xef11,0xef11,0xef11,0xef11,0xef11,0xef11,0xef11,0xef11,0xef11,0xef11,
0x44,0x44,0x44,0x44,0x44,0x44,0x64,4,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,2,
2,2,2,2,2,2,0,0,0,0,0,0,2,2,2,2,
2,2,2,2,2,2,2,2,2,2,2,2,2,2,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,4,4,4,4,4,4,4,4,4,4,4,4,
4,4,4,4,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0
};
static const uint16_t ucase_props_exceptions[1672]={
@ -997,13 +1008,13 @@ static const UCaseProps ucase_props_singleton={
ucase_props_trieIndex+3468,
nullptr,
3468,
9904,
10076,
0x188,
0xe08,
0x0,
0x0,
0xe0800,
0x3438,
0x34e4,
nullptr, 0, false, false, 0, nullptr
},
{ 4,0,0,0 }

View File

@ -102,9 +102,9 @@ ucasemap_setLocale(UCaseMap *csm, const char *locale, UErrorCode *pErrorCode) {
return;
}
int32_t length=uloc_getName(locale, csm->locale, (int32_t)sizeof(csm->locale), pErrorCode);
if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR || length==sizeof(csm->locale)) {
*pErrorCode=U_ZERO_ERROR;
UErrorCode bufferStatus = U_ZERO_ERROR;
int32_t length=uloc_getName(locale, csm->locale, (int32_t)sizeof(csm->locale), &bufferStatus);
if(bufferStatus==U_BUFFER_OVERFLOW_ERROR || length==sizeof(csm->locale)) {
/* we only really need the language code for case mappings */
length=uloc_getLanguage(locale, csm->locale, (int32_t)sizeof(csm->locale), pErrorCode);
}

View File

@ -616,6 +616,33 @@ uscript_getScriptExtensions(UChar32 c,
return length;
}
namespace {

/**
 * Range callback passed to utrie2_enum() by uprv_addScriptExtensionsCodePoints().
 *
 * @param context The USetAdder that receives the matching ranges.
 * @param start   First code point of the range.
 * @param end     Last code point of the range, passed to addRange() as the range end.
 * @param value   Trie value for the range; used here as the index of the
 *                range's properties vector (column 0) in propsVectors.
 * @return Always true.
 */
UBool U_CALLCONV
_scxRange(const void *context, UChar32 start, UChar32 end, uint32_t value) {
    // Same word that u_getUnicodeProperties(start, 0) would return:
    // vecIndex=value, column 0. Keep only the script/Script_Extensions bits.
    const uint32_t scriptBits = propsVectors[value] & UPROPS_SCRIPT_X_MASK;
    if (scriptBits < UPROPS_SCRIPT_X_WITH_COMMON) {
        // Code points start..end do not have Script_Extensions; nothing to add.
        return true;
    }
    // Code points start..end have Script_Extensions.
    const USetAdder *adder = static_cast<const USetAdder *>(context);
    adder->addRange(adder->set, start, end);
    return true;
}

}  // namespace
/**
 * For icuexportdata: enumerates propsVectorsTrie and reports, through sa,
 * every range of code points that _scxRange() identifies as having
 * Script_Extensions.
 *
 * @param sa         Set adder whose addRange() receives the ranges.
 * @param pErrorCode In/out ICU error code. If it already indicates a failure
 *                   on entry, the function does nothing. It is only read here.
 */
U_CAPI void U_EXPORT2
uprv_addScriptExtensionsCodePoints(const USetAdder *sa, UErrorCode *pErrorCode) {
    if (!U_FAILURE(*pErrorCode)) {
        // No value-mapping callback is supplied (nullptr); _scxRange treats
        // the value it receives as a propsVectors index.
        utrie2_enum(&propsVectorsTrie, nullptr, _scxRange, sa);
    }
}
U_CAPI UBlockCode U_EXPORT2
ublock_getCode(UChar32 c) {
// We store Block values indexed by the code point shifted right 4 bits

File diff suppressed because it is too large Load Diff

View File

@ -1752,20 +1752,24 @@ ucnv_fromUChars(UConverter *cnv,
destLimit=dest+destCapacity;
/* perform the conversion */
ucnv_fromUnicode(cnv, &dest, destLimit, &src, srcLimit, nullptr, true, pErrorCode);
UErrorCode bufferStatus = U_ZERO_ERROR;
ucnv_fromUnicode(cnv, &dest, destLimit, &src, srcLimit, nullptr, true, &bufferStatus);
destLength=(int32_t)(dest-originalDest);
/* if an overflow occurs, then get the preflighting length */
if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
if(bufferStatus==U_BUFFER_OVERFLOW_ERROR) {
char buffer[1024];
destLimit=buffer+sizeof(buffer);
do {
dest=buffer;
*pErrorCode=U_ZERO_ERROR;
ucnv_fromUnicode(cnv, &dest, destLimit, &src, srcLimit, nullptr, true, pErrorCode);
bufferStatus=U_ZERO_ERROR;
ucnv_fromUnicode(cnv, &dest, destLimit, &src, srcLimit, nullptr, true, &bufferStatus);
destLength+=(int32_t)(dest-buffer);
} while(*pErrorCode==U_BUFFER_OVERFLOW_ERROR);
} while(bufferStatus==U_BUFFER_OVERFLOW_ERROR);
}
if (U_FAILURE(bufferStatus)) {
*pErrorCode = bufferStatus;
}
} else {
destLength=0;
@ -1808,22 +1812,26 @@ ucnv_toUChars(UConverter *cnv,
destLimit=dest+destCapacity;
/* perform the conversion */
ucnv_toUnicode(cnv, &dest, destLimit, &src, srcLimit, nullptr, true, pErrorCode);
UErrorCode bufferStatus = U_ZERO_ERROR;
ucnv_toUnicode(cnv, &dest, destLimit, &src, srcLimit, nullptr, true, &bufferStatus);
destLength=(int32_t)(dest-originalDest);
/* if an overflow occurs, then get the preflighting length */
if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR)
if(bufferStatus==U_BUFFER_OVERFLOW_ERROR)
{
char16_t buffer[1024];
destLimit=buffer+UPRV_LENGTHOF(buffer);
do {
dest=buffer;
*pErrorCode=U_ZERO_ERROR;
ucnv_toUnicode(cnv, &dest, destLimit, &src, srcLimit, nullptr, true, pErrorCode);
bufferStatus=U_ZERO_ERROR;
ucnv_toUnicode(cnv, &dest, destLimit, &src, srcLimit, nullptr, true, &bufferStatus);
destLength+=(int32_t)(dest-buffer);
}
while(*pErrorCode==U_BUFFER_OVERFLOW_ERROR);
while(bufferStatus==U_BUFFER_OVERFLOW_ERROR);
}
if (U_FAILURE(bufferStatus)) {
*pErrorCode = bufferStatus;
}
} else {
destLength=0;

View File

@ -597,7 +597,7 @@ _ISO2022Open(UConverter *cnv, UConverterLoadArgs *pArgs, UErrorCode *errorCode){
/* open the required converters and cache them */
myConverterData->myConverterArray[GB2312_1] =
ucnv_loadSharedData("ibm-5478", &stackPieces, &stackArgs, errorCode);
if(version==1) {
if(version>=1) {
myConverterData->myConverterArray[ISO_IR_165] =
ucnv_loadSharedData("iso-ir-165", &stackPieces, &stackArgs, errorCode);
}

View File

@ -40,6 +40,7 @@
#include "uarrsort.h"
#include "uassert.h"
#include "udataswp.h"
#include "udatamem.h"
#include "cstring.h"
#include "cmemory.h"
#include "ucnv_io.h"
@ -235,6 +236,8 @@ static void U_CALLCONV initAliasData(UErrorCode &errCode) {
const uint32_t *sectionSizes;
uint32_t tableStart;
uint32_t currOffset;
int32_t sizeOfData;
int32_t sizeOfTOC;
ucln_common_registerCleanup(UCLN_COMMON_UCNV_IO, ucnv_io_cleanup);
@ -245,13 +248,17 @@ static void U_CALLCONV initAliasData(UErrorCode &errCode) {
}
sectionSizes = static_cast<const uint32_t*>(udata_getMemory(data));
int32_t dataLength = udata_getLength(data); // This is the length minus the UDataInfo size
if (dataLength <= int32_t(sizeof(sectionSizes[0]))) {
// We don't even have a TOC!
goto invalidFormat;
}
table = reinterpret_cast<const uint16_t*>(sectionSizes);
tableStart = sectionSizes[0];
if (tableStart < minTocLength) {
errCode = U_INVALID_FORMAT_ERROR;
udata_close(data);
return;
sizeOfTOC = int32_t((tableStart + 1) * sizeof(sectionSizes[0]));
if (tableStart < minTocLength || dataLength <= sizeOfTOC) {
// We don't have a whole TOC!
goto invalidFormat;
}
gAliasData = data;
@ -264,11 +271,21 @@ static void U_CALLCONV initAliasData(UErrorCode &errCode) {
gMainTable.optionTableSize = sectionSizes[7];
gMainTable.stringTableSize = sectionSizes[8];
if (tableStart > 8) {
if (tableStart > minTocLength) {
gMainTable.normalizedStringTableSize = sectionSizes[9];
}
currOffset = tableStart * (sizeof(uint32_t)/sizeof(uint16_t)) + (sizeof(uint32_t)/sizeof(uint16_t));
sizeOfData = sizeOfTOC;
for (uint32_t section = 1; section <= tableStart; section++) {
sizeOfData += sectionSizes[section] * sizeof(table[0]);
}
if (dataLength < sizeOfData) {
// Truncated file!
goto invalidFormat;
}
// There may be some extra padding at the end, or this is a new file format with extra data that we can't read yet.
currOffset = (tableStart + 1) * (sizeof(uint32_t)/sizeof(uint16_t));
gMainTable.converterList = table + currOffset;
currOffset += gMainTable.converterListSize;
@ -306,6 +323,12 @@ static void U_CALLCONV initAliasData(UErrorCode &errCode) {
currOffset += gMainTable.stringTableSize;
gMainTable.normalizedStringTable = ((gMainTable.optionTable->stringNormalizationType == UCNV_IO_UNNORMALIZED)
? gMainTable.stringTable : (table + currOffset));
return;
invalidFormat:
errCode = U_INVALID_FORMAT_ERROR;
udata_close(data);
}

View File

@ -1004,6 +1004,7 @@ static UDataMemory *doLoadFromIndividualFiles(const char *pkgName,
* and return it. */
pEntryData->mapAddr = dataMemory.mapAddr;
pEntryData->map = dataMemory.map;
pEntryData->length = dataMemory.length;
#ifdef UDATA_DEBUG
fprintf(stderr, "** Mapped file: %s\n", pathBuffer);

View File

@ -248,9 +248,10 @@ _internal_toASCII(const char16_t* src, int32_t srcLength,
if(srcIsASCII == false){
// step 2
b1Len = usprep_prepare(nameprep, src, srcLength, b1, b1Capacity, namePrepOptions, parseError, status);
UErrorCode bufferStatus = U_ZERO_ERROR;
b1Len = usprep_prepare(nameprep, src, srcLength, b1, b1Capacity, namePrepOptions, parseError, &bufferStatus);
if(*status == U_BUFFER_OVERFLOW_ERROR){
if(bufferStatus == U_BUFFER_OVERFLOW_ERROR){
// redo processing of string
// we do not have enough room so grow the buffer
if(b1 != b1Stack){
@ -262,9 +263,12 @@ _internal_toASCII(const char16_t* src, int32_t srcLength,
goto CLEANUP;
}
*status = U_ZERO_ERROR; // reset error
bufferStatus = U_ZERO_ERROR; // reset error
b1Len = usprep_prepare(nameprep, src, srcLength, b1, b1Len, namePrepOptions, parseError, status);
b1Len = usprep_prepare(nameprep, src, srcLength, b1, b1Len, namePrepOptions, parseError, &bufferStatus);
}
if (U_FAILURE(bufferStatus)) {
*status = bufferStatus;
}
}
// error bail out
@ -333,9 +337,10 @@ _internal_toASCII(const char16_t* src, int32_t srcLength,
// caseFlags = (UBool*) uprv_malloc(b1Len * sizeof(UBool));
// uprv_memset(caseFlags,true,b1Len);
b2Len = u_strToPunycode(b1,b1Len,b2,b2Capacity,caseFlags, status);
UErrorCode bufferStatus = U_ZERO_ERROR;
b2Len = u_strToPunycode(b1,b1Len,b2,b2Capacity,caseFlags,&bufferStatus);
if(*status == U_BUFFER_OVERFLOW_ERROR){
if(bufferStatus == U_BUFFER_OVERFLOW_ERROR){
// redo processing of string
/* we do not have enough room so grow the buffer*/
b2 = static_cast<char16_t*>(uprv_malloc(b2Len * U_SIZEOF_UCHAR));
@ -344,12 +349,13 @@ _internal_toASCII(const char16_t* src, int32_t srcLength,
goto CLEANUP;
}
*status = U_ZERO_ERROR; // reset error
bufferStatus = U_ZERO_ERROR; // reset error
b2Len = u_strToPunycode(b1,b1Len,b2,b2Len,caseFlags, status);
b2Len = u_strToPunycode(b1,b1Len,b2,b2Len,caseFlags,&bufferStatus);
}
//error bail out
if(U_FAILURE(*status)){
if(U_FAILURE(bufferStatus)){
*status = bufferStatus;
goto CLEANUP;
}
// TODO : Reconsider while implementing the case preserve RFE
@ -454,8 +460,9 @@ _internal_toUnicode(const char16_t* src, int32_t srcLength,
if(srcIsASCII == false){
// step 2: process the string
b1Len = usprep_prepare(nameprep, src, srcLength, b1, b1Capacity, namePrepOptions, parseError, status);
if(*status == U_BUFFER_OVERFLOW_ERROR){
UErrorCode bufferStatus = U_ZERO_ERROR;
b1Len = usprep_prepare(nameprep, src, srcLength, b1, b1Capacity, namePrepOptions, parseError, &bufferStatus);
if(bufferStatus == U_BUFFER_OVERFLOW_ERROR){
// redo processing of string
/* we do not have enough room so grow the buffer*/
b1 = static_cast<char16_t*>(uprv_malloc(b1Len * U_SIZEOF_UCHAR));
@ -464,12 +471,13 @@ _internal_toUnicode(const char16_t* src, int32_t srcLength,
goto CLEANUP;
}
*status = U_ZERO_ERROR; // reset error
bufferStatus = U_ZERO_ERROR; // reset error
b1Len = usprep_prepare(nameprep, src, srcLength, b1, b1Len, namePrepOptions, parseError, status);
b1Len = usprep_prepare(nameprep, src, srcLength, b1, b1Len, namePrepOptions, parseError, &bufferStatus);
}
//bail out on error
if(U_FAILURE(*status)){
if(U_FAILURE(bufferStatus)){
*status = bufferStatus;
goto CLEANUP;
}
}else{
@ -493,9 +501,10 @@ _internal_toUnicode(const char16_t* src, int32_t srcLength,
b1PrimeLen = b1Len - ACE_PREFIX_LENGTH;
//step 5: Decode using punycode
b2Len = u_strFromPunycode(b1Prime, b1PrimeLen, b2, b2Capacity, caseFlags,status);
UErrorCode bufferStatus = U_ZERO_ERROR;
b2Len = u_strFromPunycode(b1Prime, b1PrimeLen, b2, b2Capacity, caseFlags, &bufferStatus);
if(*status == U_BUFFER_OVERFLOW_ERROR){
if(bufferStatus == U_BUFFER_OVERFLOW_ERROR){
// redo processing of string
/* we do not have enough room so grow the buffer*/
b2 = static_cast<char16_t*>(uprv_malloc(b2Len * U_SIZEOF_UCHAR));
@ -504,16 +513,16 @@ _internal_toUnicode(const char16_t* src, int32_t srcLength,
goto CLEANUP;
}
*status = U_ZERO_ERROR; // reset error
bufferStatus = U_ZERO_ERROR; // reset error
b2Len = u_strFromPunycode(b1Prime, b1PrimeLen, b2, b2Len, caseFlags, status);
b2Len = u_strFromPunycode(b1Prime, b1PrimeLen, b2, b2Len, caseFlags, &bufferStatus);
}
//step 6:Apply toASCII
b3Len = uidna_toASCII(b2, b2Len, b3, b3Capacity, options, parseError, status);
b3Len = uidna_toASCII(b2, b2Len, b3, b3Capacity, options, parseError, &bufferStatus);
if(*status == U_BUFFER_OVERFLOW_ERROR){
if(bufferStatus == U_BUFFER_OVERFLOW_ERROR){
// redo processing of string
/* we do not have enough room so grow the buffer*/
b3 = static_cast<char16_t*>(uprv_malloc(b3Len * U_SIZEOF_UCHAR));
@ -522,13 +531,14 @@ _internal_toUnicode(const char16_t* src, int32_t srcLength,
goto CLEANUP;
}
*status = U_ZERO_ERROR; // reset error
bufferStatus = U_ZERO_ERROR; // reset error
b3Len = uidna_toASCII(b2,b2Len,b3,b3Len,options,parseError, status);
b3Len = uidna_toASCII(b2,b2Len,b3,b3Len,options,parseError,&bufferStatus);
}
//bail out on error
if(U_FAILURE(*status)){
if(U_FAILURE(bufferStatus)){
*status = bufferStatus;
goto CLEANUP;
}
@ -707,22 +717,19 @@ uidna_IDNToASCII( const char16_t *src, int32_t srcLength,
labelReqLength = 0;
if(!(labelLen==0 && done)){// make sure this is not a root label separator.
UErrorCode bufferStatus = U_ZERO_ERROR;
labelReqLength = _internal_toASCII( labelStart, labelLen,
currentDest, remainingDestCapacity,
options, nameprep,
parseError, status);
parseError, &bufferStatus);
if(*status == U_BUFFER_OVERFLOW_ERROR){
*status = U_ZERO_ERROR; // reset error
if (bufferStatus == U_BUFFER_OVERFLOW_ERROR) {
remainingDestCapacity = 0;
}
}
if(U_FAILURE(*status)){
} else if (U_FAILURE(bufferStatus)) {
*status = bufferStatus;
break;
}
}
reqLength +=labelReqLength;
// adjust the destination pointer
@ -877,8 +884,9 @@ uidna_compare( const char16_t *s1, int32_t length1,
UParseError parseError;
b1Len = uidna_IDNToASCII(s1, length1, b1, b1Capacity, options, &parseError, status);
if(*status == U_BUFFER_OVERFLOW_ERROR){
UErrorCode bufferStatus = U_ZERO_ERROR;
b1Len = uidna_IDNToASCII(s1, length1, b1, b1Capacity, options, &parseError, &bufferStatus);
if(bufferStatus == U_BUFFER_OVERFLOW_ERROR){
// redo processing of string
b1 = (char16_t*) uprv_malloc(b1Len * U_SIZEOF_UCHAR);
if(b1==nullptr){
@ -886,14 +894,13 @@ uidna_compare( const char16_t *s1, int32_t length1,
goto CLEANUP;
}
*status = U_ZERO_ERROR; // reset error
b1Len = uidna_IDNToASCII(s1,length1,b1,b1Len, options, &parseError, status);
bufferStatus = U_ZERO_ERROR; // reset error
b1Len = uidna_IDNToASCII(s1,length1,b1,b1Len, options, &parseError, &bufferStatus);
}
b2Len = uidna_IDNToASCII(s2,length2, b2,b2Capacity, options, &parseError, status);
if(*status == U_BUFFER_OVERFLOW_ERROR){
b2Len = uidna_IDNToASCII(s2,length2, b2,b2Capacity, options, &parseError, &bufferStatus);
if(bufferStatus == U_BUFFER_OVERFLOW_ERROR){
// redo processing of string
b2 = (char16_t*) uprv_malloc(b2Len * U_SIZEOF_UCHAR);
if(b2==nullptr){
@ -901,11 +908,15 @@ uidna_compare( const char16_t *s1, int32_t length1,
goto CLEANUP;
}
*status = U_ZERO_ERROR; // reset error
b2Len = uidna_IDNToASCII(s2, length2, b2, b2Len, options, &parseError, status);
bufferStatus = U_ZERO_ERROR; // reset error
b2Len = uidna_IDNToASCII(s2, length2, b2, b2Len, options, &parseError, &bufferStatus);
}
if (U_FAILURE(bufferStatus)) {
*status = bufferStatus;
}
// when toASCII is applied all label separators are replaced with FULL_STOP
result = compareCaseInsensitiveASCII(b1,b1Len,b2,b2Len);

View File

@ -627,7 +627,7 @@ ulocimp_getKeywords(std::string_view localeID,
do {
bool duplicate = false;
/* skip leading spaces */
while (localeID.front() == ' ') {
while (!localeID.empty() && localeID.front() == ' ') {
localeID.remove_prefix(1);
}
if (localeID.empty()) { /* handle trailing "; " */
@ -1102,7 +1102,21 @@ ulocimp_setKeywordValue(std::string_view keywords,
/* if input key/value specified removal of a keyword not present in locale, or
* there was an error in CharString.append, leave original locale alone. */
U_ASSERT(status != U_STRING_NOT_TERMINATED_WARNING);
return static_cast<int32_t>(keywords.size());
// The sink is expected to be a buffer which already contains the full
// locale string, so when it isn't going to be modified there's no need
// to actually write any data to it, as the data is already there. Only
// the first character needs to be overwritten (changing '\0' to '@').
needLen = static_cast<int32_t>(keywords.size());
int32_t capacity = 0;
char* buffer = sink.GetAppendBuffer(
needLen, needLen, nullptr, needLen, &capacity);
if (capacity < needLen || buffer == nullptr) {
status = U_BUFFER_OVERFLOW_ERROR;
} else {
*buffer = '@';
sink.Append(buffer, needLen);
}
return needLen;
}
needLen = updatedKeysAndValues.length();

View File

@ -14,9 +14,9 @@
#include "unicode/unistr.h"
#include "unicode/uobject.h"
#include "charstr.h"
#include "cmemory.h"
#include "cstring.h"
#include "fixedstring.h"
#include "uassert.h"
#include "ucln_cmn.h"
#include "uhash.h"
@ -53,7 +53,7 @@ struct TypeAlias : public icu::UMemory {
std::string_view from;
};
static icu::MemoryPool<icu::CharString>* gKeyTypeStringPool = nullptr;
static icu::MemoryPool<icu::FixedString>* gKeyTypeStringPool = nullptr;
static icu::MemoryPool<LocExtKeyData>* gLocExtKeyDataEntries = nullptr;
static icu::MemoryPool<LocExtType>* gLocExtTypeEntries = nullptr;
static icu::MemoryPool<TypeAlias>* gTypeAliasEntries = nullptr;
@ -108,7 +108,7 @@ initFromResourceBundle(UErrorCode& sts) {
LocalUResourceBundlePointer bcpTypeAliasRes(ures_getByKey(keyTypeDataRes.getAlias(), "bcpTypeAlias", nullptr, &tmpSts));
// initialize pools storing dynamically allocated objects
gKeyTypeStringPool = new icu::MemoryPool<icu::CharString>;
gKeyTypeStringPool = new icu::MemoryPool<icu::FixedString>;
if (gKeyTypeStringPool == nullptr) {
sts = U_MEMORY_ALLOCATION_ERROR;
return;
@ -146,12 +146,12 @@ initFromResourceBundle(UErrorCode& sts) {
// empty value indicates that BCP key is same with the legacy key.
const char* bcpKeyId = legacyKeyId;
if (!uBcpKeyId.isEmpty()) {
icu::CharString* bcpKeyIdBuf = gKeyTypeStringPool->create();
icu::FixedString* bcpKeyIdBuf = gKeyTypeStringPool->create();
if (bcpKeyIdBuf == nullptr) {
sts = U_MEMORY_ALLOCATION_ERROR;
break;
}
bcpKeyIdBuf->appendInvariantChars(uBcpKeyId, sts);
copyInvariantChars(uBcpKeyId, *bcpKeyIdBuf, sts);
if (U_FAILURE(sts)) {
break;
}
@ -220,18 +220,16 @@ initFromResourceBundle(UErrorCode& sts) {
// a timezone key uses a colon instead of a slash in the resource.
// e.g. America:Los_Angeles
if (uprv_strchr(legacyTypeId, ':') != nullptr) {
icu::CharString* legacyTypeIdBuf =
gKeyTypeStringPool->create(legacyTypeId, sts);
if (legacyTypeIdBuf == nullptr) {
U_ASSERT(legacyTypeId != nullptr && *legacyTypeId != '\0');
std::string_view legacyTypeIdView = legacyTypeId;
icu::FixedString* legacyTypeIdBuf = gKeyTypeStringPool->create(legacyTypeIdView);
if (legacyTypeIdBuf == nullptr || legacyTypeIdBuf->isEmpty()) {
sts = U_MEMORY_ALLOCATION_ERROR;
break;
}
if (U_FAILURE(sts)) {
break;
}
std::replace(
legacyTypeIdBuf->data(),
legacyTypeIdBuf->data() + legacyTypeIdBuf->length(),
legacyTypeIdBuf->getAlias(),
legacyTypeIdBuf->getAlias() + legacyTypeIdView.length(),
':', '/');
legacyTypeId = legacyTypeIdBuf->data();
}
@ -245,12 +243,12 @@ initFromResourceBundle(UErrorCode& sts) {
// empty value indicates that BCP type is same with the legacy type.
const char* bcpTypeId = legacyTypeId;
if (!uBcpTypeId.isEmpty()) {
icu::CharString* bcpTypeIdBuf = gKeyTypeStringPool->create();
icu::FixedString* bcpTypeIdBuf = gKeyTypeStringPool->create();
if (bcpTypeIdBuf == nullptr) {
sts = U_MEMORY_ALLOCATION_ERROR;
break;
}
bcpTypeIdBuf->appendInvariantChars(uBcpTypeId, sts);
copyInvariantChars(uBcpTypeId, *bcpTypeIdBuf, sts);
if (U_FAILURE(sts)) {
break;
}
@ -302,20 +300,18 @@ initFromResourceBundle(UErrorCode& sts) {
if (isTZ) {
// replace colon with slash if necessary
if (uprv_strchr(from, ':') != nullptr) {
icu::CharString* fromBuf =
gKeyTypeStringPool->create(from, sts);
if (fromBuf == nullptr) {
U_ASSERT(from != nullptr && *from != '\0');
std::string_view fromView = from;
icu::FixedString* fromBuf = gKeyTypeStringPool->create(fromView);
if (fromBuf == nullptr || fromBuf->isEmpty()) {
sts = U_MEMORY_ALLOCATION_ERROR;
break;
}
if (U_FAILURE(sts)) {
break;
}
std::replace(
fromBuf->data(),
fromBuf->data() + fromBuf->length(),
fromBuf->getAlias(),
fromBuf->getAlias() + fromView.length(),
':', '/');
alias->from = fromBuf->toStringPiece();
alias->from = {fromBuf->data(), fromView.length()};
}
}
uhash_put(typeDataMap, &alias->from, t, &sts);

View File

@ -55,95 +55,95 @@ uloc_getCurrentCountryID(const char* oldID);
U_CFUNC const char*
uloc_getCurrentLanguageID(const char* oldID);
U_EXPORT std::optional<std::string_view>
U_COMMON_API std::optional<std::string_view>
ulocimp_toBcpKeyWithFallback(std::string_view keyword);
U_EXPORT std::optional<std::string_view>
U_COMMON_API std::optional<std::string_view>
ulocimp_toBcpTypeWithFallback(std::string_view keyword, std::string_view value);
U_EXPORT std::optional<std::string_view>
U_COMMON_API std::optional<std::string_view>
ulocimp_toLegacyKeyWithFallback(std::string_view keyword);
U_EXPORT std::optional<std::string_view>
U_COMMON_API std::optional<std::string_view>
ulocimp_toLegacyTypeWithFallback(std::string_view keyword, std::string_view value);
U_EXPORT icu::CharString
U_COMMON_API icu::CharString
ulocimp_getKeywords(std::string_view localeID,
char prev,
bool valuesToo,
UErrorCode& status);
U_EXPORT void
U_COMMON_API void
ulocimp_getKeywords(std::string_view localeID,
char prev,
icu::ByteSink& sink,
bool valuesToo,
UErrorCode& status);
U_EXPORT icu::CharString
U_COMMON_API icu::CharString
ulocimp_getName(std::string_view localeID,
UErrorCode& err);
U_EXPORT void
U_COMMON_API void
ulocimp_getName(std::string_view localeID,
icu::ByteSink& sink,
UErrorCode& err);
U_EXPORT icu::CharString
U_COMMON_API icu::CharString
ulocimp_getBaseName(std::string_view localeID,
UErrorCode& err);
U_EXPORT void
U_COMMON_API void
ulocimp_getBaseName(std::string_view localeID,
icu::ByteSink& sink,
UErrorCode& err);
U_EXPORT icu::CharString
U_COMMON_API icu::CharString
ulocimp_canonicalize(std::string_view localeID,
UErrorCode& err);
U_EXPORT void
U_COMMON_API void
ulocimp_canonicalize(std::string_view localeID,
icu::ByteSink& sink,
UErrorCode& err);
U_EXPORT icu::CharString
U_COMMON_API icu::CharString
ulocimp_getKeywordValue(const char* localeID,
std::string_view keywordName,
UErrorCode& status);
U_EXPORT void
U_COMMON_API void
ulocimp_getKeywordValue(const char* localeID,
std::string_view keywordName,
icu::ByteSink& sink,
UErrorCode& status);
U_EXPORT icu::CharString
U_COMMON_API icu::CharString
ulocimp_getLanguage(std::string_view localeID, UErrorCode& status);
U_EXPORT icu::CharString
U_COMMON_API icu::CharString
ulocimp_getScript(std::string_view localeID, UErrorCode& status);
U_EXPORT icu::CharString
U_COMMON_API icu::CharString
ulocimp_getRegion(std::string_view localeID, UErrorCode& status);
U_EXPORT icu::CharString
U_COMMON_API icu::CharString
ulocimp_getVariant(std::string_view localeID, UErrorCode& status);
U_EXPORT void
U_COMMON_API void
ulocimp_setKeywordValue(std::string_view keywordName,
std::string_view keywordValue,
icu::CharString& localeID,
UErrorCode& status);
U_EXPORT int32_t
U_COMMON_API int32_t
ulocimp_setKeywordValue(std::string_view keywords,
std::string_view keywordName,
std::string_view keywordValue,
icu::ByteSink& sink,
UErrorCode& status);
U_EXPORT void
U_COMMON_API void
ulocimp_getSubtags(
std::string_view localeID,
icu::CharString* language,
@ -153,7 +153,7 @@ ulocimp_getSubtags(
const char** pEnd,
UErrorCode& status);
U_EXPORT void
U_COMMON_API void
ulocimp_getSubtags(
std::string_view localeID,
icu::ByteSink* language,
@ -182,16 +182,16 @@ ulocimp_getSubtags(
status);
}
U_EXPORT icu::CharString
U_COMMON_API icu::CharString
ulocimp_getParent(const char* localeID,
UErrorCode& err);
U_EXPORT void
U_COMMON_API void
ulocimp_getParent(const char* localeID,
icu::ByteSink& sink,
UErrorCode& err);
U_EXPORT icu::CharString
U_COMMON_API icu::CharString
ulocimp_toLanguageTag(const char* localeID,
bool strict,
UErrorCode& status);
@ -215,13 +215,13 @@ ulocimp_toLanguageTag(const char* localeID,
*
* @internal ICU 64
*/
U_EXPORT void
U_COMMON_API void
ulocimp_toLanguageTag(const char* localeID,
icu::ByteSink& sink,
bool strict,
UErrorCode& err);
U_EXPORT icu::CharString
U_COMMON_API icu::CharString
ulocimp_forLanguageTag(const char* langtag,
int32_t tagLen,
int32_t* parsedLength,
@ -253,7 +253,7 @@ ulocimp_forLanguageTag(const char* langtag,
* failed.
* @internal ICU 63
*/
U_EXPORT void
U_COMMON_API void
ulocimp_forLanguageTag(const char* langtag,
int32_t tagLen,
icu::ByteSink& sink,
@ -280,11 +280,11 @@ ulocimp_forLanguageTag(const char* langtag,
* The region code found, empty if none found.
* @internal ICU 57
*/
U_EXPORT icu::CharString
U_COMMON_API icu::CharString
ulocimp_getRegionForSupplementalData(const char *localeID, bool inferRegion,
UErrorCode& status);
U_EXPORT icu::CharString
U_COMMON_API icu::CharString
ulocimp_addLikelySubtags(const char* localeID,
UErrorCode& status);
@ -317,12 +317,12 @@ ulocimp_addLikelySubtags(const char* localeID,
* or the localeId is not well-formed, the error code is U_ILLEGAL_ARGUMENT_ERROR.
* @internal ICU 64
*/
U_EXPORT void
U_COMMON_API void
ulocimp_addLikelySubtags(const char* localeID,
icu::ByteSink& sink,
UErrorCode& err);
U_EXPORT icu::CharString
U_COMMON_API icu::CharString
ulocimp_minimizeSubtags(const char* localeID,
bool favorScript,
UErrorCode& status);
@ -357,7 +357,7 @@ ulocimp_minimizeSubtags(const char* localeID,
* or the localeId is not well-formed, the error code is U_ILLEGAL_ARGUMENT_ERROR.
* @internal ICU 64
*/
U_EXPORT void
U_COMMON_API void
ulocimp_minimizeSubtags(const char* localeID,
icu::ByteSink& sink,
bool favorScript,
@ -405,24 +405,24 @@ ultag_isVariantSubtags(const char* s, int32_t len);
const char*
ultag_getTKeyStart(const char* localeID);
U_EXPORT std::optional<std::string_view>
U_COMMON_API std::optional<std::string_view>
ulocimp_toBcpKey(std::string_view key);
U_EXPORT std::optional<std::string_view>
U_COMMON_API std::optional<std::string_view>
ulocimp_toLegacyKey(std::string_view key);
U_EXPORT std::optional<std::string_view>
U_COMMON_API std::optional<std::string_view>
ulocimp_toBcpType(std::string_view key, std::string_view type);
U_EXPORT std::optional<std::string_view>
U_COMMON_API std::optional<std::string_view>
ulocimp_toLegacyType(std::string_view key, std::string_view type);
/* Function for testing purpose */
U_EXPORT const char* const*
U_COMMON_API const char* const*
ulocimp_getKnownCanonicalizedLocaleForTest(int32_t& length);
// Return true if the value is already canonicalized.
U_EXPORT bool
U_COMMON_API bool
ulocimp_isCanonicalizedLocaleForTest(const char* localeName);
#ifdef __cplusplus

View File

@ -119,6 +119,7 @@ typedef HANDLE MemoryMap;
HANDLE map = nullptr;
HANDLE file = INVALID_HANDLE_VALUE;
DWORD fileLength = 0;
UDataMemory_init(pData); /* Clear the output struct. */
@ -159,6 +160,8 @@ typedef HANDLE MemoryMap;
return false;
}
fileLength = GetFileSize(file, nullptr);
// Note: We use nullptr/nullptr for lpAttributes parameter below.
// This means our handle cannot be inherited and we will get the default security descriptor.
/* create an unnamed Windows file-mapping object for the specified file */
@ -181,6 +184,8 @@ typedef HANDLE MemoryMap;
return false;
}
pData->map = map;
pData->length = fileLength;
return true;
}
@ -237,6 +242,7 @@ typedef HANDLE MemoryMap;
pData->map = (char *)data + length;
pData->pHeader=(const DataHeader *)data;
pData->mapAddr = data;
pData->length = length;
#if U_PLATFORM == U_PF_IPHONE || U_PLATFORM == U_PF_ANDROID
// Apparently supported from Android 23 and higher:
// https://github.com/ggml-org/llama.cpp/pull/3631
@ -320,6 +326,7 @@ typedef HANDLE MemoryMap;
pData->map=p;
pData->pHeader=(const DataHeader *)p;
pData->mapAddr=p;
pData->length = fileLength;
return true;
}

View File

@ -37,31 +37,6 @@
#error U_USER_ATOMICS and U_USER_MUTEX_H are not supported
#endif
// Export an explicit template instantiation of std::atomic<int32_t>.
// When building DLLs for Windows this is required as it is used as a data member of the exported SharedObject class.
// See digitlst.h, pluralaffix.h, datefmt.h, and others for similar examples.
//
// Similar story for std::atomic<std::mutex *>, and the exported UMutex class.
#if U_PF_WINDOWS <= U_PLATFORM && U_PLATFORM <= U_PF_CYGWIN && !defined(U_IN_DOXYGEN)
#if defined(__clang__) || defined(_MSC_VER)
#if defined(__clang__)
// Suppress the warning that the explicit instantiation after explicit specialization has no effect.
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Winstantiation-after-specialization"
#endif
template struct U_COMMON_API std::atomic<int32_t>;
template struct U_COMMON_API std::atomic<std::mutex *>;
#if defined(__clang__)
#pragma clang diagnostic pop
#endif
#elif defined(__GNUC__)
// For GCC this class is already exported/visible, so no need for U_COMMON_API.
template struct std::atomic<int32_t>;
template struct std::atomic<std::mutex *>;
#endif
#endif
U_NAMESPACE_BEGIN
/****************************************************************************
@ -95,11 +70,22 @@ inline int32_t umtx_atomic_dec(u_atomic_int32_t *var) {
*
*************************************************************************************************/
struct U_COMMON_API UInitOnce {
struct U_COMMON_API_CLASS UInitOnce {
private:
friend U_COMMON_API UBool U_EXPORT2 umtx_initImplPreInit(UInitOnce&);
friend U_COMMON_API void U_EXPORT2 umtx_initImplPostInit(UInitOnce&);
template <typename T> friend void umtx_initOnce(UInitOnce&, T*, void (T::*)());
friend void umtx_initOnce(UInitOnce&, void (*)());
friend void umtx_initOnce(UInitOnce&, void (*)(UErrorCode&), UErrorCode&);
template <typename T> friend void umtx_initOnce(UInitOnce&, void (*)(T), T);
template <typename T> friend void umtx_initOnce(UInitOnce&, void (*)(T, UErrorCode&), T, UErrorCode&);
u_atomic_int32_t fState{0};
UErrorCode fErrCode{U_ZERO_ERROR};
void reset() {fState = 0;}
UBool isReset() {return umtx_loadAcquire(fState) == 0;}
public:
U_COMMON_API void reset() { fState = 0; }
U_COMMON_API UBool isReset() { return umtx_loadAcquire(fState) == 0; }
// Note: isReset() is used by service registration code.
// Thread safety of this usage needs review.
};
@ -216,24 +202,24 @@ template<class T> void umtx_initOnce(UInitOnce &uio, void (U_CALLCONV *fp)(T, UE
* } // myMutex is released when lock goes out of scope.
*/
class U_COMMON_API UMutex {
class U_COMMON_API_CLASS UMutex {
public:
UMUTEX_CONSTEXPR UMutex() {}
~UMutex() = default;
U_COMMON_API UMUTEX_CONSTEXPR UMutex() {}
U_COMMON_API ~UMutex() = default;
UMutex(const UMutex &other) = delete;
UMutex &operator =(const UMutex &other) = delete;
void *operator new(size_t) = delete;
U_COMMON_API UMutex(const UMutex& other) = delete;
U_COMMON_API UMutex& operator=(const UMutex& other) = delete;
U_COMMON_API void* operator new(size_t) = delete;
// requirements for C++ BasicLockable, allows UMutex to work with std::lock_guard
void lock() {
U_COMMON_API void lock() {
std::mutex *m = fMutex.load(std::memory_order_acquire);
if (m == nullptr) { m = getMutex(); }
m->lock();
}
void unlock() { fMutex.load(std::memory_order_relaxed)->unlock(); }
U_COMMON_API void unlock() { fMutex.load(std::memory_order_relaxed)->unlock(); }
static void cleanup();
U_COMMON_API static void cleanup();
private:
alignas(std::mutex) char fStorage[sizeof(std::mutex)] {};

View File

@ -58,8 +58,6 @@ U_NAMESPACE_END
U_NAMESPACE_BEGIN
class CharString;
/**
* The BreakIterator class implements methods for finding the location
* of boundaries in text. BreakIterator is an abstract base class.
@ -105,13 +103,13 @@ class CharString;
* and in the sample program icu/source/samples/break/break.cpp
*
*/
class U_COMMON_API BreakIterator : public UObject {
class U_COMMON_API_CLASS BreakIterator : public UObject {
public:
/**
* destructor
* @stable ICU 2.0
*/
virtual ~BreakIterator();
U_COMMON_API virtual ~BreakIterator();
/**
* Return true if another object is semantically equal to this
@ -126,7 +124,7 @@ public:
* object, and styles are not considered.
* @stable ICU 2.0
*/
virtual bool operator==(const BreakIterator&) const = 0;
U_COMMON_API virtual bool operator==(const BreakIterator&) const = 0;
/**
* Returns the complement of the result of operator==
@ -134,27 +132,27 @@ public:
* @return the complement of the result of operator==
* @stable ICU 2.0
*/
bool operator!=(const BreakIterator& rhs) const { return !operator==(rhs); }
U_COMMON_API bool operator!=(const BreakIterator& rhs) const { return !operator==(rhs); }
/**
* Return a polymorphic copy of this object. This is an abstract
* method which subclasses implement.
* @stable ICU 2.0
*/
virtual BreakIterator* clone() const = 0;
U_COMMON_API virtual BreakIterator* clone() const = 0;
/**
* Return a polymorphic class ID for this object. Different subclasses
* will return distinct unequal values.
* @stable ICU 2.0
*/
virtual UClassID getDynamicClassID() const override = 0;
U_COMMON_API virtual UClassID getDynamicClassID() const override = 0;
/**
* Return a CharacterIterator over the text being analyzed.
* @stable ICU 2.0
*/
virtual CharacterIterator& getText() const = 0;
U_COMMON_API virtual CharacterIterator& getText() const = 0;
/**
* Get a UText for the text being analyzed.
@ -170,7 +168,7 @@ public:
* UText was provided, it will always be returned.
* @stable ICU 3.4
*/
virtual UText *getUText(UText *fillIn, UErrorCode &status) const = 0;
U_COMMON_API virtual UText* getUText(UText* fillIn, UErrorCode& status) const = 0;
/**
* Change the text over which this operates. The text boundary is
@ -183,7 +181,7 @@ public:
* @param text The UnicodeString used to change the text.
* @stable ICU 2.0
*/
virtual void setText(const UnicodeString &text) = 0;
U_COMMON_API virtual void setText(const UnicodeString& text) = 0;
/**
* Reset the break iterator to operate over the text represented by
@ -203,7 +201,7 @@ public:
* @param status receives any error codes.
* @stable ICU 3.4
*/
virtual void setText(UText *text, UErrorCode &status) = 0;
U_COMMON_API virtual void setText(UText* text, UErrorCode& status) = 0;
/**
* Change the text over which this operates. The text boundary is
@ -213,7 +211,7 @@ public:
* @param it The CharacterIterator used to change the text.
* @stable ICU 2.0
*/
virtual void adoptText(CharacterIterator* it) = 0;
U_COMMON_API virtual void adoptText(CharacterIterator* it) = 0;
enum {
/**
@ -229,14 +227,14 @@ public:
* @return The offset of the beginning of the text, zero.
* @stable ICU 2.0
*/
virtual int32_t first() = 0;
U_COMMON_API virtual int32_t first() = 0;
/**
* Set the iterator position to the index immediately BEYOND the last character in the text being scanned.
* @return The index immediately BEYOND the last character in the text being scanned.
* @stable ICU 2.0
*/
virtual int32_t last() = 0;
U_COMMON_API virtual int32_t last() = 0;
/**
* Set the iterator position to the boundary preceding the current boundary.
@ -244,7 +242,7 @@ public:
* boundaries have been returned.
* @stable ICU 2.0
*/
virtual int32_t previous() = 0;
U_COMMON_API virtual int32_t previous() = 0;
/**
* Advance the iterator to the boundary following the current boundary.
@ -252,14 +250,14 @@ public:
* boundaries have been returned.
* @stable ICU 2.0
*/
virtual int32_t next() = 0;
U_COMMON_API virtual int32_t next() = 0;
/**
* Return character index of the current iterator position within the text.
* @return The boundary most recently returned.
* @stable ICU 2.0
*/
virtual int32_t current() const = 0;
U_COMMON_API virtual int32_t current() const = 0;
/**
* Advance the iterator to the first boundary following the specified offset.
@ -269,7 +267,7 @@ public:
* @return The first boundary after the specified offset.
* @stable ICU 2.0
*/
virtual int32_t following(int32_t offset) = 0;
U_COMMON_API virtual int32_t following(int32_t offset) = 0;
/**
* Set the iterator position to the first boundary preceding the specified offset.
@ -279,7 +277,7 @@ public:
* @return The first boundary before the specified offset.
* @stable ICU 2.0
*/
virtual int32_t preceding(int32_t offset) = 0;
U_COMMON_API virtual int32_t preceding(int32_t offset) = 0;
/**
* Return true if the specified position is a boundary position.
@ -289,7 +287,7 @@ public:
* @return True if "offset" is a boundary position.
* @stable ICU 2.0
*/
virtual UBool isBoundary(int32_t offset) = 0;
U_COMMON_API virtual UBool isBoundary(int32_t offset) = 0;
/**
* Set the iterator position to the nth boundary from the current boundary
@ -300,7 +298,7 @@ public:
* DONE if there are fewer than |n| boundaries in the specified direction.
* @stable ICU 2.0
*/
virtual int32_t next(int32_t n) = 0;
U_COMMON_API virtual int32_t next(int32_t n) = 0;
/**
* For RuleBasedBreakIterators, return the status tag from the break rule
@ -315,7 +313,7 @@ public:
* @see UWordBreak
* @stable ICU 52
*/
virtual int32_t getRuleStatus() const;
U_COMMON_API virtual int32_t getRuleStatus() const;
/**
* For RuleBasedBreakIterators, get the status (tag) values from the break rule(s)
@ -345,7 +343,9 @@ public:
* @see getRuleStatus
* @stable ICU 52
*/
virtual int32_t getRuleStatusVec(int32_t *fillInVec, int32_t capacity, UErrorCode &status);
U_COMMON_API virtual int32_t getRuleStatusVec(int32_t* fillInVec,
int32_t capacity,
UErrorCode& status);
/**
* Create BreakIterator for word-breaks using the given locale.
@ -366,7 +366,7 @@ public:
* The caller owns the returned object and is responsible for deleting it.
* @stable ICU 2.0
*/
static BreakIterator* U_EXPORT2
U_COMMON_API static BreakIterator* U_EXPORT2
createWordInstance(const Locale& where, UErrorCode& status);
/**
@ -390,7 +390,7 @@ public:
* The caller owns the returned object and is responsible for deleting it.
* @stable ICU 2.0
*/
static BreakIterator* U_EXPORT2
U_COMMON_API static BreakIterator* U_EXPORT2
createLineInstance(const Locale& where, UErrorCode& status);
/**
@ -412,7 +412,7 @@ public:
* The caller owns the returned object and is responsible for deleting it.
* @stable ICU 2.0
*/
static BreakIterator* U_EXPORT2
U_COMMON_API static BreakIterator* U_EXPORT2
createCharacterInstance(const Locale& where, UErrorCode& status);
/**
@ -433,7 +433,7 @@ public:
* The caller owns the returned object and is responsible for deleting it.
* @stable ICU 2.0
*/
static BreakIterator* U_EXPORT2
U_COMMON_API static BreakIterator* U_EXPORT2
createSentenceInstance(const Locale& where, UErrorCode& status);
#ifndef U_HIDE_DEPRECATED_API
@ -459,7 +459,7 @@ public:
* The caller owns the returned object and is responsible for deleting it.
* @deprecated ICU 64 Use createWordInstance instead.
*/
static BreakIterator* U_EXPORT2
U_COMMON_API static BreakIterator* U_EXPORT2
createTitleInstance(const Locale& where, UErrorCode& status);
#endif /* U_HIDE_DEPRECATED_API */
@ -472,7 +472,7 @@ public:
* @return available locales
* @stable ICU 2.0
*/
static const Locale* U_EXPORT2 getAvailableLocales(int32_t& count);
U_COMMON_API static const Locale* U_EXPORT2 getAvailableLocales(int32_t& count);
/**
* Get name of the object for the desired Locale, in the desired language.
@ -483,7 +483,7 @@ public:
* @return user-displayable name
* @stable ICU 2.0
*/
static UnicodeString& U_EXPORT2 getDisplayName(const Locale& objectLocale,
U_COMMON_API static UnicodeString& U_EXPORT2 getDisplayName(const Locale& objectLocale,
const Locale& displayLocale,
UnicodeString& name);
@ -495,7 +495,7 @@ public:
* @return user-displayable name
* @stable ICU 2.0
*/
static UnicodeString& U_EXPORT2 getDisplayName(const Locale& objectLocale,
U_COMMON_API static UnicodeString& U_EXPORT2 getDisplayName(const Locale& objectLocale,
UnicodeString& name);
#ifndef U_FORCE_HIDE_DEPRECATED_API
@ -518,7 +518,7 @@ public:
*
* @deprecated ICU 52. Use clone() instead.
*/
virtual BreakIterator * createBufferClone(void *stackBuffer,
U_COMMON_API virtual BreakIterator* createBufferClone(void* stackBuffer,
int32_t& BufferSize,
UErrorCode& status) = 0;
#endif // U_FORCE_HIDE_DEPRECATED_API
@ -531,7 +531,7 @@ public:
* must be closed by an explicit call to the destructor (not delete).
* @deprecated ICU 52. Always delete the BreakIterator.
*/
inline UBool isBufferClone();
U_COMMON_API inline UBool isBufferClone();
#endif /* U_HIDE_DEPRECATED_API */
@ -551,7 +551,7 @@ public:
* @return a registry key that can be used to unregister this instance
* @stable ICU 2.4
*/
static URegistryKey U_EXPORT2 registerInstance(BreakIterator* toAdopt,
U_COMMON_API static URegistryKey U_EXPORT2 registerInstance(BreakIterator* toAdopt,
const Locale& locale,
UBreakIteratorType kind,
UErrorCode& status);
@ -568,7 +568,7 @@ public:
* @return true if the iterator for the key was successfully unregistered
* @stable ICU 2.4
*/
static UBool U_EXPORT2 unregister(URegistryKey key, UErrorCode& status);
U_COMMON_API static UBool U_EXPORT2 unregister(URegistryKey key, UErrorCode& status);
/**
* Return a StringEnumeration over the locales available at the time of the call,
@ -576,7 +576,7 @@ public:
* @return a StringEnumeration over the locales available at the time of the call
* @stable ICU 2.4
*/
static StringEnumeration* U_EXPORT2 getAvailableLocales();
U_COMMON_API static StringEnumeration* U_EXPORT2 getAvailableLocales();
#endif
/**
@ -584,7 +584,7 @@ public:
* actual locale.
* @stable ICU 2.8
*/
Locale getLocale(ULocDataLocaleType type, UErrorCode& status) const;
U_COMMON_API Locale getLocale(ULocDataLocaleType type, UErrorCode& status) const;
#ifndef U_HIDE_INTERNAL_API
/** Get the locale for this break iterator object. You can choose between valid and actual locale.
@ -593,7 +593,7 @@ public:
* @return the locale
* @internal
*/
const char *getLocaleID(ULocDataLocaleType type, UErrorCode& status) const;
U_COMMON_API const char* getLocaleID(ULocDataLocaleType type, UErrorCode& status) const;
#endif /* U_HIDE_INTERNAL_API */
/**
@ -621,7 +621,7 @@ public:
*
* @stable ICU 49
*/
virtual BreakIterator &refreshInputText(UText *input, UErrorCode &status) = 0;
U_COMMON_API virtual BreakIterator& refreshInputText(UText* input, UErrorCode& status) = 0;
private:
static BreakIterator* buildInstance(const Locale& loc, const char *type, UErrorCode& status);
@ -635,22 +635,21 @@ protected:
// Do not enclose protected default/copy constructors with #ifndef U_HIDE_INTERNAL_API
// or else the compiler will create public ones.
/** @internal */
BreakIterator();
U_COMMON_API BreakIterator();
/** @internal */
BreakIterator (const BreakIterator &other);
U_COMMON_API BreakIterator(const BreakIterator& other);
#ifndef U_HIDE_INTERNAL_API
/** @internal */
BreakIterator (const Locale& valid, const Locale &actual);
U_COMMON_API BreakIterator(const Locale& valid, const Locale& actual);
/** @internal. Assignment Operator, used by RuleBasedBreakIterator. */
BreakIterator &operator = (const BreakIterator &other);
U_COMMON_API BreakIterator& operator=(const BreakIterator& other);
#endif /* U_HIDE_INTERNAL_API */
private:
/** @internal (private) */
CharString* actualLocale = nullptr;
CharString* validLocale = nullptr;
CharString* requestLocale = nullptr;
Locale actualLocale;
Locale validLocale;
Locale requestLocale;
};
#ifndef U_HIDE_DEPRECATED_API

View File

@ -41,6 +41,8 @@
#if U_SHOW_CPLUSPLUS_API
#include <type_traits>
#include "unicode/uobject.h"
#include "unicode/std_string.h"
@ -258,13 +260,36 @@ private:
CheckedArrayByteSink &operator=(const CheckedArrayByteSink &) = delete;
};
namespace prv {

/**
 * Reports the code unit type of a string-like class. This primary template is
 * the fallback used when Str has no nested value_type; it yields plain char.
 * @internal
 */
template<typename Str, typename Enable = void>
struct value_type_or_char {
    /** @internal */
    using type = char;
};

/**
 * Partial specialization selected (through std::void_t detection) whenever
 * Str::value_type names a type; that member type is passed through unchanged.
 * @internal
 */
template<typename Str>
struct value_type_or_char<Str, std::void_t<typename Str::value_type>> {
    /** @internal */
    using type = typename Str::value_type;
};

/** Shorthand for value_type_or_char<Str>::type. @internal */
template<typename Str>
using value_type_or_char_t = typename value_type_or_char<Str>::type;

}  // namespace prv
/**
* Implementation of ByteSink that writes to a "string".
* The StringClass is usually instantiated with a std::string.
* The StringClass is usually instantiated with a std::string or a std::u8string.
* StringClass must have public member functions reserve(integer type), capacity(), length(), and
* append(value type, integer type) with the same semantics as those of std::basic_string, and must
* have an 8-bit value type. If the value type is not char, it must be a public member type
* StringClass::value_type.
* @stable ICU 4.2
*/
template<typename StringClass>
class StringByteSink : public ByteSink {
using Unit = typename prv::value_type_or_char_t<StringClass>;
public:
/**
* Constructs a ByteSink that will append bytes to the dest string.
@ -291,7 +316,13 @@ class StringByteSink : public ByteSink {
* @param n the number of bytes; must be non-negative
* @stable ICU 4.2
*/
virtual void Append(const char* data, int32_t n) override { dest_->append(data, n); }
virtual void Append(const char* data, int32_t n) override {
if constexpr (std::is_same_v<Unit, char>) {
dest_->append(data, n);
} else {
dest_->append(reinterpret_cast<const Unit*>(data), n);
}
}
private:
StringClass* dest_;

View File

@ -150,9 +150,6 @@ private:
virtual int32_t getMinLinearMatch() const override { return BytesTrie::kMinLinearMatch; }
virtual int32_t getMaxLinearMatchLength() const override { return BytesTrie::kMaxLinearMatchLength; }
/**
* @internal (private)
*/
class BTLinearMatchNode : public LinearMatchNode {
public:
BTLinearMatchNode(const char *units, int32_t len, Node *nextNode);

View File

@ -156,13 +156,11 @@ private:
/**
* Copy constructor. Private for now.
* @internal (private)
*/
CanonicalIterator(const CanonicalIterator& other) = delete;
/**
* Assignment operator. Private for now.
* @internal (private)
*/
CanonicalIterator& operator=(const CanonicalIterator& other) = delete;

View File

@ -75,7 +75,8 @@
* <tr>
* <td>Strings and Character Iteration</td>
* <td>ustring.h, utf8.h, utf16.h, icu::StringPiece, UText, UCharIterator, icu::ByteSink</td>
* <td>icu::UnicodeString, icu::CharacterIterator, icu::Appendable, icu::StringPiece,icu::ByteSink</td>
* <td>icu::UnicodeString, utfiterator.h (ICU 78+), icu::CharacterIterator, icu::Appendable,<br>
* icu::StringPiece, icu::ByteSink</td>
* </tr>
* <tr>
* <td>Unicode Character<br/>Properties and Names</td>
@ -160,7 +161,8 @@
* <tr>
* <td>Number Formatting<br/>(includes currency and unit formatting)</td>
* <td>unumberformatter.h, unum.h, usimplenumberformatter.h</td>
* <td>icu::number::NumberFormatter (ICU 60+) or icu::NumberFormat (older versions)<br>icu::number::SimpleNumberFormatter (ICU 73+)</td>
* <td>icu::number::NumberFormatter (ICU 60+) or icu::NumberFormat (older versions)<br>
* icu::number::SimpleNumberFormatter (ICU 73+)</td>
* </tr>
* <tr>
* <td>Number Range Formatting<br />(includes currency and unit ranges)</td>

View File

@ -18,7 +18,7 @@
*/
U_NAMESPACE_BEGIN
class CharString;
class FixedString;
/**
* <code>LocaleBuilder</code> is used to build instances of <code>Locale</code>
@ -297,7 +297,7 @@ private:
char language_[9];
char script_[5];
char region_[4];
CharString *variant_; // Pointer not object so we need not #include internal charstr.h.
FixedString *variant_; // Pointer not object so we need not #include internal fixedstring.h.
icu::Locale *extensions_; // Pointer not object. Storage for all other fields.
};

View File

@ -70,9 +70,7 @@ public:
// No heap allocation. Use only on the stack.
static void* U_EXPORT2 operator new(size_t) = delete;
static void* U_EXPORT2 operator new[](size_t) = delete;
#if U_HAVE_PLACEMENT_NEW
static void* U_EXPORT2 operator new(size_t, void*) = delete;
#endif
/**
* Constructor takes ownership.

View File

@ -35,6 +35,9 @@
#if U_SHOW_CPLUSPLUS_API
#include <cstdint>
#include <string_view>
#include "unicode/bytestream.h"
#include "unicode/localpointer.h"
#include "unicode/strenum.h"
@ -192,53 +195,53 @@ class UnicodeString;
* @stable ICU 2.0
* @see ResourceBundle
*/
class U_COMMON_API Locale : public UObject {
class U_COMMON_API_CLASS Locale : public UObject {
public:
/** Useful constant for the Root locale. @stable ICU 4.4 */
static const Locale& U_EXPORT2 getRoot();
U_COMMON_API static const Locale& U_EXPORT2 getRoot();
/** Useful constant for this language. @stable ICU 2.0 */
static const Locale& U_EXPORT2 getEnglish();
U_COMMON_API static const Locale& U_EXPORT2 getEnglish();
/** Useful constant for this language. @stable ICU 2.0 */
static const Locale& U_EXPORT2 getFrench();
U_COMMON_API static const Locale& U_EXPORT2 getFrench();
/** Useful constant for this language. @stable ICU 2.0 */
static const Locale& U_EXPORT2 getGerman();
U_COMMON_API static const Locale& U_EXPORT2 getGerman();
/** Useful constant for this language. @stable ICU 2.0 */
static const Locale& U_EXPORT2 getItalian();
U_COMMON_API static const Locale& U_EXPORT2 getItalian();
/** Useful constant for this language. @stable ICU 2.0 */
static const Locale& U_EXPORT2 getJapanese();
U_COMMON_API static const Locale& U_EXPORT2 getJapanese();
/** Useful constant for this language. @stable ICU 2.0 */
static const Locale& U_EXPORT2 getKorean();
U_COMMON_API static const Locale& U_EXPORT2 getKorean();
/** Useful constant for this language. @stable ICU 2.0 */
static const Locale& U_EXPORT2 getChinese();
U_COMMON_API static const Locale& U_EXPORT2 getChinese();
/** Useful constant for this language. @stable ICU 2.0 */
static const Locale& U_EXPORT2 getSimplifiedChinese();
U_COMMON_API static const Locale& U_EXPORT2 getSimplifiedChinese();
/** Useful constant for this language. @stable ICU 2.0 */
static const Locale& U_EXPORT2 getTraditionalChinese();
U_COMMON_API static const Locale& U_EXPORT2 getTraditionalChinese();
/** Useful constant for this country/region. @stable ICU 2.0 */
static const Locale& U_EXPORT2 getFrance();
U_COMMON_API static const Locale& U_EXPORT2 getFrance();
/** Useful constant for this country/region. @stable ICU 2.0 */
static const Locale& U_EXPORT2 getGermany();
U_COMMON_API static const Locale& U_EXPORT2 getGermany();
/** Useful constant for this country/region. @stable ICU 2.0 */
static const Locale& U_EXPORT2 getItaly();
U_COMMON_API static const Locale& U_EXPORT2 getItaly();
/** Useful constant for this country/region. @stable ICU 2.0 */
static const Locale& U_EXPORT2 getJapan();
U_COMMON_API static const Locale& U_EXPORT2 getJapan();
/** Useful constant for this country/region. @stable ICU 2.0 */
static const Locale& U_EXPORT2 getKorea();
U_COMMON_API static const Locale& U_EXPORT2 getKorea();
/** Useful constant for this country/region. @stable ICU 2.0 */
static const Locale& U_EXPORT2 getChina();
U_COMMON_API static const Locale& U_EXPORT2 getChina();
/** Useful constant for this country/region. @stable ICU 2.0 */
static const Locale& U_EXPORT2 getPRC();
U_COMMON_API static const Locale& U_EXPORT2 getPRC();
/** Useful constant for this country/region. @stable ICU 2.0 */
static const Locale& U_EXPORT2 getTaiwan();
U_COMMON_API static const Locale& U_EXPORT2 getTaiwan();
/** Useful constant for this country/region. @stable ICU 2.0 */
static const Locale& U_EXPORT2 getUK();
U_COMMON_API static const Locale& U_EXPORT2 getUK();
/** Useful constant for this country/region. @stable ICU 2.0 */
static const Locale& U_EXPORT2 getUS();
U_COMMON_API static const Locale& U_EXPORT2 getUS();
/** Useful constant for this country/region. @stable ICU 2.0 */
static const Locale& U_EXPORT2 getCanada();
U_COMMON_API static const Locale& U_EXPORT2 getCanada();
/** Useful constant for this country/region. @stable ICU 2.0 */
static const Locale& U_EXPORT2 getCanadaFrench();
U_COMMON_API static const Locale& U_EXPORT2 getCanadaFrench();
/**
* Construct a default locale object, a Locale for the default locale ID.
@ -247,7 +250,7 @@ public:
* @see uloc_getDefault
* @stable ICU 2.0
*/
Locale();
U_COMMON_API Locale();
/**
* Construct a locale from language, country, variant.
@ -273,7 +276,7 @@ public:
* @see uloc_getDefault
* @stable ICU 2.0
*/
Locale(const char* language,
U_COMMON_API Locale(const char* language,
const char* country = nullptr,
const char* variant = nullptr,
const char* keywordsAndValues = nullptr);
@ -284,7 +287,7 @@ public:
* @param other The Locale object being copied in.
* @stable ICU 2.0
*/
Locale(const Locale& other);
U_COMMON_API Locale(const Locale& other);
/**
* Move constructor; might leave source in bogus state.
@ -293,13 +296,13 @@ public:
* @param other The Locale object being moved in.
* @stable ICU 63
*/
Locale(Locale&& other) noexcept;
U_COMMON_API Locale(Locale&& other) noexcept;
/**
* Destructor
* @stable ICU 2.0
*/
virtual ~Locale() ;
U_COMMON_API virtual ~Locale();
/**
* Replaces the entire contents of *this with the specified value.
@ -308,7 +311,7 @@ public:
* @return *this
* @stable ICU 2.0
*/
Locale& operator=(const Locale& other);
U_COMMON_API Locale& operator=(const Locale& other);
/**
* Move assignment operator; might leave source in bogus state.
@ -319,7 +322,7 @@ public:
* @return *this
* @stable ICU 63
*/
Locale& operator=(Locale&& other) noexcept;
U_COMMON_API Locale& operator=(Locale&& other) noexcept;
/**
* Checks if two locale keys are the same.
@ -328,7 +331,7 @@ public:
* @return true if the two locale keys are the same, false otherwise.
* @stable ICU 2.0
*/
bool operator==(const Locale& other) const;
U_COMMON_API bool operator==(const Locale& other) const;
/**
* Checks if two locale keys are not the same.
@ -338,7 +341,7 @@ public:
* otherwise.
* @stable ICU 2.0
*/
inline bool operator!=(const Locale& other) const;
U_COMMON_API inline bool operator!=(const Locale& other) const;
/**
* Clone this object.
@ -351,7 +354,7 @@ public:
* @see getDynamicClassID
* @stable ICU 2.8
*/
Locale *clone() const;
U_COMMON_API Locale* clone() const;
#ifndef U_HIDE_SYSTEM_API
/**
@ -369,7 +372,7 @@ public:
* @system
* @stable ICU 2.0
*/
static const Locale& U_EXPORT2 getDefault();
U_COMMON_API static const Locale& U_EXPORT2 getDefault();
/**
* Sets the default. Normally set once at the beginning of a process,
@ -383,8 +386,7 @@ public:
* @system
* @stable ICU 2.0
*/
static void U_EXPORT2 setDefault(const Locale& newLocale,
UErrorCode& success);
U_COMMON_API static void U_EXPORT2 setDefault(const Locale& newLocale, UErrorCode& success);
#endif /* U_HIDE_SYSTEM_API */
/**
@ -408,7 +410,7 @@ public:
* @return the Locale for the specified BCP47 language tag.
* @stable ICU 63
*/
static Locale U_EXPORT2 forLanguageTag(StringPiece tag, UErrorCode& status);
U_COMMON_API static Locale U_EXPORT2 forLanguageTag(StringPiece tag, UErrorCode& status);
/**
* Returns a well-formed language tag for this Locale.
@ -423,7 +425,7 @@ public:
* @param status error information if creating the language tag failed.
* @stable ICU 63
*/
void toLanguageTag(ByteSink& sink, UErrorCode& status) const;
U_COMMON_API void toLanguageTag(ByteSink& sink, UErrorCode& status) const;
/**
* Returns a well-formed language tag for this Locale.
@ -447,11 +449,11 @@ public:
* @stable ICU 2.0
* @see uloc_getName
*/
static Locale U_EXPORT2 createFromName(const char *name);
U_COMMON_API static Locale U_EXPORT2 createFromName(const char* name);
#ifndef U_HIDE_INTERNAL_API
/** @internal */
static Locale U_EXPORT2 createFromName(StringPiece name);
U_COMMON_API static Locale U_EXPORT2 createFromName(StringPiece name);
#endif /* U_HIDE_INTERNAL_API */
/**
@ -462,14 +464,14 @@ public:
* @stable ICU 3.0
* @see uloc_canonicalize
*/
static Locale U_EXPORT2 createCanonical(const char* name);
U_COMMON_API static Locale U_EXPORT2 createCanonical(const char* name);
/**
* Returns the locale's ISO-639 language code.
* @return An alias to the code
* @stable ICU 2.0
*/
inline const char * getLanguage( ) const;
U_COMMON_API const char* getLanguage() const;
/**
* Returns the locale's ISO-15924 abbreviation script code.
@ -478,21 +480,21 @@ public:
* @see uscript_getCode
* @stable ICU 2.8
*/
inline const char * getScript( ) const;
U_COMMON_API const char* getScript() const;
/**
* Returns the locale's ISO-3166 country code.
* @return An alias to the code
* @stable ICU 2.0
*/
inline const char * getCountry( ) const;
U_COMMON_API const char* getCountry() const;
/**
* Returns the locale's variant code.
* @return An alias to the code
* @stable ICU 2.0
*/
inline const char * getVariant( ) const;
U_COMMON_API const char* getVariant() const;
/**
* Returns the programmatic name of the entire locale, with the language,
@ -502,7 +504,7 @@ public:
* @return A pointer to "name".
* @stable ICU 2.0
*/
inline const char * getName() const;
U_COMMON_API const char* getName() const;
/**
* Returns the programmatic name of the entire locale as getName() would return,
@ -511,7 +513,7 @@ public:
* @see getName
* @stable ICU 2.8
*/
const char * getBaseName() const;
U_COMMON_API const char* getBaseName() const;
/**
* Add the likely subtags for this Locale, per the algorithm described
@ -542,7 +544,7 @@ public:
* U_ILLEGAL_ARGUMENT_ERROR.
* @stable ICU 63
*/
void addLikelySubtags(UErrorCode& status);
U_COMMON_API void addLikelySubtags(UErrorCode& status);
/**
* Minimize the subtags for this Locale, per the algorithm described
@ -573,7 +575,7 @@ public:
* U_ILLEGAL_ARGUMENT_ERROR.
* @stable ICU 63
*/
void minimizeSubtags(UErrorCode& status);
U_COMMON_API void minimizeSubtags(UErrorCode& status);
/**
* Canonicalize the locale ID of this object according to CLDR.
@ -581,7 +583,7 @@ public:
* @stable ICU 67
* @see createCanonical
*/
void canonicalize(UErrorCode& status);
U_COMMON_API void canonicalize(UErrorCode& status);
/**
* Gets the list of keywords for the specified locale.
@ -592,7 +594,7 @@ public:
* @see getKeywords
* @stable ICU 2.8
*/
StringEnumeration * createKeywords(UErrorCode &status) const;
U_COMMON_API StringEnumeration* createKeywords(UErrorCode& status) const;
/**
* Gets the list of Unicode keywords for the specified locale.
@ -603,7 +605,7 @@ public:
* @see getUnicodeKeywords
* @stable ICU 63
*/
StringEnumeration * createUnicodeKeywords(UErrorCode &status) const;
U_COMMON_API StringEnumeration* createUnicodeKeywords(UErrorCode& status) const;
/**
* Gets the set of keywords for this Locale.
@ -649,7 +651,10 @@ public:
*
* @stable ICU 2.8
*/
int32_t getKeywordValue(const char* keywordName, char *buffer, int32_t bufferCapacity, UErrorCode &status) const;
U_COMMON_API int32_t getKeywordValue(const char* keywordName,
char* buffer,
int32_t bufferCapacity,
UErrorCode& status) const;
/**
* Gets the value for a keyword.
@ -664,7 +669,7 @@ public:
* @param status error information if getting the value failed.
* @stable ICU 63
*/
void getKeywordValue(StringPiece keywordName, ByteSink& sink, UErrorCode& status) const;
U_COMMON_API void getKeywordValue(StringPiece keywordName, ByteSink& sink, UErrorCode& status) const;
/**
* Gets the value for a keyword.
@ -695,7 +700,9 @@ public:
* @param status error information if getting the value failed.
* @stable ICU 63
*/
void getUnicodeKeywordValue(StringPiece keywordName, ByteSink& sink, UErrorCode& status) const;
U_COMMON_API void getUnicodeKeywordValue(StringPiece keywordName,
ByteSink& sink,
UErrorCode& status) const;
/**
* Gets the Unicode value for a Unicode keyword.
@ -732,7 +739,9 @@ public:
*
* @stable ICU 49
*/
void setKeywordValue(const char* keywordName, const char* keywordValue, UErrorCode &status) {
U_COMMON_API void setKeywordValue(const char* keywordName,
                                  const char* keywordValue,
                                  UErrorCode& status) {
    // Wrap both C strings as StringPiece and defer to the StringPiece overload.
    const StringPiece name{keywordName};
    const StringPiece value{keywordValue};
    setKeywordValue(name, value, status);
}
@ -754,7 +763,9 @@ public:
* @param status Returns any error information while performing this operation.
* @stable ICU 63
*/
void setKeywordValue(StringPiece keywordName, StringPiece keywordValue, UErrorCode& status);
U_COMMON_API void setKeywordValue(StringPiece keywordName,
StringPiece keywordValue,
UErrorCode& status);
/**
* Sets or removes the Unicode value for a Unicode keyword.
@ -774,7 +785,9 @@ public:
* @param status Returns any error information while performing this operation.
* @stable ICU 63
*/
void setUnicodeKeywordValue(StringPiece keywordName, StringPiece keywordValue, UErrorCode& status);
U_COMMON_API void setUnicodeKeywordValue(StringPiece keywordName,
StringPiece keywordValue,
UErrorCode& status);
/**
* returns the locale's three-letter language code, as specified
@ -782,14 +795,14 @@ public:
* @return An alias to the code, or an empty string
* @stable ICU 2.0
*/
const char * getISO3Language() const;
U_COMMON_API const char* getISO3Language() const;
/**
* Returns the locale's three-letter ISO-3166 country code.
* @return An alias to the code, or an empty string
* @stable ICU 2.0
*/
const char * getISO3Country() const;
U_COMMON_API const char* getISO3Country() const;
/**
* Returns the Windows LCID value corresponding to this locale.
@ -798,7 +811,7 @@ public:
* there is no Windows LCID value that corresponds to this locale, returns 0.
* @stable ICU 2.0
*/
uint32_t getLCID() const;
U_COMMON_API uint32_t getLCID() const;
/**
* Returns whether this locale's script is written right-to-left.
@ -813,7 +826,7 @@ public:
* @return true if the locale's script is written right-to-left
* @stable ICU 54
*/
UBool isRightToLeft() const;
U_COMMON_API UBool isRightToLeft() const;
/**
* Fills in "dispLang" with the name of this locale's language in a format suitable for
@ -824,7 +837,7 @@ public:
* @return A reference to "dispLang".
* @stable ICU 2.0
*/
UnicodeString& getDisplayLanguage(UnicodeString& dispLang) const;
U_COMMON_API UnicodeString& getDisplayLanguage(UnicodeString& dispLang) const;
/**
* Fills in "dispLang" with the name of this locale's language in a format suitable for
@ -839,7 +852,7 @@ public:
* @return A reference to "dispLang".
* @stable ICU 2.0
*/
UnicodeString& getDisplayLanguage( const Locale& displayLocale,
U_COMMON_API UnicodeString& getDisplayLanguage(const Locale& displayLocale,
UnicodeString& dispLang) const;
/**
@ -851,7 +864,7 @@ public:
* @return A reference to "dispScript".
* @stable ICU 2.8
*/
UnicodeString& getDisplayScript( UnicodeString& dispScript) const;
U_COMMON_API UnicodeString& getDisplayScript(UnicodeString& dispScript) const;
/**
* Fills in "dispScript" with the name of this locale's script in a format suitable
@ -867,7 +880,7 @@ public:
* @return A reference to "dispScript".
* @stable ICU 2.8
*/
UnicodeString& getDisplayScript( const Locale& displayLocale,
U_COMMON_API UnicodeString& getDisplayScript(const Locale& displayLocale,
UnicodeString& dispScript) const;
/**
@ -879,7 +892,7 @@ public:
* @return A reference to "dispCountry".
* @stable ICU 2.0
*/
UnicodeString& getDisplayCountry( UnicodeString& dispCountry) const;
U_COMMON_API UnicodeString& getDisplayCountry(UnicodeString& dispCountry) const;
/**
* Fills in "dispCountry" with the name of this locale's country in a format suitable
@ -895,7 +908,7 @@ public:
* @return A reference to "dispCountry".
* @stable ICU 2.0
*/
UnicodeString& getDisplayCountry( const Locale& displayLocale,
U_COMMON_API UnicodeString& getDisplayCountry(const Locale& displayLocale,
UnicodeString& dispCountry) const;
/**
@ -905,7 +918,7 @@ public:
* @return A reference to "dispVar".
* @stable ICU 2.0
*/
UnicodeString& getDisplayVariant( UnicodeString& dispVar) const;
U_COMMON_API UnicodeString& getDisplayVariant(UnicodeString& dispVar) const;
/**
* Fills in "dispVar" with the name of this locale's variant code in a format
@ -915,7 +928,7 @@ public:
* @return A reference to "dispVar".
* @stable ICU 2.0
*/
UnicodeString& getDisplayVariant( const Locale& displayLocale,
U_COMMON_API UnicodeString& getDisplayVariant(const Locale& displayLocale,
UnicodeString& dispVar) const;
/**
@ -929,7 +942,7 @@ public:
* @return A reference to "name".
* @stable ICU 2.0
*/
UnicodeString& getDisplayName( UnicodeString& name) const;
U_COMMON_API UnicodeString& getDisplayName(UnicodeString& name) const;
/**
* Fills in "name" with the name of this locale in a format suitable for user display
@ -943,14 +956,13 @@ public:
* @return A reference to "name".
* @stable ICU 2.0
*/
UnicodeString& getDisplayName( const Locale& displayLocale,
UnicodeString& name) const;
U_COMMON_API UnicodeString& getDisplayName(const Locale& displayLocale, UnicodeString& name) const;
/**
* Generates a hash code for the locale.
* @stable ICU 2.0
*/
int32_t hashCode() const;
U_COMMON_API int32_t hashCode() const;
/**
* Sets the locale to bogus
@ -960,14 +972,14 @@ public:
* instantiated from a locale and from a rule set).
* @stable ICU 2.1
*/
void setToBogus();
U_COMMON_API void setToBogus();
/**
* Gets the bogus state. Locale object can be bogus if it doesn't exist
* @return false if it is a real locale, true if it is a bogus locale
* @stable ICU 2.1
*/
inline UBool isBogus() const;
U_COMMON_API inline UBool isBogus() const;
/**
* Returns a list of all installed locales.
@ -977,7 +989,7 @@ public:
* get ownership of this list, and must NOT delete it.
* @stable ICU 2.0
*/
static const Locale* U_EXPORT2 getAvailableLocales(int32_t& count);
U_COMMON_API static const Locale* U_EXPORT2 getAvailableLocales(int32_t& count);
/**
* Gets a list of all available 2-letter country codes defined in ISO 3166. This is a
@ -987,7 +999,7 @@ public:
* @return a list of all available country codes
* @stable ICU 2.0
*/
static const char* const* U_EXPORT2 getISOCountries();
U_COMMON_API static const char* const* U_EXPORT2 getISOCountries();
/**
* Returns a list of all unique language codes defined in ISO 639.
@ -1000,21 +1012,21 @@ public:
* @return a list of all available language codes
* @stable ICU 2.0
*/
static const char* const* U_EXPORT2 getISOLanguages();
U_COMMON_API static const char* const* U_EXPORT2 getISOLanguages();
/**
* ICU "poor man's RTTI", returns a UClassID for this class.
*
* @stable ICU 2.2
*/
static UClassID U_EXPORT2 getStaticClassID();
U_COMMON_API static UClassID U_EXPORT2 getStaticClassID();
/**
* ICU "poor man's RTTI", returns a UClassID for the actual class.
*
* @stable ICU 2.2
*/
virtual UClassID getDynamicClassID() const override;
U_COMMON_API virtual UClassID getDynamicClassID() const override;
/**
* A Locale iterator interface similar to a Java Iterator<Locale>.
@ -1118,7 +1130,7 @@ protected: /* only protected for testing purposes. DO NOT USE. */
* Set this from a single POSIX style locale string.
* @internal
*/
void setFromPOSIXID(const char *posixID);
U_COMMON_API void setFromPOSIXID(const char* posixID);
/**
* Minimize the subtags for this Locale, per the algorithm described
* @param favorScript favor to keep script if true, to keep region if false.
@ -1127,7 +1139,7 @@ protected: /* only protected for testing purposes. DO NOT USE. */
* U_ILLEGAL_ARGUMENT_ERROR.
* @internal
*/
void minimizeSubtags(bool favorScript, UErrorCode& status);
U_COMMON_API void minimizeSubtags(bool favorScript, UErrorCode& status);
#endif /* U_HIDE_INTERNAL_API */
private:
@ -1139,7 +1151,6 @@ private:
* @param canonicalize whether to call uloc_canonicalize on cLocaleID
*/
Locale& init(const char* localeID, UBool canonicalize);
/** @internal */
Locale& init(StringPiece localeID, UBool canonicalize);
/*
@ -1147,8 +1158,10 @@ private:
* NO side effects. (Default constructor tries to get
* the default locale.)
*/
enum ELocaleType {
eBOGUS
// Tag naming the active member of union Payload. Nest and Heap each carry a
// leading field of this type, and Payload overlays a bare copy of it on both.
enum ELocaleType : uint8_t {
eBOGUS,  // no locale data; the state Payload() starts in and isBogus() tests for
eNEST,   // struct Nest is active (Nest's default member initializer sets this)
eHEAP,   // presumably struct Heap is active -- its constructor is not visible here; confirm
};
Locale(ELocaleType);
@ -1157,33 +1170,158 @@ private:
*/
static Locale* getLocaleCache();
union Payload;
struct Nest;
struct Heap;
/**
* Locale data that can be nested directly within the union Payload object.
*/
struct Nest {
    /** Intended object size in bytes; baseName takes whatever the fixed fields leave over. */
    static constexpr size_t SIZE = 32;

    ELocaleType type = eNEST;
    char language[4];
    char script[5];
    char region[4];
    /** Offset of the variant inside baseName; 0 means there is no variant. */
    uint8_t variantBegin;
    char baseName[SIZE -
                  sizeof type -
                  sizeof language -
                  sizeof script -
                  sizeof region -
                  sizeof variantBegin];

    const char* getLanguage() const { return language; }
    const char* getScript() const { return script; }
    const char* getRegion() const { return region; }
    const char* getBaseName() const { return baseName; }
    const char* getVariant() const {
        if (variantBegin == 0) {
            return "";
        }
        return getBaseName() + variantBegin;
    }

    // Not derived from UMemory, so plain heap allocation is disabled.
    static void* U_EXPORT2 operator new(size_t) noexcept = delete;
    static void* U_EXPORT2 operator new[](size_t) noexcept = delete;

    /** Starts with every character array zero-filled and no variant. */
    Nest() : language{}, script{}, region{}, variantBegin{0}, baseName{} {}

    void init(std::string_view language,
              std::string_view script,
              std::string_view region,
              uint8_t variantBegin);

    /**
     * Tells whether a locale whose base name has the given length, and whose
     * subtags are the given views, can be stored in a Nest.
     * All comparisons are strict, so each array keeps at least one byte beyond
     * the text (presumably for the terminating NUL).
     */
    static bool fits(int32_t length,
                     std::string_view language,
                     std::string_view script,
                     std::string_view region) {
        if (length >= static_cast<int32_t>(sizeof Nest::baseName)) {
            return false;
        }
        if (language.size() >= sizeof Nest::language) {
            return false;
        }
        if (script.size() >= sizeof Nest::script) {
            return false;
        }
        return region.size() < sizeof Nest::region;
    }

  private:
    friend union Payload;
    Nest(Heap&& heap, uint8_t variantBegin);
};
static_assert(sizeof(Nest) == Nest::SIZE);
/**
* Locale data that needs to be heap allocated in the union Payload object.
*/
struct Heap {
struct Alloc;
ELocaleType type;
char language[ULOC_LANG_CAPACITY];
char script[ULOC_SCRIPT_CAPACITY];
char country[ULOC_COUNTRY_CAPACITY];
int32_t variantBegin;
char* fullName;
char fullNameBuffer[ULOC_FULLNAME_CAPACITY];
// name without keywords
char* baseName;
void initBaseName(UErrorCode& status);
char region[ULOC_COUNTRY_CAPACITY];
Alloc* ptr;
UBool fIsBogus;
const char* getLanguage() const { return language; }
const char* getScript() const { return script; }
const char* getRegion() const { return region; }
const char* getVariant() const;
const char* getFullName() const;
const char* getBaseName() const;
// Doesn't inherit from UMemory, shouldn't be heap allocated.
static void* U_EXPORT2 operator new(size_t) noexcept = delete;
static void* U_EXPORT2 operator new[](size_t) noexcept = delete;
Heap(std::string_view language,
std::string_view script,
std::string_view region,
int32_t variantBegin);
~Heap();
Heap& operator=(const Heap& other);
Heap& operator=(Heap&& other) noexcept;
};
static_assert(sizeof(Heap) <= sizeof(Nest));
/**
 * This is kind of std::variant but customized to not waste any space on the
 * discriminator or on any padding, and to copy any heap allocated object.
 *
 * The three private members share the same storage. isBogus() and the
 * default constructor access the discriminator through the bare `type` member.
 */
union Payload {
private:
// The alternative representations.
// NOTE(review): reading `type` while nest or heap is the live member relies
// on Nest and Heap each beginning with an ELocaleType member -- confirm
// against both struct definitions.
Nest nest;
Heap heap;
ELocaleType type;
// Declared here, defined out of line; presumably the shared implementation
// behind the copy/move constructors and assignment operators below.
void copy(const Payload& other);
void move(Payload&& other) noexcept;
public:
// Doesn't inherit from UMemory, shouldn't be heap allocated.
static void* U_EXPORT2 operator new(size_t) noexcept = delete;
static void* U_EXPORT2 operator new[](size_t) noexcept = delete;
// A default-constructed Payload is bogus.
Payload() : type{eBOGUS} {}
// The destructor and the copy/move operations are user-declared and defined out of line.
~Payload();
Payload(const Payload& other);
Payload(Payload&& other) noexcept;
Payload& operator=(const Payload& other);
Payload& operator=(Payload&& other) noexcept;
// Declared here, defined out of line; presumably leaves type == eBOGUS.
void setToBogus();
// True when the discriminator is eBOGUS.
bool isBogus() const { return type == eBOGUS; }
// Variant-style helpers, declared here and defined out of line.
// NOTE(review): by analogy with std::variant, emplace<T> presumably constructs
// a T in place from args and get<T> presumably returns a pointer to the stored
// T (or nullptr on mismatch) -- confirm in the implementation.
template <typename T, typename... Args> T& emplace(Args&&... args);
template <typename T> T* get();
// Takes one callable per state (bogus, Nest, Heap) plus extra arguments;
// presumably invokes the one matching the current type -- confirm.
template <typename BogusFn, typename NestFn, typename HeapFn, typename... Args>
auto visit(BogusFn bogusFn, NestFn nestFn, HeapFn heapFn, Args... args) const;
} payload;
/**
* Call a field getter function on either Nest or Heap in payload.
* (This is kind of std::visit but simpler and without exceptions.)
*
* @tparam NEST Pointer to the Nest getter function.
* @tparam HEAP Pointer to the Heap getter function.
* @return the result from the getter, or the empty string if isBogus().
*/
template <const char* (Nest::*const NEST)() const,
const char* (Heap::*const HEAP)() const>
const char* getField() const;
static const Locale &getLocale(int locid);
/**
* A friend to allow the default locale to be set by either the C or C++ API.
* @internal (private)
*/
friend Locale *locale_set_default_internal(const char *, UErrorCode& status);
/**
* @internal (private)
*/
friend void U_CALLCONV locale_available_init();
};
inline bool
U_COMMON_API inline bool
Locale::operator!=(const Locale& other) const
{
return !operator==(other);
@ -1199,36 +1337,6 @@ Locale::toLanguageTag(UErrorCode& status) const
return result;
}
inline const char *
Locale::getCountry() const
{
return country;
}
inline const char *
Locale::getLanguage() const
{
return language;
}
inline const char *
Locale::getScript() const
{
return script;
}
inline const char *
Locale::getVariant() const
{
return fIsBogus ? "" : &baseName[variantBegin];
}
inline const char *
Locale::getName() const
{
return fullName;
}
template<typename StringClass, typename OutputIterator> inline void
Locale::getKeywords(OutputIterator iterator, UErrorCode& status) const
{
@ -1285,9 +1393,9 @@ Locale::getUnicodeKeywordValue(StringPiece keywordName, UErrorCode& status) cons
return result;
}
inline UBool
U_COMMON_API inline UBool
Locale::isBogus() const {
return fIsBogus;
return payload.isBogus();
}
U_NAMESPACE_END

View File

@ -368,19 +368,6 @@
# define U_IS_BIG_ENDIAN 0
#endif
/**
* \def U_HAVE_PLACEMENT_NEW
* Determines whether to override placement new and delete for STL.
* @stable ICU 2.6
*/
#ifdef U_HAVE_PLACEMENT_NEW
/* Use the predefined value. */
#elif defined(__BORLANDC__)
# define U_HAVE_PLACEMENT_NEW 0
#else
# define U_HAVE_PLACEMENT_NEW 1
#endif
/**
* \def U_HAVE_DEBUG_LOCATION_NEW
* Define this to define the MFC debug version of the operator new.
@ -479,6 +466,12 @@
/* Otherwise use the predefined value. */
#elif !defined(__cplusplus)
# define U_CPLUSPLUS_VERSION 0
// The value of _MSVC_LANG for C++23 preview is undocumented, except that it is larger than 202002.
// As of this writing, it is 202004.
#elif __cplusplus >= 202302L || (defined(_MSVC_LANG) && _MSVC_LANG > 202002L)
# define U_CPLUSPLUS_VERSION 23
#elif __cplusplus >= 202002L || (defined(_MSVC_LANG) && _MSVC_LANG >= 202002L)
# define U_CPLUSPLUS_VERSION 20
#elif __cplusplus >= 201703L || (defined(_MSVC_LANG) && _MSVC_LANG >= 201703L)
# define U_CPLUSPLUS_VERSION 17
#elif __cplusplus >= 201402L || (defined(_MSVC_LANG) && _MSVC_LANG >= 201402L)
@ -493,12 +486,10 @@
/**
* \def U_FALLTHROUGH
* Annotate intentional fall-through between switch labels.
* http://clang.llvm.org/docs/AttributeReference.html#fallthrough-clang-fallthrough
* https://clang.llvm.org/docs/AttributeReference.html#fallthrough
* @internal
*/
#ifndef __cplusplus
// Not for C.
#elif defined(U_FALLTHROUGH)
#if defined(U_FALLTHROUGH)
// Use the predefined value.
#elif defined(__clang__)
// Test for compiler vs. feature separately.

View File

@ -122,7 +122,6 @@ class U_COMMON_API RuleBasedBreakIterator /*final*/ : public BreakIterator {
private:
/**
* The UText through which this BreakIterator accesses the text
* @internal (private)
*/
UText fText = UTEXT_INITIALIZER;
@ -172,7 +171,6 @@ private:
* If present, UStack of LanguageBreakEngine objects that might handle
* dictionary characters. Searched from top to bottom to find an object to
* handle a given character.
* @internal (private)
*/
UStack *fLanguageBreakEngines = nullptr;
@ -181,14 +179,12 @@ private:
* If present, the special LanguageBreakEngine used for handling
* characters that are in the dictionary set, but not handled by any
* LanguageBreakEngine.
* @internal (private)
*/
UnhandledEngine *fUnhandledBreakEngine = nullptr;
/**
* Counter for the number of characters encountered with the "dictionary"
* flag set.
* @internal (private)
*/
uint32_t fDictionaryCharCount = 0;
@ -233,7 +229,6 @@ private:
*
* The break iterator adopts the memory, and will
* free it when done.
* @internal (private)
*/
RuleBasedBreakIterator(RBBIDataHeader* data, UErrorCode &status);
@ -248,20 +243,16 @@ private:
* @param isPhraseBreaking true if phrase based breaking is required, otherwise false.
* @see udata_open
* @see #getBinaryRules
* @internal (private)
*/
RuleBasedBreakIterator(UDataMemory* image, UBool isPhraseBreaking, UErrorCode &status);
/** @internal */
friend class RBBIRuleBuilder;
/** @internal */
friend class BreakIterator;
/**
* Default constructor with an error code parameter.
* Aside from error handling, otherwise identical to the default constructor.
* Internally, handles common initialization for other constructors.
* @internal (private)
*/
RuleBasedBreakIterator(UErrorCode *status);
@ -732,7 +723,6 @@ private:
* will operate correctly. A Safe Position is not necessarily a boundary itself.
*
* @param fromPosition the position in the input text to begin the iteration.
* @internal (private)
*/
int32_t handleSafePrevious(int32_t fromPosition);
@ -745,8 +735,6 @@ private:
* fDictionaryCharCount the number of dictionary characters encountered.
* If > 0, the segment will be further subdivided
* fRuleStatusIndex Info from the state table indicating which rules caused the boundary.
*
* @internal (private)
*/
int32_t handleNext();
@ -778,7 +766,6 @@ private:
* given character c.
* @param c A character in the dictionary set
* @param locale The locale.
* @internal (private)
*/
const LanguageBreakEngine *getLanguageBreakEngine(UChar32 c, const char* locale);

View File

@ -333,7 +333,7 @@ class U_COMMON_API StringPiece : public UMemory {
* @return true if the string data is equal
* @stable ICU 4.8
*/
U_EXPORT UBool U_EXPORT2
U_COMMON_API UBool U_EXPORT2
operator==(const StringPiece& x, const StringPiece& y);
/**

View File

@ -61,7 +61,7 @@ U_CDECL_BEGIN
* @see u_getUnicodeVersion
* @stable ICU 2.0
*/
#define U_UNICODE_VERSION "16.0"
#define U_UNICODE_VERSION "17.0"
/**
* \file
@ -552,14 +552,12 @@ typedef enum UProperty {
* @stable ICU 74
*/
UCHAR_ID_COMPAT_MATH_CONTINUE=74,
#ifndef U_HIDE_DRAFT_API
/**
* Binary property Modifier_Combining_Mark.
* Used by the AMTRA algorithm in UAX #53.
* @draft ICU 76
* @stable ICU 76
*/
UCHAR_MODIFIER_COMBINING_MARK=75,
#endif // U_HIDE_DRAFT_API
#ifndef U_HIDE_DEPRECATED_API
/**
* One more than the last constant for binary Unicode properties.
@ -682,14 +680,12 @@ typedef enum UProperty {
* @stable ICU 75
*/
UCHAR_IDENTIFIER_STATUS=0x1019,
#ifndef U_HIDE_DRAFT_API
/**
* Enumerated property Indic_Conjunct_Break.
* Used in the grapheme cluster break algorithm in UAX #29.
* @draft ICU 76
* @stable ICU 76
*/
UCHAR_INDIC_CONJUNCT_BREAK=0x101A,
#endif // U_HIDE_DRAFT_API
#ifndef U_HIDE_DEPRECATED_API
/**
* One more than the last constant for enumerated/integer Unicode properties.
@ -1985,6 +1981,25 @@ enum UBlockCode {
/** @stable ICU 76 */
UBLOCK_TULU_TIGALARI = 338, /*[11380]*/
// New blocks in Unicode 17.0.0
/** @stable ICU 78 */
UBLOCK_BERIA_ERFE = 339, /*[16EA0]*/
/** @stable ICU 78 */
UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_J = 340, /*[323B0]*/
/** @stable ICU 78 */
UBLOCK_MISCELLANEOUS_SYMBOLS_SUPPLEMENT = 341, /*[1CEC0]*/
/** @stable ICU 78 */
UBLOCK_SHARADA_SUPPLEMENT = 342, /*[11B60]*/
/** @stable ICU 78 */
UBLOCK_SIDETIC = 343, /*[10940]*/
/** @stable ICU 78 */
UBLOCK_TAI_YO = 344, /*[1E6C0]*/
/** @stable ICU 78 */
UBLOCK_TANGUT_COMPONENTS_SUPPLEMENT = 345, /*[18D80]*/
/** @stable ICU 78 */
UBLOCK_TOLONG_SIKI = 346, /*[11DB0]*/
#ifndef U_HIDE_DEPRECATED_API
/**
* One more than the highest normal UBlockCode value.
@ -1992,7 +2007,7 @@ enum UBlockCode {
*
* @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
*/
UBLOCK_COUNT = 339,
UBLOCK_COUNT = 347,
#endif // U_HIDE_DEPRECATED_API
/** @stable ICU 2.0 */
@ -2289,6 +2304,8 @@ typedef enum UJoiningGroup {
U_JG_KASHMIRI_YEH, /**< @stable ICU 76 */
U_JG_THIN_NOON, /**< @stable ICU 78 */
#ifndef U_HIDE_DEPRECATED_API
/**
* One more than the highest normal UJoiningGroup value.
@ -2536,6 +2553,8 @@ typedef enum ULineBreak {
U_LB_VIRAMA_FINAL = 46, /*[VF]*/
/** @stable ICU 74 */
U_LB_VIRAMA = 47, /*[VI]*/
/** @stable ICU 78 */
U_LB_UNAMBIGUOUS_HYPHEN = 48,/*[HH]*/
#ifndef U_HIDE_DEPRECATED_API
/**
* One more than the highest normal ULineBreak value.
@ -2543,7 +2562,7 @@ typedef enum ULineBreak {
*
* @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
*/
U_LB_COUNT = 48
U_LB_COUNT = 49
#endif // U_HIDE_DEPRECATED_API
} ULineBreak;
@ -2741,12 +2760,11 @@ typedef enum UIndicSyllabicCategory {
U_INSC_REORDERING_KILLER,
} UIndicSyllabicCategory;
#ifndef U_HIDE_DRAFT_API
/**
* Indic Conjunct Break constants.
*
* @see UCHAR_INDIC_CONJUNCT_BREAK
* @draft ICU 76
* @stable ICU 76
*/
typedef enum UIndicConjunctBreak {
/*
@ -2755,16 +2773,15 @@ typedef enum UIndicConjunctBreak {
* U_INCB_<Unicode Indic_Conjunct_Break value name>
*/
/** @draft ICU 76 */
/** @stable ICU 76 */
U_INCB_NONE,
/** @draft ICU 76 */
/** @stable ICU 76 */
U_INCB_CONSONANT,
/** @draft ICU 76 */
/** @stable ICU 76 */
U_INCB_EXTEND,
/** @draft ICU 76 */
/** @stable ICU 76 */
U_INCB_LINKER,
} UIndicConjunctBreak;
#endif // U_HIDE_DRAFT_API
/**
* Vertical Orientation constants.
@ -2918,7 +2935,7 @@ u_stringHasBinaryProperty(const UChar *s, int32_t length, UProperty which);
* @return the property as a set
* @see UProperty
* @see u_hasBinaryProperty
* @see Unicode::fromUSet
* @see UnicodeSet::fromUSet
* @stable ICU 63
*/
U_CAPI const USet * U_EXPORT2

View File

@ -119,6 +119,28 @@
/** Obsolete/same as U_CAPI; was used to declare a function as an internal ICU C API */
#define U_INTERNAL U_CAPI
/**
* \def U_FORCE_INLINE
* Forces function inlining on compilers that are known to support it.
* Place this before specifiers like "static" and "explicit".
*
* This does not replace the "inline" keyword which suspends the One Definition Rule (ODR)
* in addition to optionally serving as an inlining hint to the compiler.
*
* @internal
*/
// Selection order: a caller-supplied definition wins, then Doxygen builds,
// then compiler-specific spellings, then a plain "inline" fallback.
#ifdef U_FORCE_INLINE
// already defined
#elif defined(U_IN_DOXYGEN)
// Documentation builds only get the standard keyword.
# define U_FORCE_INLINE inline
#elif (defined(__clang__) && __clang__) || U_GCC_MAJOR_MINOR != 0
// clang, or U_GCC_MAJOR_MINOR is nonzero (presumably meaning GCC): use the GNU attribute.
# define U_FORCE_INLINE [[gnu::always_inline]]
#elif defined(U_REAL_MSVC)
// Microsoft compiler (as indicated by U_REAL_MSVC): use its keyword.
# define U_FORCE_INLINE __forceinline
#else
// No recognized compiler: fall back to the standard keyword.
# define U_FORCE_INLINE inline
#endif
// Before ICU 65, function-like, multi-statement ICU macros were just defined as
// series of statements wrapped in { } blocks and the caller could choose to
// either treat them as if they were actual functions and end the invocation

View File

@ -495,7 +495,7 @@ public:
* @return <tt>true</tt> if the specified set is equal to this set.
* @stable ICU 2.0
*/
virtual bool operator==(const UnicodeSet& o) const;
bool operator==(const UnicodeSet& o) const;
/**
* Compares the specified object with this set for equality. Returns
@ -522,7 +522,7 @@ public:
* @see Object#hashCode()
* @stable ICU 2.0
*/
virtual int32_t hashCode() const;
int32_t hashCode() const;
/**
* Get a UnicodeSet pointer from a USet
@ -792,7 +792,7 @@ public:
* @stable ICU 2.0
* @see getRangeCount
*/
virtual int32_t size() const;
int32_t size() const;
/**
* Returns <tt>true</tt> if this set contains no elements.
@ -800,7 +800,7 @@ public:
* @return <tt>true</tt> if this set contains no elements.
* @stable ICU 2.0
*/
virtual UBool isEmpty() const;
UBool isEmpty() const;
/**
* @return true if this set contains multi-character strings or the empty string.
@ -825,7 +825,7 @@ public:
* @return true if the test condition is met
* @stable ICU 2.0
*/
virtual UBool contains(UChar32 start, UChar32 end) const;
UBool contains(UChar32 start, UChar32 end) const;
/**
* Returns <tt>true</tt> if this set contains the given
@ -843,7 +843,7 @@ public:
* @return true if the test condition is met
* @stable ICU 2.4
*/
virtual UBool containsAll(const UnicodeSet& c) const;
UBool containsAll(const UnicodeSet& c) const;
/**
* Returns true if this set contains all the characters
@ -1021,7 +1021,7 @@ public:
* Implement UnicodeMatcher::matches()
* @stable ICU 2.4
*/
virtual UMatchDegree matches(const Replaceable& text,
UMatchDegree matches(const Replaceable& text,
int32_t& offset,
int32_t limit,
UBool incremental) override;
@ -1102,7 +1102,6 @@ public:
*/
UChar32 charAt(int32_t index) const;
#ifndef U_HIDE_DRAFT_API
/**
* Returns a C++ "range" for iterating over the code points of this set.
*
@ -1114,7 +1113,7 @@ public:
* \endcode
*
* @return a "range" object for iterating over the code points of this set.
* @draft ICU 76
* @stable ICU 76
* @see ranges
* @see strings
* @see begin
@ -1140,7 +1139,7 @@ public:
* \endcode
*
* @return a "range" object for iterating over the code point ranges of this set.
* @draft ICU 76
* @stable ICU 76
* @see codePoints
* @see strings
* @see begin
@ -1164,7 +1163,7 @@ public:
* \endcode
*
* @return a "range" object for iterating over the strings of this set.
* @draft ICU 76
* @stable ICU 76
* @see codePoints
* @see ranges
* @see begin
@ -1173,7 +1172,6 @@ public:
inline U_HEADER_NESTED_NAMESPACE::USetStrings strings() const {
return U_HEADER_NESTED_NAMESPACE::USetStrings(toUSet());
}
#endif // U_HIDE_DRAFT_API
#ifndef U_HIDE_DRAFT_API
/**
@ -1231,7 +1229,7 @@ public:
* to this set.
* @stable ICU 2.0
*/
virtual UnicodeSet& add(UChar32 start, UChar32 end);
UnicodeSet& add(UChar32 start, UChar32 end);
/**
* Adds the specified character to this set if it is not already
@ -1337,7 +1335,7 @@ public:
* @param end last character, inclusive, of range
* @stable ICU 2.0
*/
virtual UnicodeSet& retain(UChar32 start, UChar32 end);
UnicodeSet& retain(UChar32 start, UChar32 end);
/**
@ -1375,7 +1373,7 @@ public:
* from this set.
* @stable ICU 2.0
*/
virtual UnicodeSet& remove(UChar32 start, UChar32 end);
UnicodeSet& remove(UChar32 start, UChar32 end);
/**
* Removes the specified character from this set if it is present.
@ -1412,7 +1410,7 @@ public:
* A frozen set will not be modified.
* @stable ICU 2.0
*/
virtual UnicodeSet& complement();
UnicodeSet& complement();
/**
* Complements the specified range in this set. Any character in
@ -1426,7 +1424,7 @@ public:
* @param end last character, inclusive, of range
* @stable ICU 2.0
*/
virtual UnicodeSet& complement(UChar32 start, UChar32 end);
UnicodeSet& complement(UChar32 start, UChar32 end);
/**
* Complements the specified character in this set. The character
@ -1463,7 +1461,7 @@ public:
* @see #add(UChar32, UChar32)
* @stable ICU 2.0
*/
virtual UnicodeSet& addAll(const UnicodeSet& c);
UnicodeSet& addAll(const UnicodeSet& c);
/**
* Retains only the elements in this set that are contained in the
@ -1476,7 +1474,7 @@ public:
* @param c set that defines which elements this set will retain.
* @stable ICU 2.0
*/
virtual UnicodeSet& retainAll(const UnicodeSet& c);
UnicodeSet& retainAll(const UnicodeSet& c);
/**
* Removes from this set all of its elements that are contained in the
@ -1489,7 +1487,7 @@ public:
* this set.
* @stable ICU 2.0
*/
virtual UnicodeSet& removeAll(const UnicodeSet& c);
UnicodeSet& removeAll(const UnicodeSet& c);
/**
* Complements in this set all elements contained in the specified
@ -1501,7 +1499,7 @@ public:
* this set.
* @stable ICU 2.4
*/
virtual UnicodeSet& complementAll(const UnicodeSet& c);
UnicodeSet& complementAll(const UnicodeSet& c);
/**
* Removes all of the elements from this set. This set will be
@ -1509,7 +1507,7 @@ public:
* A frozen set will not be modified.
* @stable ICU 2.0
*/
virtual UnicodeSet& clear();
UnicodeSet& clear();
/**
* Close this set over the given attribute. For the attribute
@ -1546,7 +1544,7 @@ public:
* @return a reference to this set.
* @stable ICU 4.2
*/
virtual UnicodeSet &removeAllStrings();
UnicodeSet &removeAllStrings();
/**
* Iteration method that returns the number of ranges contained in
@ -1555,7 +1553,7 @@ public:
* @see #getRangeEnd
* @stable ICU 2.4
*/
virtual int32_t getRangeCount() const;
int32_t getRangeCount() const;
/**
* Iteration method that returns the first character in the
@ -1564,7 +1562,7 @@ public:
* @see #getRangeEnd
* @stable ICU 2.4
*/
virtual UChar32 getRangeStart(int32_t index) const;
UChar32 getRangeStart(int32_t index) const;
/**
* Iteration method that returns the last character in the
@ -1573,7 +1571,7 @@ public:
* @see #getRangeEnd
* @stable ICU 2.4
*/
virtual UChar32 getRangeEnd(int32_t index) const;
UChar32 getRangeEnd(int32_t index) const;
/**
* Serializes this set into an array of 16-bit integers. Serialization
@ -1631,7 +1629,7 @@ public:
* A frozen set will not be modified.
* @stable ICU 2.4
*/
virtual UnicodeSet& compact();
UnicodeSet& compact();
/**
* Return the class ID for this class. This is useful only for

View File

@ -215,6 +215,10 @@ class UnicodeStringAppendable; // unicode/appendable.h
*
* The UnicodeString equivalent of std::string's clear() is remove().
*
* Starting with ICU 78, a UnicodeString is a C++ "range" of char16_t code units.
* utfStringCodePoints() and unsafeUTFStringCodePoints() can be used to iterate over
* the code points.
*
* A UnicodeString may "alias" an external array of characters
* (that is, point to it, rather than own the array)
* whose lifetime must then at least match the lifetime of the aliasing object.
@ -289,12 +293,17 @@ class UnicodeStringAppendable; // unicode/appendable.h
* [User Guide Strings chapter](https://unicode-org.github.io/icu/userguide/strings#maximizing-performance-with-the-unicodestring-storage-model).
*
* @see utf.h
* @see utfiterator.h
* @see utfStringCodePoints
* @see unsafeUTFStringCodePoints
* @see CharacterIterator
* @stable ICU 2.0
*/
class U_COMMON_API UnicodeString : public Replaceable
{
public:
/** C++ boilerplate @internal */
using value_type = char16_t;
/**
* Constant to be used in the UnicodeString(char *, int32_t, EInvariant) constructor
@ -327,7 +336,6 @@ public:
*/
inline bool operator== (const UnicodeString& text) const;
#ifndef U_HIDE_DRAFT_API
/**
* Equality operator. Performs only bitwise comparison with `text`
* which is, or which is implicitly convertible to,
@ -341,7 +349,7 @@ public:
* \endcode
* @param text The string view to compare to this string.
* @return true if `text` contains the same characters as this one, false otherwise.
* @draft ICU 76
* @stable ICU 76
*/
template<typename S, typename = std::enable_if_t<ConvertibleToU16StringView<S>>>
inline bool operator==(const S &text) const {
@ -349,7 +357,6 @@ public:
uint32_t len; // unsigned to avoid a compiler warning
return !isBogus() && (len = length()) == sv.length() && doEquals(sv.data(), len);
}
#endif // U_HIDE_DRAFT_API
/**
* Inequality operator. Performs only bitwise comparison.
@ -360,7 +367,6 @@ public:
*/
inline bool operator!= (const UnicodeString& text) const;
#ifndef U_HIDE_DRAFT_API
/**
* Inequality operator. Performs only bitwise comparison with `text`
* which is, or which is implicitly convertible to,
@ -376,13 +382,12 @@ public:
* \endcode
* @param text The string view to compare to this string.
* @return false if `text` contains the same characters as this one, true otherwise.
* @draft ICU 76
* @stable ICU 76
*/
template<typename S, typename = std::enable_if_t<ConvertibleToU16StringView<S>>>
inline bool operator!=(const S &text) const {
return !operator==(text);
}
#endif // U_HIDE_DRAFT_API
/**
* Greater than operator. Performs only bitwise comparison.
@ -1767,7 +1772,8 @@ public:
* Unpaired surrogates are replaced with U+FFFD.
* Calls toUTF8().
*
* @param result A standard string (or a compatible object)
* @tparam StringClass A std::string or a std::u8string (or a compatible type)
* @param result A std::string or a std::u8string (or a compatible object)
* to which the UTF-8 version of the string is appended.
* @return The string object.
* @stable ICU 4.2
@ -1780,6 +1786,27 @@ public:
return result;
}
#ifndef U_HIDE_DRAFT_API
/**
* Convert the UnicodeString to a UTF-8 string.
* Unpaired surrogates are replaced with U+FFFD.
* Calls toUTF8().
*
* @tparam StringClass A std::string or a std::u8string (or a compatible type)
* @return A std::string or a std::u8string (or a compatible object)
* with the UTF-8 version of the string.
* @draft ICU 78
* @see toUTF8
*/
template<typename StringClass>
StringClass toUTF8String() const {
// Build the result in a local object and return it by value.
StringClass result;
// The sink is constructed over `result`; this string's length() is passed as
// the second constructor argument (presumably a capacity hint -- confirm
// against the StringByteSink documentation).
StringByteSink<StringClass> sbs(&result, length());
// Emit the UTF-8 form of this string into the sink.
toUTF8(sbs);
return result;
}
#endif // U_HIDE_DRAFT_API
/**
* Convert the UnicodeString to UTF-32.
* Unpaired surrogates are replaced with U+FFFD.
@ -1892,6 +1919,42 @@ public:
*/
inline UBool isBogus() const;
#ifndef U_HIDE_DRAFT_API
private:
// These type aliases are private; there is no guarantee that they will remain
// aliases to the same types in subsequent versions of ICU.
// Note that whether `std::u16string_view::const_iterator` is a pointer or a
// class that models contiguous_iterator is platform-dependent.
using unspecified_iterator = std::u16string_view::const_iterator;
using unspecified_reverse_iterator = std::u16string_view::const_reverse_iterator;
public:
/**
* @return an iterator to the first code unit in this string.
* The iterator may be a pointer or a contiguous-iterator object.
* @draft ICU 78
*/
unspecified_iterator begin() const { return std::u16string_view(*this).begin(); }
/**
* @return an iterator to just past the last code unit in this string.
* The iterator may be a pointer or a contiguous-iterator object.
* @draft ICU 78
*/
unspecified_iterator end() const { return std::u16string_view(*this).end(); }
/**
* @return a reverse iterator to the last code unit in this string.
* The iterator is a std::reverse_iterator over the iterator type returned by begin() and end().
* @draft ICU 78
*/
unspecified_reverse_iterator rbegin() const { return std::u16string_view(*this).rbegin(); }
/**
* @return a reverse iterator to just before the first code unit in this string.
* The iterator is a std::reverse_iterator over the iterator type returned by begin() and end().
* @draft ICU 78
*/
unspecified_reverse_iterator rend() const { return std::u16string_view(*this).rend(); }
#endif // U_HIDE_DRAFT_API
//========================================
// Write operations
//========================================
@ -1945,7 +2008,6 @@ public:
*/
UnicodeString &fastCopyFrom(const UnicodeString &src);
#ifndef U_HIDE_DRAFT_API
/**
* Assignment operator. Replaces the characters in this UnicodeString
* with a copy of the characters from the `src`
@ -1954,14 +2016,13 @@ public:
*
* @param src The string view containing the characters to copy.
* @return a reference to this
* @draft ICU 76
* @stable ICU 76
*/
template<typename S, typename = std::enable_if_t<ConvertibleToU16StringView<S>>>
inline UnicodeString &operator=(const S &src) {
// Leave the bogus state (if any) before replacing the contents.
unBogus();
// Replace the whole current range [0, length()) with the string view of src.
return doReplace(0, length(), internal::toU16StringView(src));
}
#endif // U_HIDE_DRAFT_API
/**
* Move assignment operator; might leave src in bogus state.
@ -2212,7 +2273,6 @@ public:
*/
inline UnicodeString& operator+= (const UnicodeString& srcText);
#ifndef U_HIDE_DRAFT_API
/**
* Append operator. Appends the characters in `src`
* which is, or which is implicitly convertible to,
@ -2221,13 +2281,12 @@ public:
*
* @param src the source for the new characters
* @return a reference to this
* @draft ICU 76
* @stable ICU 76
*/
template<typename S, typename = std::enable_if_t<ConvertibleToU16StringView<S>>>
inline UnicodeString& operator+=(const S &src) {
return doAppend(internal::toU16StringView(src));
}
#endif // U_HIDE_DRAFT_API
/**
* Append the characters
@ -2285,7 +2344,6 @@ public:
inline UnicodeString& append(ConstChar16Ptr srcChars,
int32_t srcLength);
#ifndef U_HIDE_DRAFT_API
/**
* Appends the characters in `src`
* which is, or which is implicitly convertible to,
@ -2294,13 +2352,12 @@ public:
*
* @param src the source for the new characters
* @return a reference to this
* @draft ICU 76
* @stable ICU 76
*/
template<typename S, typename = std::enable_if_t<ConvertibleToU16StringView<S>>>
inline UnicodeString& append(const S &src) {
return doAppend(internal::toU16StringView(src));
}
#endif // U_HIDE_DRAFT_API
/**
* Append the code unit `srcChar` to the UnicodeString object.
@ -2318,6 +2375,16 @@ public:
*/
UnicodeString& append(UChar32 srcChar);
#ifndef U_HIDE_DRAFT_API
/**
* Appends the code unit `c` to the UnicodeString object.
* Same as append(c) except does not return *this.
*
* @param c the code unit to append
* @draft ICU 78
*/
inline void push_back(char16_t c) { append(c); }
#endif // U_HIDE_DRAFT_API
/* Insert operations */
@ -3025,12 +3092,11 @@ public:
*/
const char16_t *getTerminatedBuffer();
#ifndef U_HIDE_DRAFT_API
/**
* Converts to a std::u16string_view.
*
* @return a string view of the contents of this string
* @draft ICU 76
* @stable ICU 76
*/
inline operator std::u16string_view() const {
return {getBuffer(), static_cast<std::u16string_view::size_type>(length())};
@ -3044,7 +3110,7 @@ public:
* about char16_t vs. wchar_t become clearer.
*
* @return a string view of the contents of this string
* @draft ICU 76
* @stable ICU 76
*/
inline operator std::wstring_view() const {
const char16_t *p = getBuffer();
@ -3054,7 +3120,6 @@ public:
return { reinterpret_cast<const wchar_t *>(p), (std::wstring_view::size_type)length() };
}
#endif // U_SIZEOF_WCHAR_T
#endif // U_HIDE_DRAFT_API
//========================================
// Constructors
@ -3257,7 +3322,6 @@ public:
*/
inline UnicodeString(const std::nullptr_t text, int32_t textLength);
#ifndef U_HIDE_DRAFT_API
/**
* Constructor from `text`
* which is, or which is implicitly convertible to,
@ -3268,14 +3332,13 @@ public:
* then you can call the UnicodeString::readOnlyAlias() function instead of this constructor.
*
* @param text UTF-16 string
* @draft ICU 76
* @stable ICU 76
*/
template<typename S, typename = std::enable_if_t<ConvertibleToU16StringView<S>>>
UNISTR_FROM_STRING_EXPLICIT UnicodeString(const S &text) {
// Put the object into the kShortString state before appending
// (presumably an empty string using the internal buffer -- confirm).
fUnion.fFields.fLengthAndFlags = kShortString;
// Append the text. This uses the "Nullable" conversion helper rather than
// internal::toU16StringView; presumably it tolerates a null pointer -- confirm.
doAppend(internal::toU16StringViewNullable(text));
}
#endif // U_HIDE_DRAFT_API
/**
* Readonly-aliasing char16_t* constructor.
@ -3573,7 +3636,6 @@ public:
*/
virtual ~UnicodeString();
#ifndef U_HIDE_DRAFT_API
/**
* Readonly-aliasing factory method.
* Aliases the same buffer as the input `text`
@ -3594,7 +3656,7 @@ public:
* so that both strings then alias the same readonly-text.
*
* @param text The string view to alias for the UnicodeString.
* @draft ICU 76
* @stable ICU 76
*/
template<typename S, typename = std::enable_if_t<ConvertibleToU16StringView<S>>>
static inline UnicodeString readOnlyAlias(const S &text) {
@ -3618,12 +3680,11 @@ public:
* so that both strings then alias the same readonly-text.
*
* @param text The UnicodeString to alias.
* @draft ICU 76
* @stable ICU 76
*/
static inline UnicodeString readOnlyAlias(const UnicodeString &text) {
return readOnlyAliasFromUnicodeString(text);
}
#endif // U_HIDE_DRAFT_API
/**
* Create a UnicodeString from a UTF-8 string.
@ -4102,7 +4163,6 @@ private:
U_COMMON_API UnicodeString U_EXPORT2
operator+ (const UnicodeString &s1, const UnicodeString &s2);
#ifndef U_HIDE_DRAFT_API
/**
* Creates a new UnicodeString from the concatenation of a UnicodeString and `s2`
* which is, or which is implicitly convertible to,
@ -4111,13 +4171,12 @@ operator+ (const UnicodeString &s1, const UnicodeString &s2);
* @param s1 The string to be copied to the new one.
* @param s2 The string view to be copied to the new string, after s1.
* @return UnicodeString(s1).append(s2)
* @draft ICU 76
* @stable ICU 76
*/
template<typename S, typename = std::enable_if_t<ConvertibleToU16StringView<S>>>
inline UnicodeString operator+(const UnicodeString &s1, const S &s2) {
return unistr_internalConcat(s1, internal::toU16StringView(s2));
}
#endif // U_HIDE_DRAFT_API
#ifndef U_FORCE_HIDE_INTERNAL_API
/** @internal */

View File

@ -157,7 +157,6 @@ public:
*/
static void U_EXPORT2 operator delete[](void *p) noexcept;
#if U_HAVE_PLACEMENT_NEW
/**
* Override for ICU4C C++ memory management for STL.
* See new().
@ -171,7 +170,7 @@ public:
* @stable ICU 2.6
*/
static inline void U_EXPORT2 operator delete(void *, void *) noexcept {}
#endif /* U_HAVE_PLACEMENT_NEW */
#if U_HAVE_DEBUG_LOCATION_NEW
/**
* This method overrides the MFC debug version of the operator new

View File

@ -33,8 +33,9 @@
#if !U_DISABLE_RENAMING
// Disable Renaming for Visual Studio's IntelliSense feature, so that 'Go-to-Definition' (F12) will work.
#if !(defined(_MSC_VER) && defined(__INTELLISENSE__))
// Disable Renaming for Visual Studio's IntelliSense feature and for LLVM's Clang-Tidy tool, so that
// 'Go-to-Definition' (F12) and 'include-cleaner' respectively will work.
#if !(defined(_MSC_VER) && defined(__INTELLISENSE__)) && !defined(__clang_analyzer__)
/* We need the U_ICU_ENTRY_POINT_RENAME definition. There's a default one in unicode/uvernum.h we can use, but we will give
the platform a chance to define it first.
@ -1392,6 +1393,7 @@
#define uprops_getSource U_ICU_ENTRY_POINT_RENAME(uprops_getSource)
#define upropsvec_addPropertyStarts U_ICU_ENTRY_POINT_RENAME(upropsvec_addPropertyStarts)
#define uprv_add32_overflow U_ICU_ENTRY_POINT_RENAME(uprv_add32_overflow)
#define uprv_addScriptExtensionsCodePoints U_ICU_ENTRY_POINT_RENAME(uprv_addScriptExtensionsCodePoints)
#define uprv_aestrncpy U_ICU_ENTRY_POINT_RENAME(uprv_aestrncpy)
#define uprv_asciiFromEbcdic U_ICU_ENTRY_POINT_RENAME(uprv_asciiFromEbcdic)
#define uprv_asciitolower U_ICU_ENTRY_POINT_RENAME(uprv_asciitolower)
@ -2037,7 +2039,7 @@
#define ztrans_setTime U_ICU_ENTRY_POINT_RENAME(ztrans_setTime)
#define ztrans_setTo U_ICU_ENTRY_POINT_RENAME(ztrans_setTo)
#endif /* !(defined(_MSC_VER) && defined(__INTELLISENSE__)) */
#endif /* !(defined(_MSC_VER) && defined(__INTELLISENSE__)) && !defined(__clang_analyzer__) */
#endif /* U_DISABLE_RENAMING */
#endif /* URENAME_H */

View File

@ -518,6 +518,17 @@ typedef enum UScriptCode {
/** @stable ICU 76 */
USCRIPT_TULU_TIGALARI = 207, /* Tutg */
/** @stable ICU 78 */
USCRIPT_BERIA_ERFE = 208, /* Berf */
/** @stable ICU 78 */
USCRIPT_SIDETIC = 209, /* Sidt */
/** @stable ICU 78 */
USCRIPT_TAI_YO = 210, /* Tayo */
/** @stable ICU 78 */
USCRIPT_TOLONG_SIKI = 211, /* Tols */
/** @stable ICU 78 */
USCRIPT_TRADITIONAL_HAN_WITH_LATIN = 212, /* Hntl */
#ifndef U_HIDE_DEPRECATED_API
/**
* One more than the highest normal UScriptCode value.
@ -525,7 +536,7 @@ typedef enum UScriptCode {
*
* @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
*/
USCRIPT_CODE_LIMIT = 208
USCRIPT_CODE_LIMIT = 213
#endif // U_HIDE_DEPRECATED_API
} UScriptCode;

View File

@ -987,12 +987,10 @@ uset_size(const USet* set);
U_CAPI int32_t U_EXPORT2
uset_getRangeCount(const USet *set);
#ifndef U_HIDE_DRAFT_API
/**
* @param set the set
* @return the number of strings in this set.
* @draft ICU 76
* @stable ICU 76
* @see uset_getRangeCount
* @see uset_getItemCount
* @see uset_size
@ -1009,14 +1007,12 @@ uset_getStringCount(const USet *set);
* @param index the string index, 0 .. uset_getStringCount() - 1
* @param pLength the output string length; must not be NULL
* @return the pointer to the string; NULL if the index is out of range or pLength is NULL
* @draft ICU 76
* @stable ICU 76
* @see uset_getStringCount
*/
U_CAPI const UChar* U_EXPORT2
uset_getString(const USet *set, int32_t index, int32_t *pLength);
#endif // U_HIDE_DRAFT_API
/**
* Returns the number of items in this set. An item is either a range
* of characters or a single multicharacter string.
@ -1327,7 +1323,6 @@ uset_getSerializedRange(const USerializedSet* set, int32_t rangeIndex,
UChar32* pStart, UChar32* pEnd);
#if U_SHOW_CPLUSPLUS_API || U_SHOW_CPLUSPLUS_HEADER_API
#ifndef U_HIDE_DRAFT_API
namespace U_HEADER_ONLY_NAMESPACE {
@ -1336,14 +1331,14 @@ namespace U_HEADER_ONLY_NAMESPACE {
/**
* Iterator returned by USetCodePoints.
* @draft ICU 76
* @stable ICU 76
*/
class USetCodePointIterator {
public:
/** @draft ICU 76 */
/** @stable ICU 76 */
USetCodePointIterator(const USetCodePointIterator &other) = default;
/** @draft ICU 76 */
/** @stable ICU 76 */
bool operator==(const USetCodePointIterator &other) const {
// No need to compare rangeCount & end given private constructor
// and assuming we don't compare iterators across the set being modified.
@ -1353,15 +1348,15 @@ public:
return uset == other.uset && c == other.c;
}
/** @draft ICU 76 */
/** @stable ICU 76 */
bool operator!=(const USetCodePointIterator &other) const { return !operator==(other); }
/** @draft ICU 76 */
/** @stable ICU 76 */
UChar32 operator*() const { return c; }
/**
* Pre-increment.
* @draft ICU 76
* @stable ICU 76
*/
USetCodePointIterator &operator++() {
if (c < end) {
@ -1382,7 +1377,7 @@ public:
/**
* Post-increment.
* @draft ICU 76
* @stable ICU 76
*/
USetCodePointIterator operator++(int) {
USetCodePointIterator result(*this);
@ -1419,7 +1414,7 @@ private:
*
* C++ UnicodeSet has member functions for iteration, including codePoints().
*
* @draft ICU 76
* @stable ICU 76
* @see USetRanges
* @see USetStrings
* @see USetElements
@ -1428,19 +1423,19 @@ class USetCodePoints {
public:
/**
* Constructs a C++ "range" object over the code points of the USet.
* @draft ICU 76
* @stable ICU 76
*/
USetCodePoints(const USet *pUset) : uset(pUset), rangeCount(uset_getRangeCount(pUset)) {}
/** @draft ICU 76 */
/** @stable ICU 76 */
USetCodePoints(const USetCodePoints &other) = default;
/** @draft ICU 76 */
/** @stable ICU 76 */
USetCodePointIterator begin() const {
return USetCodePointIterator(uset, 0, rangeCount);
}
/** @draft ICU 76 */
/** @stable ICU 76 */
USetCodePointIterator end() const {
return USetCodePointIterator(uset, rangeCount, rangeCount);
}
@ -1455,25 +1450,25 @@ private:
* Returned by USetRangeIterator which is returned by USetRanges.
* Both the rangeStart and rangeEnd are in the range.
* (end() returns an iterator corresponding to rangeEnd+1.)
* @draft ICU 76
* @stable ICU 76
*/
struct CodePointRange {
/** @draft ICU 76 */
/** @stable ICU 76 */
struct iterator {
/** @draft ICU 76 */
/** @stable ICU 76 */
iterator(UChar32 aC) : c(aC) {}
/** @draft ICU 76 */
/** @stable ICU 76 */
bool operator==(const iterator &other) const { return c == other.c; }
/** @draft ICU 76 */
/** @stable ICU 76 */
bool operator!=(const iterator &other) const { return !operator==(other); }
/** @draft ICU 76 */
/** @stable ICU 76 */
UChar32 operator*() const { return c; }
/**
* Pre-increment.
* @draft ICU 76
* @stable ICU 76
*/
iterator &operator++() {
++c;
@ -1482,7 +1477,7 @@ struct CodePointRange {
/**
* Post-increment.
* @draft ICU 76
* @stable ICU 76
*/
iterator operator++(int) {
return c++;
@ -1490,44 +1485,44 @@ struct CodePointRange {
/**
* The current code point in the range.
* @draft ICU 76
* @stable ICU 76
*/
UChar32 c;
};
/** @draft ICU 76 */
/** @stable ICU 76 */
CodePointRange(UChar32 start, UChar32 end) : rangeStart(start), rangeEnd(end) {}
/** @draft ICU 76 */
/** @stable ICU 76 */
CodePointRange(const CodePointRange &other) = default;
/** @draft ICU 76 */
/** @stable ICU 76 */
size_t size() const { return (rangeEnd + 1) - rangeStart; }
/** @draft ICU 76 */
/** @stable ICU 76 */
iterator begin() const { return rangeStart; }
/** @draft ICU 76 */
/** @stable ICU 76 */
iterator end() const { return rangeEnd + 1; }
/**
* Start of a USet/UnicodeSet range of code points.
* @draft ICU 76
* @stable ICU 76
*/
UChar32 rangeStart;
/**
* Inclusive end of a USet/UnicodeSet range of code points.
* @draft ICU 76
* @stable ICU 76
*/
UChar32 rangeEnd;
};
/**
* Iterator returned by USetRanges.
* @draft ICU 76
* @stable ICU 76
*/
class USetRangeIterator {
public:
/** @draft ICU 76 */
/** @stable ICU 76 */
USetRangeIterator(const USetRangeIterator &other) = default;
/** @draft ICU 76 */
/** @stable ICU 76 */
bool operator==(const USetRangeIterator &other) const {
// No need to compare rangeCount given private constructor
// and assuming we don't compare iterators across the set being modified.
@ -1536,10 +1531,10 @@ public:
return uset == other.uset && rangeIndex == other.rangeIndex;
}
/** @draft ICU 76 */
/** @stable ICU 76 */
bool operator!=(const USetRangeIterator &other) const { return !operator==(other); }
/** @draft ICU 76 */
/** @stable ICU 76 */
CodePointRange operator*() const {
if (rangeIndex < rangeCount) {
UChar32 start, end;
@ -1554,7 +1549,7 @@ public:
/**
* Pre-increment.
* @draft ICU 76
* @stable ICU 76
*/
USetRangeIterator &operator++() {
++rangeIndex;
@ -1563,7 +1558,7 @@ public:
/**
* Post-increment.
* @draft ICU 76
* @stable ICU 76
*/
USetRangeIterator operator++(int) {
USetRangeIterator result(*this);
@ -1600,7 +1595,7 @@ private:
*
* C++ UnicodeSet has member functions for iteration, including ranges().
*
* @draft ICU 76
* @stable ICU 76
* @see USetCodePoints
* @see USetStrings
* @see USetElements
@ -1609,19 +1604,19 @@ class USetRanges {
public:
/**
* Constructs a C++ "range" object over the code point ranges of the USet.
* @draft ICU 76
* @stable ICU 76
*/
USetRanges(const USet *pUset) : uset(pUset), rangeCount(uset_getRangeCount(pUset)) {}
/** @draft ICU 76 */
/** @stable ICU 76 */
USetRanges(const USetRanges &other) = default;
/** @draft ICU 76 */
/** @stable ICU 76 */
USetRangeIterator begin() const {
return USetRangeIterator(uset, 0, rangeCount);
}
/** @draft ICU 76 */
/** @stable ICU 76 */
USetRangeIterator end() const {
return USetRangeIterator(uset, rangeCount, rangeCount);
}
@ -1633,14 +1628,14 @@ private:
/**
* Iterator returned by USetStrings.
* @draft ICU 76
* @stable ICU 76
*/
class USetStringIterator {
public:
/** @draft ICU 76 */
/** @stable ICU 76 */
USetStringIterator(const USetStringIterator &other) = default;
/** @draft ICU 76 */
/** @stable ICU 76 */
bool operator==(const USetStringIterator &other) const {
// No need to compare count given private constructor
// and assuming we don't compare iterators across the set being modified.
@ -1649,10 +1644,10 @@ public:
return uset == other.uset && index == other.index;
}
/** @draft ICU 76 */
/** @stable ICU 76 */
bool operator!=(const USetStringIterator &other) const { return !operator==(other); }
/** @draft ICU 76 */
/** @stable ICU 76 */
std::u16string_view operator*() const {
if (index < count) {
int32_t length;
@ -1665,7 +1660,7 @@ public:
/**
* Pre-increment.
* @draft ICU 76
* @stable ICU 76
*/
USetStringIterator &operator++() {
++index;
@ -1674,7 +1669,7 @@ public:
/**
* Post-increment.
* @draft ICU 76
* @stable ICU 76
*/
USetStringIterator operator++(int) {
USetStringIterator result(*this);
@ -1710,7 +1705,7 @@ private:
*
* C++ UnicodeSet has member functions for iteration, including strings().
*
* @draft ICU 76
* @stable ICU 76
* @see USetCodePoints
* @see USetRanges
* @see USetElements
@ -1719,19 +1714,19 @@ class USetStrings {
public:
/**
* Constructs a C++ "range" object over the strings of the USet.
* @draft ICU 76
* @stable ICU 76
*/
USetStrings(const USet *pUset) : uset(pUset), count(uset_getStringCount(pUset)) {}
/** @draft ICU 76 */
/** @stable ICU 76 */
USetStrings(const USetStrings &other) = default;
/** @draft ICU 76 */
/** @stable ICU 76 */
USetStringIterator begin() const {
return USetStringIterator(uset, 0, count);
}
/** @draft ICU 76 */
/** @stable ICU 76 */
USetStringIterator end() const {
return USetStringIterator(uset, count, count);
}
@ -1740,7 +1735,6 @@ private:
const USet *uset;
int32_t count;
};
#endif // U_HIDE_DRAFT_API
#ifndef U_HIDE_DRAFT_API
/**
@ -1900,9 +1894,10 @@ private:
int32_t rangeCount, stringCount;
};
#endif // U_HIDE_DRAFT_API
} // namespace U_HEADER_ONLY_NAMESPACE
#endif // U_HIDE_DRAFT_API
#endif // U_SHOW_CPLUSPLUS_API || U_SHOW_CPLUSPLUS_HEADER_API
#endif // __USET_H__

View File

@ -121,8 +121,39 @@
/* single-code point definitions -------------------------------------------- */
#ifndef U_HIDE_DRAFT_API
/**
 * Tests whether c lies in the Unicode code point range U+0000..U+10FFFF.
 * https://www.unicode.org/glossary/#code_point
 *
 * The argument is converted to uint32_t before the comparison,
 * so negative values are rejected along with values above U+10FFFF.
 * c is evaluated exactly once.
 *
 * @param c 32-bit code point
 * @return true or false
 * @draft ICU 78
 * @see AllCodePoints
 * @see U_IS_SCALAR_VALUE
 */
#define U_IS_CODE_POINT(c) ((uint32_t)(c)<0x110000)
/**
 * Is c a Unicode scalar value, that is, a non-surrogate code point?
 * Only scalar values can be represented in well-formed UTF-8/16/32.
 * https://www.unicode.org/glossary/#unicode_scalar_value
 *
 * c is evaluated exactly once, so arguments with side effects are safe.
 *
 * Implementation note: after conversion to uint32_t, XOR with 0xd800 maps
 * the surrogates U+D800..U+DFFF onto 0..0x7ff and permutes all other values
 * within their own 64k block. Subtracting 0x800 then makes the surrogates
 * wrap around to very large values, and every other code point up to
 * U+10FFFF lands below 0x110000-0x800, so one unsigned comparison
 * rejects both the surrogates and everything above U+10FFFF
 * (including negative inputs).
 *
 * @param c 32-bit code point
 * @return true or false
 * @draft ICU 78
 * @see AllScalarValues
 * @see U_IS_CODE_POINT
 */
#define U_IS_SCALAR_VALUE(c) ((((uint32_t)(c)^0xd800)-0x800)<0x10f800)
#endif // U_HIDE_DRAFT_API
/**
* Is this code point a Unicode noncharacter?
* https://www.unicode.org/glossary/#noncharacter
*
* @param c 32-bit code point
* @return true or false
* @stable ICU 2.4
@ -150,7 +181,7 @@
*/
#define U_IS_UNICODE_CHAR(c) \
((uint32_t)(c)<0xd800 || \
(0xdfff<(c) && (c)<=0x10ffff && !U_IS_UNICODE_NONCHAR(c)))
(0xe000<=(c) && (c)<=0x10ffff && !U_IS_UNICODE_NONCHAR(c)))
/**
* Is this code point a BMP code point (U+0000..U+ffff)?

View File

@ -170,7 +170,7 @@ utf8_back1SafeBody(const uint8_t *s, int32_t start, int32_t i);
* @return true or false
* @stable ICU 2.4
*/
#define U8_IS_SINGLE(c) (((c)&0x80)==0)
#define U8_IS_SINGLE(c) ((int8_t)(c)>=0)
/**
* Is this code unit (byte) a UTF-8 lead byte? (0xC2..0xF4)
@ -214,6 +214,32 @@ utf8_back1SafeBody(const uint8_t *s, int32_t start, int32_t i);
*/
#define U8_MAX_LENGTH 4
#ifndef U_HIDE_DRAFT_API
/**
 * Length of a well-formed UTF-8 byte sequence, determined from its lead byte:
 * one more than U8_COUNT_TRAIL_BYTES(leadByte).
 * Returns 1 for 0..0xc1 as well as for 0xf5..0xff.
 * leadByte might be evaluated multiple times.
 *
 * @param leadByte The first byte of a UTF-8 sequence. Must be 0..0xff.
 * @return 1..4
 * @draft ICU 78
 */
#define U8_LENGTH_FROM_LEAD_BYTE(leadByte) (1 + U8_COUNT_TRAIL_BYTES(leadByte))
/**
 * Length of a well-formed UTF-8 byte sequence, determined from its lead byte:
 * one more than U8_COUNT_TRAIL_BYTES_UNSAFE(leadByte).
 * Returns 1 for 0..0xc1. Undefined for 0xf5..0xff.
 * leadByte might be evaluated multiple times.
 *
 * @param leadByte The first byte of a UTF-8 sequence. Must be 0..0xff.
 * @return 1..4
 * @draft ICU 78
 */
#define U8_LENGTH_FROM_LEAD_BYTE_UNSAFE(leadByte) (1 + U8_COUNT_TRAIL_BYTES_UNSAFE(leadByte))
#endif // U_HIDE_DRAFT_API
/**
* Get a code point from a string at a random-access offset,
* without changing the offset.
@ -517,7 +543,7 @@ utf8_back1SafeBody(const uint8_t *s, int32_t start, int32_t i);
if(U8_IS_TRAIL(__t1)) { \
++(i); \
} \
} else /* c>=0xf0 */ { \
} else /* b>=0xf0 */ { \
if(U8_IS_VALID_LEAD4_AND_T1(__b, __t1) && \
++(i)!=(length) && U8_IS_TRAIL((s)[i]) && \
++(i)!=(length) && U8_IS_TRAIL((s)[i])) { \
@ -683,7 +709,7 @@ utf8_back1SafeBody(const uint8_t *s, int32_t start, int32_t i);
*/
#define U8_PREV_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \
(c)=(uint8_t)(s)[--(i)]; \
if(U8_IS_TRAIL(c)) { \
if(!U8_IS_SINGLE(c)) { \
uint8_t __b, __count=1, __shift=6; \
\
/* c is a trail byte */ \

View File

@ -385,8 +385,10 @@ U_CFUNC U_IMPORT const uint8_t utf8_countTrailBytes[];
/* each following branch falls through to the next one */ \
case 3: \
(c)=((c)<<6)|((s)[(i)++]&0x3f); \
U_FALLTHROUGH; \
case 2: \
(c)=((c)<<6)|((s)[(i)++]&0x3f); \
U_FALLTHROUGH; \
case 1: \
(c)=((c)<<6)|((s)[(i)++]&0x3f); \
/* no other branches to optimize switch() */ \

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,161 @@
// © 2025 and later: Unicode, Inc. and others.
// License & terms of use: https://www.unicode.org/copyright.html
// utfstring.h
// created: 2025jul18 Markus W. Scherer
#ifndef __UTFSTRING_H__
#define __UTFSTRING_H__
#include "unicode/utypes.h"
#if U_SHOW_CPLUSPLUS_API || U_SHOW_CPLUSPLUS_HEADER_API || !defined(UTYPES_H)
#include "unicode/utf16.h"
/**
* \file
* \brief C++ header-only API: C++ string helper functions.
*/
#ifndef U_HIDE_DRAFT_API
namespace U_HEADER_ONLY_NAMESPACE {
namespace utfstring {
// Write code points to strings -------------------------------------------- ***
#ifndef U_IN_DOXYGEN
namespace prv {
// Shared implementation behind the public append/encode wrappers.
//
// Appends code point c to s, choosing the encoding from the size of the
// string's code unit type: 1 byte = UTF-8, 2 bytes = UTF-16, otherwise UTF-32.
// With validate=true, a surrogate or a value above U+10FFFF is replaced with U+FFFD.
// With validate=false, c is encoded without any checking.
// Returns s.
//
// This function, and the public wrappers,
// want to be U_FORCE_INLINE but the gcc-debug-build-and-test CI check failed with
// error: always_inline function might not be inlinable [-Werror=attributes]
template<typename StringClass, bool validate>
inline StringClass &appendCodePoint(StringClass &s, uint32_t c) {
    using Unit = typename StringClass::value_type;
    if constexpr (sizeof(Unit) == 1) {
        // UTF-8: Similar to U8_APPEND().
        if (c <= 0x7f) {
            s.push_back(static_cast<Unit>(c));
        } else {
            // Build the multi-byte sequence front to back, then append it in one call.
            Unit units[4];
            uint8_t count = 0;
            if (c <= 0x7ff) {
                units[count++] = (c >> 6) | 0xc0;
            } else {
                if constexpr (validate) {
                    // Values below 0xd800 need no further checks.
                    if (c >= 0xd800 && (c < 0xe000 || c > 0x10ffff)) {
                        c = 0xfffd;
                    }
                }
                if (c <= 0xffff) {
                    units[count++] = (c >> 12) | 0xe0;
                } else {
                    units[count++] = (c >> 18) | 0xf0;
                    units[count++] = ((c >> 12) & 0x3f) | 0x80;
                }
                units[count++] = ((c >> 6) & 0x3f) | 0x80;
            }
            units[count++] = (c & 0x3f) | 0x80;
            s.append(units, count);
        }
    } else if constexpr (sizeof(Unit) == 2) {
        // UTF-16: Similar to U16_APPEND().
        if constexpr (validate) {
            if (c >= 0xd800 && (c < 0xe000 || c > 0x10ffff)) {
                c = 0xfffd;
            }
        }
        if (c <= 0xffff) {
            s.push_back(static_cast<Unit>(c));
        } else {
            // Supplementary code point: surrogate pair.
            Unit pair[2] = { U16_LEAD(c), U16_TRAIL(c) };
            s.append(pair, 2);
        }
    } else {
        // UTF-32
        if constexpr (validate) {
            if (!U_IS_SCALAR_VALUE(c)) {
                c = 0xfffd;
            }
        }
        s.push_back(c);
    }
    return s;
}
} // namespace prv
#endif // U_IN_DOXYGEN
#ifndef U_HIDE_DRAFT_API
/**
 * Appends the code units for one code point to the string.
 * If c is not a scalar value, the U+FFFD replacement character is appended instead.
 * See https://www.unicode.org/glossary/#unicode_scalar_value
 *
 * @tparam StringClass A version of std::basic_string (or a compatible type)
 * @param s The string to append to
 * @param c The code point to append
 * @return s
 * @draft ICU 78
 * @see U_IS_SCALAR_VALUE
 */
template<typename StringClass>
inline StringClass &appendOrFFFD(StringClass &s, UChar32 c) {
    constexpr bool kValidate = true;
    return prv::appendCodePoint<StringClass, kValidate>(s, static_cast<uint32_t>(c));
}
/**
 * Appends the code units for one code point to the string, without validation.
 * The code point must be a scalar value; otherwise the behavior is undefined.
 * See https://www.unicode.org/glossary/#unicode_scalar_value
 *
 * @tparam StringClass A version of std::basic_string (or a compatible type)
 * @param s The string to append to
 * @param c The code point to append (must be a scalar value)
 * @return s
 * @draft ICU 78
 * @see U_IS_SCALAR_VALUE
 */
template<typename StringClass>
inline StringClass &appendUnsafe(StringClass &s, UChar32 c) {
    constexpr bool kValidate = false;
    return prv::appendCodePoint<StringClass, kValidate>(s, static_cast<uint32_t>(c));
}
/**
 * Encodes one code point as a new string of code units.
 * If c is not a scalar value, the result encodes the U+FFFD replacement character instead.
 * See https://www.unicode.org/glossary/#unicode_scalar_value
 *
 * @tparam StringClass A version of std::basic_string (or a compatible type)
 * @param c The code point
 * @return the string of c's code units
 * @draft ICU 78
 * @see U_IS_SCALAR_VALUE
 */
template<typename StringClass>
inline StringClass encodeOrFFFD(UChar32 c) {
    StringClass encoded;
    appendOrFFFD<StringClass>(encoded, c);
    return encoded;
}
/**
 * Encodes one code point as a new string of code units, without validation.
 * The code point must be a scalar value; otherwise the behavior is undefined.
 * See https://www.unicode.org/glossary/#unicode_scalar_value
 *
 * @tparam StringClass A version of std::basic_string (or a compatible type)
 * @param c The code point
 * @return the string of c's code units
 * @draft ICU 78
 * @see U_IS_SCALAR_VALUE
 */
template<typename StringClass>
inline StringClass encodeUnsafe(UChar32 c) {
    StringClass encoded;
    appendUnsafe<StringClass>(encoded, c);
    return encoded;
}
#endif // U_HIDE_DRAFT_API
} // namespace utfstring
} // namespace U_HEADER_ONLY_NAMESPACE
#endif // U_HIDE_DRAFT_API
#endif // U_SHOW_CPLUSPLUS_API || U_SHOW_CPLUSPLUS_HEADER_API
#endif // __UTFSTRING_H__

View File

@ -384,6 +384,85 @@ typedef double UDate;
#define U_TOOLUTIL_API U_IMPORT
#endif
#ifndef U_FORCE_HIDE_DRAFT_API
/**
* \def U_DATA_API_CLASS
* Set to export library symbols from inside the stubdata library,
* and to import them from outside, to be used on a class.
* @draft ICU 78
*/
/**
* \def U_COMMON_API_CLASS
* Set to export library symbols from inside the common library,
* and to import them from outside, to be used on a class.
* @draft ICU 78
*/
/**
* \def U_I18N_API_CLASS
* Set to export library symbols from inside the i18n library,
* and to import them from outside, to be used on a class.
* @draft ICU 78
*/
/**
* \def U_LAYOUT_API_CLASS
* Set to export library symbols from inside the layout engine library,
* and to import them from outside, to be used on a class.
* @draft ICU 78
*/
/**
* \def U_LAYOUTEX_API_CLASS
* Set to export library symbols from inside the layout extensions library,
* and to import them from outside, to be used on a class.
* @draft ICU 78
*/
/**
* \def U_IO_API_CLASS
* Set to export library symbols from inside the ustdio library,
* and to import them from outside, to be used on a class.
* @draft ICU 78
*/
/**
* \def U_TOOLUTIL_API_CLASS
* Set to export library symbols from inside the toolutil library,
* and to import them from outside, to be used on a class.
* @draft ICU 78
*/
// When used on Windows, the U_..._API macros expand to __declspec(dllexport)
// and __declspec(dllimport), which when used on a class results in all members
// of the class being exported, including private members, which is problematic
// for classes that have private members that can't be exported (such as
// templates from the standard library):
//
// https://learn.microsoft.com/en-us/cpp/error-messages/compiler-warnings/compiler-warning-level-1-c4251
//
// The U_..._API_CLASS variants below are the class-level counterparts of the
// per-library U_..._API macros.
#if U_PLATFORM_HAS_WIN32_API
// Win32: expand to nothing, so that a class declared with one of these macros
// does not get a class-wide dllexport/dllimport attribute.
#define U_DATA_API_CLASS
#define U_COMMON_API_CLASS
#define U_I18N_API_CLASS
#define U_LAYOUT_API_CLASS
#define U_LAYOUTEX_API_CLASS
#define U_IO_API_CLASS
#define U_TOOLUTIL_API_CLASS
#else
// Everywhere else: identical to the corresponding U_..._API macro.
#define U_DATA_API_CLASS U_DATA_API
#define U_COMMON_API_CLASS U_COMMON_API
#define U_I18N_API_CLASS U_I18N_API
#define U_LAYOUT_API_CLASS U_LAYOUT_API
#define U_LAYOUTEX_API_CLASS U_LAYOUTEX_API
#define U_IO_API_CLASS U_IO_API
#define U_TOOLUTIL_API_CLASS U_TOOLUTIL_API
#endif
#endif // U_FORCE_HIDE_DRAFT_API
/**
* \def U_STANDARD_CPP_NAMESPACE
* Control of C++ Namespace

View File

@ -53,7 +53,7 @@
* This value will change in the subsequent releases of ICU
* @stable ICU 2.4
*/
#define U_ICU_VERSION_MAJOR_NUM 77
#define U_ICU_VERSION_MAJOR_NUM 78
/** The current ICU minor version as an integer.
* This value will change in the subsequent releases of ICU
@ -79,7 +79,7 @@
* This value will change in the subsequent releases of ICU
* @stable ICU 2.6
*/
#define U_ICU_VERSION_SUFFIX _77
#define U_ICU_VERSION_SUFFIX _78
/**
* \def U_DEF2_ICU_ENTRY_POINT_RENAME
@ -132,7 +132,7 @@
* This value will change in the subsequent releases of ICU
* @stable ICU 2.4
*/
#define U_ICU_VERSION "77.1"
#define U_ICU_VERSION "78.1"
/**
* The current ICU library major version number as a string, for library name suffixes.
@ -145,13 +145,13 @@
*
* @stable ICU 2.6
*/
#define U_ICU_VERSION_SHORT "77"
#define U_ICU_VERSION_SHORT "78"
#ifndef U_HIDE_INTERNAL_API
/** Data version in ICU4C.
* @internal ICU 4.4 Internal Use Only
**/
#define U_ICU_DATA_VERSION "77.1"
#define U_ICU_DATA_VERSION "78.1"
#endif /* U_HIDE_INTERNAL_API */
/*===========================================================================

View File

@ -125,7 +125,6 @@ typedef uint8_t UVersionInfo[U_MAX_VERSION_LENGTH];
U_NAMESPACE_USE
# endif
#ifndef U_FORCE_HIDE_DRAFT_API
/**
* \def U_HEADER_NESTED_NAMESPACE
* Nested namespace used inside U_ICU_NAMESPACE for header-only APIs.
@ -136,7 +135,7 @@ typedef uint8_t UVersionInfo[U_MAX_VERSION_LENGTH];
* this is always "header". Header-only types are not marked for export,
* which on Windows already avoids callers linking with library instantiations.
*
* @draft ICU 76
* @stable ICU 76
* @see U_HEADER_ONLY_NAMESPACE
*/
@ -147,9 +146,10 @@ typedef uint8_t UVersionInfo[U_MAX_VERSION_LENGTH];
* "U_ICU_NAMESPACE::header" or "U_ICU_NAMESPACE::internal",
* see U_HEADER_NESTED_NAMESPACE for details.
*
* @draft ICU 76
* @stable ICU 76
*/
#ifndef U_FORCE_HIDE_DRAFT_API
/**
* \def U_ICU_NAMESPACE_OR_INTERNAL
* Namespace used for header-only APIs that used to be regular C++ APIs.
@ -159,6 +159,7 @@ typedef uint8_t UVersionInfo[U_MAX_VERSION_LENGTH];
*
* @draft ICU 77
*/
#endif // U_FORCE_HIDE_DRAFT_API
// The first test is the same as for defining U_EXPORT for Windows.
#if defined(_MSC_VER) || (UPRV_HAS_DECLSPEC_ATTRIBUTE(__dllexport__) && \
@ -180,7 +181,6 @@ typedef uint8_t UVersionInfo[U_MAX_VERSION_LENGTH];
#define U_HEADER_ONLY_NAMESPACE U_ICU_NAMESPACE::U_HEADER_NESTED_NAMESPACE
namespace U_HEADER_ONLY_NAMESPACE {}
#endif // U_FORCE_HIDE_DRAFT_API
#endif /* __cplusplus */

View File

@ -974,12 +974,12 @@ void UnicodeSet::_add(const UnicodeString& s) {
setToBogus();
return;
}
UnicodeString* t = new UnicodeString(s);
if (t == nullptr) { // Check for memory allocation error.
LocalPointer<UnicodeString> t(new UnicodeString(s));
if (t.isNull()) { // Check for memory allocation error.
setToBogus();
return;
}
strings_->sortedInsert(t, compareUnicodeString, ec);
strings_->sortedInsert(t.orphan(), compareUnicodeString, ec);
if (U_FAILURE(ec)) {
setToBogus();
}

View File

@ -275,20 +275,24 @@ UnicodeString::doExtract(int32_t start, int32_t length,
}
// perform the conversion
ucnv_fromUnicode(cnv, &dest, destLimit, &src, srcLimit, nullptr, true, &errorCode);
UErrorCode bufferStatus = U_ZERO_ERROR;
ucnv_fromUnicode(cnv, &dest, destLimit, &src, srcLimit, nullptr, true, &bufferStatus);
length = static_cast<int32_t>(dest - originalDest);
// if an overflow occurs, then get the preflighting length
if(errorCode==U_BUFFER_OVERFLOW_ERROR) {
if(bufferStatus==U_BUFFER_OVERFLOW_ERROR) {
char buffer[1024];
destLimit=buffer+sizeof(buffer);
do {
dest=buffer;
errorCode=U_ZERO_ERROR;
ucnv_fromUnicode(cnv, &dest, destLimit, &src, srcLimit, nullptr, true, &errorCode);
bufferStatus=U_ZERO_ERROR;
ucnv_fromUnicode(cnv, &dest, destLimit, &src, srcLimit, nullptr, true, &bufferStatus);
length += static_cast<int32_t>(dest - buffer);
} while(errorCode==U_BUFFER_OVERFLOW_ERROR);
} while(bufferStatus==U_BUFFER_OVERFLOW_ERROR);
}
if (U_FAILURE(bufferStatus)) {
errorCode = bufferStatus;
}
return u_terminateChars(originalDest, destCapacity, length, &errorCode);
@ -389,17 +393,15 @@ UnicodeString::doCodepageCreate(const char *codepageData,
// perform the conversion
array = getArrayStart();
myTarget = array + length();
UErrorCode bufferStatus = U_ZERO_ERROR;
ucnv_toUnicode(converter, &myTarget, array + getCapacity(),
&mySource, mySourceEnd, nullptr, true, &status);
&mySource, mySourceEnd, nullptr, true, &bufferStatus);
// update the conversion parameters
setLength(static_cast<int32_t>(myTarget - array));
// allocate more space and copy data, if needed
if(status == U_BUFFER_OVERFLOW_ERROR) {
// reset the error code
status = U_ZERO_ERROR;
if(bufferStatus == U_BUFFER_OVERFLOW_ERROR) {
// keep the previous conversion results
doCopyArray = true;
@ -407,6 +409,9 @@ UnicodeString::doCodepageCreate(const char *codepageData,
// try 2 char16_t's per remaining source byte
arraySize = static_cast<int32_t>(length() + 2 * (mySourceEnd - mySource));
} else {
if (U_FAILURE(bufferStatus)) {
status = bufferStatus;
}
break;
}
}

View File

@ -74,4 +74,9 @@
#define _POSIX_C_SOURCE 200809L
#endif
/* Prevent _XOPEN_SOURCE from breaking build on macOS when aligned_alloc exists. */
#if defined(__APPLE__) && !defined(_DARWIN_C_SOURCE)
# define _DARWIN_C_SOURCE
#endif
#endif /* __UPOSIXDEFS_H__ */

View File

@ -336,7 +336,7 @@ U_CFUNC uint32_t
u_getUnicodeProperties(UChar32 c, int32_t column);
/**
* Get the the maximum values for some enum/int properties.
* Get the maximum values for some enum/int properties.
* Use the same column numbers as for u_getUnicodeProperties().
* The returned value will contain maximum values stored in the same bit fields
* as where the enum values are stored in the u_getUnicodeProperties()
@ -500,6 +500,10 @@ ublock_addPropertyStarts(const USetAdder *sa, UErrorCode &errorCode);
uprv_getInclusions(const USetAdder *sa, UErrorCode *pErrorCode);
*/
/** @internal for icuexportdata */
U_CAPI void U_EXPORT2
uprv_addScriptExtensionsCodePoints(const USetAdder *sa, UErrorCode *pErrorCode);
// TODO: Move this into a different header file (udataswp.h? new unames.h?) so that uprops.h
// need not be C-compatible any more.
/**

View File

@ -120,9 +120,7 @@ public:
// No heap allocation. Use only on the stack.
static void* U_EXPORT2 operator new(size_t) noexcept = delete;
static void* U_EXPORT2 operator new[](size_t) noexcept = delete;
#if U_HAVE_PLACEMENT_NEW
static void* U_EXPORT2 operator new(size_t, void*) noexcept = delete;
#endif
StackUResourceBundle();
~StackUResourceBundle();

View File

@ -45,11 +45,11 @@ const int32_t SCRIPT_PROPS[] = {
// Begin copy-paste output from
// tools/trunk/unicode/py/parsescriptmetadata.py
0x0040 | RECOMMENDED, // Zyyy
0x0308 | RECOMMENDED, // Zinh
0x030F | RECOMMENDED, // Zinh
0x0628 | RECOMMENDED | RTL, // Arab
0x0531 | RECOMMENDED | CASED, // Armn
0x0995 | RECOMMENDED, // Beng
0x3105 | RECOMMENDED | LB_LETTERS, // Bopo
0x3105 | LIMITED_USE | LB_LETTERS, // Bopo
0x13C4 | LIMITED_USE | CASED, // Cher
0x03E2 | EXCLUSION | CASED, // Copt
0x042F | RECOMMENDED | CASED, // Cyrl
@ -223,7 +223,7 @@ const int32_t SCRIPT_PROPS[] = {
0x11A5C | EXCLUSION, // Soyo
0x11A0B | EXCLUSION, // Zanb
0x1180B | EXCLUSION, // Dogr
0x11D71 | LIMITED_USE, // Gong
0x11D71 | EXCLUSION, // Gong
0x11EE5 | EXCLUSION, // Maka
0x16E40 | EXCLUSION | CASED, // Medf
0x10D12 | LIMITED_USE | RTL, // Rohg
@ -252,6 +252,10 @@ const int32_t SCRIPT_PROPS[] = {
0x11BC4 | EXCLUSION, // Sunu
0x105C2 | EXCLUSION, // Todr
0x11392 | EXCLUSION, // Tutg
0x16EA1 | EXCLUSION | CASED, // Berf
0x10950 | EXCLUSION | RTL, // Sidt
0x1E6D5 | EXCLUSION | LB_LETTERS, // Tayo
0x11DC6 | EXCLUSION, // Tols
// End copy-paste from parsescriptmetadata.py
};

View File

@ -666,11 +666,12 @@ usprep_prepare( const UStringPrepProfile* profile,
*status = U_MEMORY_ALLOCATION_ERROR;
return 0;
}
UErrorCode bufferStatus = U_ZERO_ERROR;
int32_t b1Len = usprep_map(profile, src, srcLength,
b1, s1.getCapacity(), options, parseError, status);
s1.releaseBuffer(U_SUCCESS(*status) ? b1Len : 0);
b1, s1.getCapacity(), options, parseError, &bufferStatus);
s1.releaseBuffer(U_SUCCESS(bufferStatus) ? b1Len : 0);
if(*status == U_BUFFER_OVERFLOW_ERROR){
if(bufferStatus == U_BUFFER_OVERFLOW_ERROR){
// redo processing of string
/* we do not have enough room so grow the buffer*/
b1 = s1.getBuffer(b1Len);
@ -679,12 +680,13 @@ usprep_prepare( const UStringPrepProfile* profile,
return 0;
}
*status = U_ZERO_ERROR; // reset error
bufferStatus = U_ZERO_ERROR; // reset error
b1Len = usprep_map(profile, src, srcLength,
b1, s1.getCapacity(), options, parseError, status);
s1.releaseBuffer(U_SUCCESS(*status) ? b1Len : 0);
b1, s1.getCapacity(), options, parseError, &bufferStatus);
s1.releaseBuffer(U_SUCCESS(bufferStatus) ? b1Len : 0);
}
if(U_FAILURE(*status)){
if(U_FAILURE(bufferStatus)){
*status = bufferStatus;
return 0;
}

View File

@ -95,15 +95,14 @@ _strToWCS(wchar_t *dest,
pSrcLimit = pSrc + srcLength;
for(;;) {
/* reset the error state */
*pErrorCode = U_ZERO_ERROR;
UErrorCode bufferStatus = U_ZERO_ERROR;
/* convert to chars using default converter */
ucnv_fromUnicode(conv,&tempBuf,tempBufLimit,&pSrc,pSrcLimit,nullptr,(UBool)(pSrc==pSrcLimit),pErrorCode);
ucnv_fromUnicode(conv,&tempBuf,tempBufLimit,&pSrc,pSrcLimit,nullptr,(UBool)(pSrc==pSrcLimit),&bufferStatus);
count =(tempBuf - saveBuf);
/* This should rarely occur */
if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR){
if(bufferStatus==U_BUFFER_OVERFLOW_ERROR){
tempBuf = saveBuf;
/* we don't have enough room on the stack grow the buffer */
@ -119,16 +118,15 @@ _strToWCS(wchar_t *dest,
saveBuf = tempBuf;
tempBufLimit = tempBuf + tempBufCapacity;
tempBuf = tempBuf + count;
} else {
if (U_FAILURE(bufferStatus)) {
*pErrorCode = bufferStatus;
goto cleanup;
}
break;
}
}
if(U_FAILURE(*pErrorCode)){
goto cleanup;
}
/* done with conversion null terminate the char buffer */
if(count>=tempBufCapacity){
tempBuf = saveBuf;
@ -441,20 +439,22 @@ _strFromWCS( char16_t *dest,
}
for(;;) {
*pErrorCode = U_ZERO_ERROR;
UErrorCode bufferStatus = U_ZERO_ERROR;
/* convert to stack buffer*/
ucnv_toUnicode(conv,&pTarget,pTargetLimit,(const char**)&pCSrc,pCSrcLimit,nullptr,(UBool)(pCSrc==pCSrcLimit),pErrorCode);
ucnv_toUnicode(conv,&pTarget,pTargetLimit,(const char**)&pCSrc,pCSrcLimit,nullptr,(UBool)(pCSrc==pCSrcLimit),&bufferStatus);
/* increment count to number written to stack */
count+= pTarget - target;
if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR){
if(bufferStatus==U_BUFFER_OVERFLOW_ERROR){
target = uStack;
pTarget = uStack;
pTargetLimit = uStack + _STACK_BUFFER_CAPACITY;
} else {
if (U_FAILURE(bufferStatus)) {
*pErrorCode = bufferStatus;
}
break;
}

View File

@ -872,11 +872,12 @@ UTS46::processLabel(UnicodeString &dest,
buffer[1]=0x6e;
buffer[2]=0x2d;
buffer[3]=0x2d;
UErrorCode punycodeErrorCode=U_ZERO_ERROR;
int32_t punycodeLength=u_strToPunycode(label, labelLength,
buffer+4, punycode.getCapacity()-4,
nullptr, &errorCode);
if(errorCode==U_BUFFER_OVERFLOW_ERROR) {
errorCode=U_ZERO_ERROR;
nullptr, &punycodeErrorCode);
if(punycodeErrorCode==U_BUFFER_OVERFLOW_ERROR) {
punycodeErrorCode=U_ZERO_ERROR;
punycode.releaseBuffer(4);
buffer=punycode.getBuffer(4+punycodeLength);
if(buffer==nullptr) {
@ -885,11 +886,12 @@ UTS46::processLabel(UnicodeString &dest,
}
punycodeLength=u_strToPunycode(label, labelLength,
buffer+4, punycode.getCapacity()-4,
nullptr, &errorCode);
nullptr, &punycodeErrorCode);
}
punycodeLength+=4;
punycode.releaseBuffer(punycodeLength);
if(U_FAILURE(errorCode)) {
if(U_FAILURE(punycodeErrorCode)) {
errorCode = punycodeErrorCode;
return destLabelLength;
}
if(punycodeLength>63) {

View File

@ -40,7 +40,9 @@ ScriptSet::ScriptSet(const ScriptSet &other) {
}
// Copy assignment: overwrites this set's bits with a copy of other's bits.
// Assigning an object to itself is a no-op, so uprv_memcpy is never called
// with identical source and destination buffers.
ScriptSet & ScriptSet::operator =(const ScriptSet &other) {
    if (this == &other) {
        return *this;
    }
    uprv_memcpy(bits, other.bits, sizeof(bits));
    return *this;
}

View File

@ -64,6 +64,7 @@ typedef enum ECleanupI18NType {
UCLN_I18N_LIST_FORMATTER,
UCLN_I18N_NUMSYS,
UCLN_I18N_MF2_UNISETS,
UCLN_I18N_MF2_DATE_PARSERS,
UCLN_I18N_COUNT /* This must be last */
} ECleanupI18NType;

Binary file not shown.