Bug Summary

File: out/../deps/icu-small/source/i18n/number_longnames.cpp
Warning: line 852, column 17
Value stored to 'endSlice' is never read

Annotated Source Code

Press '?' to see keyboard shortcuts

clang -cc1 -cc1 -triple x86_64-unknown-linux-gnu -analyze -disable-free -clear-ast-before-backend -disable-llvm-verifier -discard-value-names -main-file-name number_longnames.cpp -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -mrelocation-model pic -pic-level 2 -pic-is-pie -mframe-pointer=all -fmath-errno -ffp-contract=on -fno-rounding-math -mconstructor-aliases -funwind-tables=2 -target-cpu x86-64 -tune-cpu generic -debugger-tuning=gdb -fcoverage-compilation-dir=/home/maurizio/node-v18.6.0/out -resource-dir /usr/local/lib/clang/16.0.0 -D V8_DEPRECATION_WARNINGS -D V8_IMMINENT_DEPRECATION_WARNINGS -D _GLIBCXX_USE_CXX11_ABI=1 -D NODE_OPENSSL_CONF_NAME=nodejs_conf -D NODE_OPENSSL_HAS_QUIC -D __STDC_FORMAT_MACROS -D OPENSSL_NO_PINSHARED -D OPENSSL_THREADS -D U_COMMON_IMPLEMENTATION=1 -D U_I18N_IMPLEMENTATION=1 -D U_IO_IMPLEMENTATION=1 -D U_TOOLUTIL_IMPLEMENTATION=1 -D U_ATTRIBUTE_DEPRECATED= -D _CRT_SECURE_NO_DEPRECATE= -D U_STATIC_IMPLEMENTATION=1 -D UCONFIG_NO_SERVICE=1 -D U_ENABLE_DYLOAD=0 -D U_HAVE_STD_STRING=1 -D UCONFIG_NO_BREAK_ITERATION=0 -I ../deps/icu-small/source/common -I ../deps/icu-small/source/i18n -I ../deps/icu-small/source/tools/toolutil -internal-isystem /usr/lib/gcc/x86_64-redhat-linux/8/../../../../include/c++/8 -internal-isystem /usr/lib/gcc/x86_64-redhat-linux/8/../../../../include/c++/8/x86_64-redhat-linux -internal-isystem /usr/lib/gcc/x86_64-redhat-linux/8/../../../../include/c++/8/backward -internal-isystem 
/usr/local/lib/clang/16.0.0/include -internal-isystem /usr/local/include -internal-isystem /usr/lib/gcc/x86_64-redhat-linux/8/../../../../x86_64-redhat-linux/include -internal-externc-isystem /include -internal-externc-isystem /usr/include -O3 -Wno-unused-parameter -Wno-deprecated-declarations -Wno-strict-aliasing -std=gnu++17 -fdeprecated-macro -fdebug-compilation-dir=/home/maurizio/node-v18.6.0/out -ferror-limit 19 -fgnuc-version=4.2.1 -vectorize-loops -vectorize-slp -analyzer-output=html -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /tmp/scan-build-2022-08-22-142216-507842-1 -x c++ ../deps/icu-small/source/i18n/number_longnames.cpp
1// © 2017 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
3
4#include "unicode/utypes.h"
5
6#if !UCONFIG_NO_FORMATTING0
7
8#include <cstdlib>
9
10#include "unicode/simpleformatter.h"
11#include "unicode/ures.h"
12#include "ureslocs.h"
13#include "charstr.h"
14#include "uresimp.h"
15#include "measunit_impl.h"
16#include "number_longnames.h"
17#include "number_microprops.h"
18#include <algorithm>
19#include "cstring.h"
20#include "util.h"
21
22using namespace icu;
23using namespace icu::number;
24using namespace icu::number::impl;
25
26namespace {
27
28/**
29 * Display Name (this format has no placeholder).
30 *
31 * Used as an index into the LongNameHandler::simpleFormats array. Units
32 * resources cover the normal set of PluralRules keys, as well as `dnam` and
33 * `per` forms.
34 */
35constexpr int32_t DNAM_INDEX = StandardPlural::Form::COUNT;
36/**
37 * "per" form (e.g. "{0} per day" is day's "per" form).
38 *
39 * Used as an index into the LongNameHandler::simpleFormats array. Units
40 * resources cover the normal set of PluralRules keys, as well as `dnam` and
41 * `per` forms.
42 */
43constexpr int32_t PER_INDEX = StandardPlural::Form::COUNT + 1;
44/**
45 * Gender of the word, in languages with grammatical gender.
46 */
47constexpr int32_t GENDER_INDEX = StandardPlural::Form::COUNT + 2;
48// Number of keys in the array populated by PluralTableSink.
49constexpr int32_t ARRAY_LENGTH = StandardPlural::Form::COUNT + 3;
50
51// TODO(icu-units#28): load this list from resources, after creating a "&set"
52// function for use in ldml2icu rules.
53const int32_t GENDER_COUNT = 7;
54const char *gGenders[GENDER_COUNT] = {"animate", "common", "feminine", "inanimate",
55 "masculine", "neuter", "personal"};
56
57// Converts a UnicodeString to a const char*, either pointing to a string in
58// gGenders, or pointing to an empty string if an appropriate string was not
59// found.
60const char *getGenderString(UnicodeString uGender, UErrorCode status) {
61 if (uGender.length() == 0) {
62 return "";
63 }
64 CharString gender;
65 gender.appendInvariantChars(uGender, status);
66 if (U_FAILURE(status)) {
67 return "";
68 }
69 int32_t first = 0;
70 int32_t last = GENDER_COUNT;
71 while (first < last) {
72 int32_t mid = (first + last) / 2;
73 int32_t cmp = uprv_strcmp(gender.data(), gGenders[mid]):: strcmp(gender.data(), gGenders[mid]);
74 if (cmp == 0) {
75 return gGenders[mid];
76 } else if (cmp > 0) {
77 first = mid + 1;
78 } else if (cmp < 0) {
79 last = mid;
80 }
81 }
82 // We don't return an error in case our gGenders list is incomplete in
83 // production.
84 //
85 // TODO(icu-units#28): a unit test checking all locales' genders are covered
86 // by gGenders? Else load a complete list of genders found in
87 // grammaticalFeatures in an initOnce.
88 return "";
89}
90
91// Returns the array index that corresponds to the given pluralKeyword.
92static int32_t getIndex(const char* pluralKeyword, UErrorCode& status) {
93 // pluralKeyword can also be "dnam", "per", or "gender"
94 switch (*pluralKeyword) {
95 case 'd':
96 if (uprv_strcmp(pluralKeyword + 1, "nam"):: strcmp(pluralKeyword + 1, "nam") == 0) {
97 return DNAM_INDEX;
98 }
99 break;
100 case 'g':
101 if (uprv_strcmp(pluralKeyword + 1, "ender"):: strcmp(pluralKeyword + 1, "ender") == 0) {
102 return GENDER_INDEX;
103 }
104 break;
105 case 'p':
106 if (uprv_strcmp(pluralKeyword + 1, "er"):: strcmp(pluralKeyword + 1, "er") == 0) {
107 return PER_INDEX;
108 }
109 break;
110 default:
111 break;
112 }
113 StandardPlural::Form plural = StandardPlural::fromString(pluralKeyword, status);
114 return plural;
115}
116
117// Selects a string out of the `strings` array which corresponds to the
118// specified plural form, with fallback to the OTHER form.
119//
120// The `strings` array must have ARRAY_LENGTH items: one corresponding to each
121// of the plural forms, plus a display name ("dnam") and a "per" form.
122static UnicodeString getWithPlural(
123 const UnicodeString* strings,
124 StandardPlural::Form plural,
125 UErrorCode& status) {
126 UnicodeString result = strings[plural];
127 if (result.isBogus()) {
128 result = strings[StandardPlural::Form::OTHER];
129 }
130 if (result.isBogus()) {
131 // There should always be data in the "other" plural variant.
132 status = U_INTERNAL_PROGRAM_ERROR;
133 }
134 return result;
135}
136
// Where the "{0}" placeholder sits inside a unit pattern, as reported by
// extractCorePattern():
//   PH_NONE      - no "{0}" found in the pattern
//   PH_BEGINNING - the pattern starts with "{0}"
//   PH_MIDDLE    - "{0}" occurs, but neither at the start nor at the end
//   PH_END       - the pattern ends with "{0}"
// NOTE(review): PH_EMPTY is not assigned by extractCorePattern(); presumably
// it marks a not-yet-populated pattern - confirm against the callers.
enum PlaceholderPosition { PH_EMPTY, PH_NONE, PH_BEGINNING, PH_MIDDLE, PH_END };
138
139/**
140 * Returns three outputs extracted from pattern.
141 *
142 * @param coreUnit is extracted as per Extract(...) in the spec:
143 * https://unicode.org/reports/tr35/tr35-general.html#compound-units
144 * @param PlaceholderPosition indicates where in the string the placeholder was
145 * found.
146 * @param joinerChar Iff the placeholder was at the beginning or end, joinerChar
147 * contains the space character (if any) that separated the placeholder from
148 * the rest of the pattern. Otherwise, joinerChar is set to NUL. Only one
149 * space character is considered.
150 */
151void extractCorePattern(const UnicodeString &pattern,
152 UnicodeString &coreUnit,
153 PlaceholderPosition &placeholderPosition,
154 UChar &joinerChar) {
155 joinerChar = 0;
156 int32_t len = pattern.length();
157 if (pattern.startsWith(u"{0}", 3)) {
158 placeholderPosition = PH_BEGINNING;
159 if (u_isJavaSpaceCharu_isJavaSpaceChar_71(pattern[3])) {
160 joinerChar = pattern[3];
161 coreUnit.setTo(pattern, 4, len - 4);
162 } else {
163 coreUnit.setTo(pattern, 3, len - 3);
164 }
165 } else if (pattern.endsWith(u"{0}", 3)) {
166 placeholderPosition = PH_END;
167 if (u_isJavaSpaceCharu_isJavaSpaceChar_71(pattern[len - 4])) {
168 coreUnit.setTo(pattern, 0, len - 4);
169 joinerChar = pattern[len - 4];
170 } else {
171 coreUnit.setTo(pattern, 0, len - 3);
172 }
173 } else if (pattern.indexOf(u"{0}", 3, 1, len - 2) == -1) {
174 placeholderPosition = PH_NONE;
175 coreUnit = pattern;
176 } else {
177 placeholderPosition = PH_MIDDLE;
178 coreUnit = pattern;
179 }
180}
181
182//////////////////////////
183/// BEGIN DATA LOADING ///
184//////////////////////////
185
186// Gets the gender of a built-in unit: unit must be a built-in. Returns an empty
187// string both in case of unknown gender and in case of unknown unit.
188UnicodeString
189getGenderForBuiltin(const Locale &locale, const MeasureUnit &builtinUnit, UErrorCode &status) {
190 LocalUResourceBundlePointer unitsBundle(ures_openures_open_71(U_ICUDATA_UNIT"icudt" "71" "l" "-" "unit", locale.getName(), &status));
191 if (U_FAILURE(status)) { return {}; }
192
193 // Map duration-year-person, duration-week-person, etc. to duration-year, duration-week, ...
194 // TODO(ICU-20400): Get duration-*-person data properly with aliases.
195 StringPiece subtypeForResource;
196 int32_t subtypeLen = static_cast<int32_t>(uprv_strlen(builtinUnit.getSubtype()):: strlen(builtinUnit.getSubtype()));
197 if (subtypeLen > 7 && uprv_strcmp(builtinUnit.getSubtype() + subtypeLen - 7, "-person"):: strcmp(builtinUnit.getSubtype() + subtypeLen - 7, "-person"
)
== 0) {
198 subtypeForResource = {builtinUnit.getSubtype(), subtypeLen - 7};
199 } else {
200 subtypeForResource = builtinUnit.getSubtype();
201 }
202
203 CharString key;
204 key.append("units/", status);
205 key.append(builtinUnit.getType(), status);
206 key.append("/", status);
207 key.append(subtypeForResource, status);
208 key.append("/gender", status);
209
210 UErrorCode localStatus = status;
211 int32_t resultLen = 0;
212 const UChar *result =
213 ures_getStringByKeyWithFallbackures_getStringByKeyWithFallback_71(unitsBundle.getAlias(), key.data(), &resultLen, &localStatus);
214 if (U_SUCCESS(localStatus)) {
215 status = localStatus;
216 return UnicodeString(true, result, resultLen);
217 } else {
218 // TODO(icu-units#28): "$unitRes/gender" does not exist. Do we want to
219 // check whether the parent "$unitRes" exists? Then we could return
220 // U_MISSING_RESOURCE_ERROR for incorrect usage (e.g. builtinUnit not
221 // being a builtin).
222 return {};
223 }
224}
225
226// Loads data from a resource tree with paths matching
227// $key/$pluralForm/$gender/$case, with lateral inheritance for missing cases
228// and genders.
229//
230// An InflectedPluralSink is configured to load data for a specific gender and
231// case. It loads all plural forms, because selection between plural forms is
232// dependent upon the value being formatted.
233//
234// See data/unit/de.txt and data/unit/fr.txt for examples - take a look at
235// units/compound/power2: German has case, French has differences for gender,
236// but no case.
237//
238// TODO(icu-units#138): Conceptually similar to PluralTableSink, however the
239// tree structures are different. After homogenizing the structures, we may be
240// able to unify the two classes.
241//
242// TODO: Spec violation: expects presence of "count" - does not fallback to an
243// absent "count"! If this fallback were added, getCompoundValue could be
244// superseded?
245class InflectedPluralSink : public ResourceSink {
246 public:
247 // Accepts `char*` rather than StringPiece because
248 // ResourceTable::findValue(...) requires a null-terminated `char*`.
249 //
250 // NOTE: outArray MUST have a length of at least ARRAY_LENGTH. No bounds
251 // checking is performed.
252 explicit InflectedPluralSink(const char *gender, const char *caseVariant, UnicodeString *outArray)
253 : gender(gender), caseVariant(caseVariant), outArray(outArray) {
254 // Initialize the array to bogus strings.
255 for (int32_t i = 0; i < ARRAY_LENGTH; i++) {
256 outArray[i].setToBogus();
257 }
258 }
259
260 // See ResourceSink::put().
261 void put(const char *key, ResourceValue &value, UBool /*noFallback*/, UErrorCode &status) U_OVERRIDEoverride {
262 int32_t pluralIndex = getIndex(key, status);
263 if (U_FAILURE(status)) { return; }
264 if (!outArray[pluralIndex].isBogus()) {
265 // We already have a pattern
266 return;
267 }
268 ResourceTable genderTable = value.getTable(status);
269 ResourceTable caseTable; // This instance has to outlive `value`
270 if (loadForPluralForm(genderTable, caseTable, value, status)) {
271 outArray[pluralIndex] = value.getUnicodeString(status);
272 }
273 }
274
275 private:
276 // Tries to load data for the configured gender from `genderTable`. Returns
277 // true if found, returning the data in `value`. The returned data will be
278 // for the configured gender if found, falling back to "neuter" and
279 // no-gender if not. The caseTable parameter holds the intermediate
280 // ResourceTable for the sake of lifetime management.
281 bool loadForPluralForm(const ResourceTable &genderTable,
282 ResourceTable &caseTable,
283 ResourceValue &value,
284 UErrorCode &status) {
285 if (uprv_strcmp(gender, ""):: strcmp(gender, "") != 0) {
286 if (loadForGender(genderTable, gender, caseTable, value, status)) {
287 return true;
288 }
289 if (uprv_strcmp(gender, "neuter"):: strcmp(gender, "neuter") != 0 &&
290 loadForGender(genderTable, "neuter", caseTable, value, status)) {
291 return true;
292 }
293 }
294 if (loadForGender(genderTable, "_", caseTable, value, status)) {
295 return true;
296 }
297 return false;
298 }
299
300 // Tries to load data for the given gender from `genderTable`. Returns true
301 // if found, returning the data in `value`. The returned data will be for
302 // the configured case if found, falling back to "nominative" and no-case if
303 // not.
304 bool loadForGender(const ResourceTable &genderTable,
305 const char *genderVal,
306 ResourceTable &caseTable,
307 ResourceValue &value,
308 UErrorCode &status) {
309 if (!genderTable.findValue(genderVal, value)) {
310 return false;
311 }
312 caseTable = value.getTable(status);
313 if (uprv_strcmp(caseVariant, ""):: strcmp(caseVariant, "") != 0) {
314 if (loadForCase(caseTable, caseVariant, value)) {
315 return true;
316 }
317 if (uprv_strcmp(caseVariant, "nominative"):: strcmp(caseVariant, "nominative") != 0 &&
318 loadForCase(caseTable, "nominative", value)) {
319 return true;
320 }
321 }
322 if (loadForCase(caseTable, "_", value)) {
323 return true;
324 }
325 return false;
326 }
327
328 // Tries to load data for the given case from `caseTable`. Returns true if
329 // found, returning the data in `value`.
330 bool loadForCase(const ResourceTable &caseTable, const char *caseValue, ResourceValue &value) {
331 if (!caseTable.findValue(caseValue, value)) {
332 return false;
333 }
334 return true;
335 }
336
337 const char *gender;
338 const char *caseVariant;
339 UnicodeString *outArray;
340};
341
342// Fetches localised formatting patterns for the given subKey. See documentation
343// for InflectedPluralSink for details.
344//
345// Data is loaded for the appropriate unit width, with missing data filled in
346// from unitsShort.
347void getInflectedMeasureData(StringPiece subKey,
348 const Locale &locale,
349 const UNumberUnitWidth &width,
350 const char *gender,
351 const char *caseVariant,
352 UnicodeString *outArray,
353 UErrorCode &status) {
354 InflectedPluralSink sink(gender, caseVariant, outArray);
355 LocalUResourceBundlePointer unitsBundle(ures_openures_open_71(U_ICUDATA_UNIT"icudt" "71" "l" "-" "unit", locale.getName(), &status));
356 if (U_FAILURE(status)) { return; }
357
358 CharString key;
359 key.append("units", status);
360 if (width == UNUM_UNIT_WIDTH_NARROW) {
361 key.append("Narrow", status);
362 } else if (width == UNUM_UNIT_WIDTH_SHORT) {
363 key.append("Short", status);
364 }
365 key.append("/", status);
366 key.append(subKey, status);
367
368 UErrorCode localStatus = status;
369 ures_getAllChildrenWithFallbackures_getAllChildrenWithFallback_71(unitsBundle.getAlias(), key.data(), sink, localStatus);
370 if (width == UNUM_UNIT_WIDTH_SHORT) {
371 status = localStatus;
372 return;
373 }
374}
375
376class PluralTableSink : public ResourceSink {
377 public:
378 // NOTE: outArray MUST have a length of at least ARRAY_LENGTH. No bounds
379 // checking is performed.
380 explicit PluralTableSink(UnicodeString *outArray) : outArray(outArray) {
381 // Initialize the array to bogus strings.
382 for (int32_t i = 0; i < ARRAY_LENGTH; i++) {
383 outArray[i].setToBogus();
384 }
385 }
386
387 void put(const char *key, ResourceValue &value, UBool /*noFallback*/, UErrorCode &status) U_OVERRIDEoverride {
388 if (uprv_strcmp(key, "case"):: strcmp(key, "case") == 0) {
389 return;
390 }
391 int32_t index = getIndex(key, status);
392 if (U_FAILURE(status)) { return; }
393 if (!outArray[index].isBogus()) {
394 return;
395 }
396 outArray[index] = value.getUnicodeString(status);
397 if (U_FAILURE(status)) { return; }
398 }
399
400 private:
401 UnicodeString *outArray;
402};
403
404/**
405 * Populates outArray with `locale`-specific values for `unit` through use of
406 * PluralTableSink. Only the set of basic units are supported!
407 *
408 * Reading from resources *unitsNarrow* and *unitsShort* (for width
409 * UNUM_UNIT_WIDTH_NARROW), or just *unitsShort* (for width
410 * UNUM_UNIT_WIDTH_SHORT). For other widths, it reads just "units".
411 *
412 * @param unit must be a built-in unit, i.e. must have a type and subtype,
413 * listed in gTypes and gSubTypes in measunit.cpp.
414 * @param unitDisplayCase the empty string and "nominative" are treated the
415 * same. For other cases, strings for the requested case are used if found.
416 * (For any missing case-specific data, we fall back to nominative.)
417 * @param outArray must be of fixed length ARRAY_LENGTH.
418 */
419void getMeasureData(const Locale &locale,
420 const MeasureUnit &unit,
421 const UNumberUnitWidth &width,
422 const char *unitDisplayCase,
423 UnicodeString *outArray,
424 UErrorCode &status) {
425 PluralTableSink sink(outArray);
426 LocalUResourceBundlePointer unitsBundle(ures_openures_open_71(U_ICUDATA_UNIT"icudt" "71" "l" "-" "unit", locale.getName(), &status));
427 if (U_FAILURE(status)) { return; }
428
429 CharString subKey;
430 subKey.append("/", status);
431 subKey.append(unit.getType(), status);
432 subKey.append("/", status);
433
434 // Check if unitSubType is an alias or not.
435 LocalUResourceBundlePointer aliasBundle(ures_openures_open_71(U_ICUDATA_ALIAS"ICUDATA", "metadata", &status));
436
437 UErrorCode aliasStatus = status;
438 StackUResourceBundle aliasFillIn;
439 CharString aliasKey;
440 aliasKey.append("alias/unit/", aliasStatus);
441 aliasKey.append(unit.getSubtype(), aliasStatus);
442 aliasKey.append("/replacement", aliasStatus);
443 ures_getByKeyWithFallbackures_getByKeyWithFallback_71(aliasBundle.getAlias(), aliasKey.data(), aliasFillIn.getAlias(),
444 &aliasStatus);
445 CharString unitSubType;
446 if (!U_FAILURE(aliasStatus)) {
447 // This means the subType is an alias. Then, replace unitSubType with the replacement.
448 auto replacement = ures_getUnicodeString(aliasFillIn.getAlias(), &status);
449 unitSubType.appendInvariantChars(replacement, status);
450 } else {
451 unitSubType.append(unit.getSubtype(), status);
452 }
453
454 // Map duration-year-person, duration-week-person, etc. to duration-year, duration-week, ...
455 // TODO(ICU-20400): Get duration-*-person data properly with aliases.
456 int32_t subtypeLen = static_cast<int32_t>(uprv_strlen(unitSubType.data()):: strlen(unitSubType.data()));
457 if (subtypeLen > 7 && uprv_strcmp(unitSubType.data() + subtypeLen - 7, "-person"):: strcmp(unitSubType.data() + subtypeLen - 7, "-person") == 0) {
458 subKey.append({unitSubType.data(), subtypeLen - 7}, status);
459 } else {
460 subKey.append({unitSubType.data(), subtypeLen}, status);
461 }
462
463 if (width != UNUM_UNIT_WIDTH_FULL_NAME) {
464 UErrorCode localStatus = status;
465 CharString genderKey;
466 genderKey.append("units", localStatus);
467 genderKey.append(subKey, localStatus);
468 genderKey.append("/gender", localStatus);
469 StackUResourceBundle fillIn;
470 ures_getByKeyWithFallbackures_getByKeyWithFallback_71(unitsBundle.getAlias(), genderKey.data(), fillIn.getAlias(),
471 &localStatus);
472 outArray[GENDER_INDEX] = ures_getUnicodeString(fillIn.getAlias(), &localStatus);
473 }
474
475 CharString key;
476 key.append("units", status);
477 if (width == UNUM_UNIT_WIDTH_NARROW) {
478 key.append("Narrow", status);
479 } else if (width == UNUM_UNIT_WIDTH_SHORT) {
480 key.append("Short", status);
481 }
482 key.append(subKey, status);
483
484 // Grab desired case first, if available. Then grab no-case data to fill in
485 // the gaps.
486 if (width == UNUM_UNIT_WIDTH_FULL_NAME && unitDisplayCase[0] != 0) {
487 CharString caseKey;
488 caseKey.append(key, status);
489 caseKey.append("/case/", status);
490 caseKey.append(unitDisplayCase, status);
491
492 UErrorCode localStatus = U_ZERO_ERROR;
493 // TODO(icu-units#138): our fallback logic is not spec-compliant:
494 // lateral fallback should happen before locale fallback. Switch to
495 // getInflectedMeasureData after homogenizing data format? Find a unit
496 // test case that demonstrates the incorrect fallback logic (via
497 // regional variant of an inflected language?)
498 ures_getAllChildrenWithFallbackures_getAllChildrenWithFallback_71(unitsBundle.getAlias(), caseKey.data(), sink, localStatus);
499 }
500
501 // TODO(icu-units#138): our fallback logic is not spec-compliant: we
502 // check the given case, then go straight to the no-case data. The spec
503 // states we should first look for case="nominative". As part of #138,
504 // either get the spec changed, or add unit tests that warn us if
505 // case="nominative" data differs from no-case data?
506 UErrorCode localStatus = U_ZERO_ERROR;
507 ures_getAllChildrenWithFallbackures_getAllChildrenWithFallback_71(unitsBundle.getAlias(), key.data(), sink, localStatus);
508 if (width == UNUM_UNIT_WIDTH_SHORT) {
509 if (U_FAILURE(localStatus)) {
510 status = localStatus;
511 }
512 return;
513 }
514}
515
516// NOTE: outArray MUST have a length of at least ARRAY_LENGTH.
517void getCurrencyLongNameData(const Locale &locale, const CurrencyUnit &currency, UnicodeString *outArray,
518 UErrorCode &status) {
519 // In ICU4J, this method gets a CurrencyData from CurrencyData.provider.
520 // TODO(ICU4J): Implement this without going through CurrencyData, like in ICU4C?
521 PluralTableSink sink(outArray);
522 LocalUResourceBundlePointer unitsBundle(ures_openures_open_71(U_ICUDATA_CURR"icudt" "71" "l" "-" "curr", locale.getName(), &status));
523 if (U_FAILURE(status)) { return; }
524 ures_getAllChildrenWithFallbackures_getAllChildrenWithFallback_71(unitsBundle.getAlias(), "CurrencyUnitPatterns", sink, status);
525 if (U_FAILURE(status)) { return; }
526 for (int32_t i = 0; i < StandardPlural::Form::COUNT; i++) {
527 UnicodeString &pattern = outArray[i];
528 if (pattern.isBogus()) {
529 continue;
530 }
531 int32_t longNameLen = 0;
532 const char16_t *longName = ucurr_getPluralNameucurr_getPluralName_71(
533 currency.getISOCurrency(),
534 locale.getName(),
535 nullptr /* isChoiceFormat */,
536 StandardPlural::getKeyword(static_cast<StandardPlural::Form>(i)),
537 &longNameLen,
538 &status);
539 // Example pattern from data: "{0} {1}"
540 // Example output after find-and-replace: "{0} US dollars"
541 pattern.findAndReplace(UnicodeString(u"{1}"), UnicodeString(longName, longNameLen));
542 }
543}
544
545UnicodeString getCompoundValue(StringPiece compoundKey,
546 const Locale &locale,
547 const UNumberUnitWidth &width,
548 UErrorCode &status) {
549 LocalUResourceBundlePointer unitsBundle(ures_openures_open_71(U_ICUDATA_UNIT"icudt" "71" "l" "-" "unit", locale.getName(), &status));
550 if (U_FAILURE(status)) { return {}; }
551 CharString key;
552 key.append("units", status);
553 if (width == UNUM_UNIT_WIDTH_NARROW) {
554 key.append("Narrow", status);
555 } else if (width == UNUM_UNIT_WIDTH_SHORT) {
556 key.append("Short", status);
557 }
558 key.append("/compound/", status);
559 key.append(compoundKey, status);
560
561 UErrorCode localStatus = status;
562 int32_t len = 0;
563 const UChar *ptr =
564 ures_getStringByKeyWithFallbackures_getStringByKeyWithFallback_71(unitsBundle.getAlias(), key.data(), &len, &localStatus);
565 if (U_FAILURE(localStatus) && width != UNUM_UNIT_WIDTH_SHORT) {
566 // Fall back to short, which contains more compound data
567 key.clear();
568 key.append("unitsShort/compound/", status);
569 key.append(compoundKey, status);
570 ptr = ures_getStringByKeyWithFallbackures_getStringByKeyWithFallback_71(unitsBundle.getAlias(), key.data(), &len, &status);
571 } else {
572 status = localStatus;
573 }
574 if (U_FAILURE(status)) {
575 return {};
576 }
577 return UnicodeString(ptr, len);
578}
579
580/**
581 * Loads and applies deriveComponent rules from CLDR's grammaticalFeatures.xml.
582 *
583 * Consider a deriveComponent rule that looks like this:
584 *
585 * <deriveComponent feature="case" structure="per" value0="compound" value1="nominative"/>
586 *
587 * Instantiating an instance as follows:
588 *
589 * DerivedComponents d(loc, "case", "per");
590 *
591 * Applying the rule in the XML element above, `d.value0("foo")` will be "foo",
592 * and `d.value1("foo")` will be "nominative".
593 *
594 * The values returned by value0(...) and value1(...) are valid only while the
595 * instance exists. In case of any kind of failure, value0(...) and value1(...)
596 * will return "".
597 */
598class DerivedComponents {
599 public:
600 /**
601 * Constructor.
602 *
603 * The feature and structure parameters must be null-terminated. The string
604 * referenced by compoundValue must exist for longer than the
605 * DerivedComponents instance.
606 */
607 DerivedComponents(const Locale &locale, const char *feature, const char *structure) {
608 StackUResourceBundle derivationsBundle, stackBundle;
609 ures_openDirectFillInures_openDirectFillIn_71(derivationsBundle.getAlias(), NULL__null, "grammaticalFeatures", &status);
610 ures_getByKeyures_getByKey_71(derivationsBundle.getAlias(), "grammaticalData", derivationsBundle.getAlias(),
611 &status);
612 ures_getByKeyures_getByKey_71(derivationsBundle.getAlias(), "derivations", derivationsBundle.getAlias(),
613 &status);
614 if (U_FAILURE(status)) {
615 return;
616 }
617 UErrorCode localStatus = U_ZERO_ERROR;
618 // TODO(icu-units#28): use standard normal locale resolution algorithms
619 // rather than just grabbing language:
620 ures_getByKeyures_getByKey_71(derivationsBundle.getAlias(), locale.getLanguage(), stackBundle.getAlias(),
621 &localStatus);
622 // TODO(icu-units#28):
623 // - code currently assumes if the locale exists, the rules are there -
624 // instead of falling back to root when the requested rule is missing.
625 // - investigate ures.h functions, see if one that uses res_findResource()
626 // might be better (or use res_findResource directly), or maybe help
627 // improve ures documentation to guide function selection?
628 if (localStatus == U_MISSING_RESOURCE_ERROR) {
629 ures_getByKeyures_getByKey_71(derivationsBundle.getAlias(), "root", stackBundle.getAlias(), &status);
630 } else {
631 status = localStatus;
632 }
633 ures_getByKeyures_getByKey_71(stackBundle.getAlias(), "component", stackBundle.getAlias(), &status);
634 ures_getByKeyures_getByKey_71(stackBundle.getAlias(), feature, stackBundle.getAlias(), &status);
635 ures_getByKeyures_getByKey_71(stackBundle.getAlias(), structure, stackBundle.getAlias(), &status);
636 UnicodeString val0 = ures_getUnicodeStringByIndex(stackBundle.getAlias(), 0, &status);
637 UnicodeString val1 = ures_getUnicodeStringByIndex(stackBundle.getAlias(), 1, &status);
638 if (U_SUCCESS(status)) {
639 if (val0.compare(UnicodeString(u"compound")) == 0) {
640 compound0_ = true;
641 } else {
642 compound0_ = false;
643 value0_.appendInvariantChars(val0, status);
644 }
645 if (val1.compare(UnicodeString(u"compound")) == 0) {
646 compound1_ = true;
647 } else {
648 compound1_ = false;
649 value1_.appendInvariantChars(val1, status);
650 }
651 }
652 }
653
654 // Returns a StringPiece that is only valid as long as the instance exists.
655 StringPiece value0(const StringPiece compoundValue) const {
656 return compound0_ ? compoundValue : value0_.toStringPiece();
657 }
658
659 // Returns a StringPiece that is only valid as long as the instance exists.
660 StringPiece value1(const StringPiece compoundValue) const {
661 return compound1_ ? compoundValue : value1_.toStringPiece();
662 }
663
664 // Returns a char* that is only valid as long as the instance exists.
665 const char *value0(const char *compoundValue) const {
666 return compound0_ ? compoundValue : value0_.data();
667 }
668
669 // Returns a char* that is only valid as long as the instance exists.
670 const char *value1(const char *compoundValue) const {
671 return compound1_ ? compoundValue : value1_.data();
672 }
673
674 private:
675 UErrorCode status = U_ZERO_ERROR;
676
677 // Holds strings referred to by value0 and value1;
678 bool compound0_ = false, compound1_ = false;
679 CharString value0_, value1_;
680};
681
682// TODO(icu-units#28): test somehow? Associate with an ICU ticket for adding
683// testsuite support for testing with synthetic data?
684/**
685 * Loads and returns the value in rules that look like these:
686 *
687 * <deriveCompound feature="gender" structure="per" value="0"/>
688 * <deriveCompound feature="gender" structure="times" value="1"/>
689 *
690 * Currently a fake example, but spec compliant:
691 * <deriveCompound feature="gender" structure="power" value="feminine"/>
692 *
693 * NOTE: If U_FAILURE(status), returns an empty string.
694 */
695UnicodeString
696getDeriveCompoundRule(Locale locale, const char *feature, const char *structure, UErrorCode &status) {
697 StackUResourceBundle derivationsBundle, stackBundle;
698 ures_openDirectFillInures_openDirectFillIn_71(derivationsBundle.getAlias(), NULL__null, "grammaticalFeatures", &status);
699 ures_getByKeyures_getByKey_71(derivationsBundle.getAlias(), "grammaticalData", derivationsBundle.getAlias(),
700 &status);
701 ures_getByKeyures_getByKey_71(derivationsBundle.getAlias(), "derivations", derivationsBundle.getAlias(), &status);
702 // TODO: use standard normal locale resolution algorithms rather than just grabbing language:
703 ures_getByKeyures_getByKey_71(derivationsBundle.getAlias(), locale.getLanguage(), stackBundle.getAlias(), &status);
704 // TODO:
705 // - code currently assumes if the locale exists, the rules are there -
706 // instead of falling back to root when the requested rule is missing.
707 // - investigate ures.h functions, see if one that uses res_findResource()
708 // might be better (or use res_findResource directly), or maybe help
709 // improve ures documentation to guide function selection?
710 if (status == U_MISSING_RESOURCE_ERROR) {
711 status = U_ZERO_ERROR;
712 ures_getByKeyures_getByKey_71(derivationsBundle.getAlias(), "root", stackBundle.getAlias(), &status);
713 }
714 ures_getByKeyures_getByKey_71(stackBundle.getAlias(), "compound", stackBundle.getAlias(), &status);
715 ures_getByKeyures_getByKey_71(stackBundle.getAlias(), feature, stackBundle.getAlias(), &status);
716 UnicodeString uVal = ures_getUnicodeStringByKey(stackBundle.getAlias(), structure, &status);
717 if (U_FAILURE(status)) {
718 return {};
719 }
720 U_ASSERT(!uVal.isBogus())(void)0;
721 return uVal;
722}
723
724// Returns the gender string for structures following these rules:
725//
726// <deriveCompound feature="gender" structure="per" value="0"/>
727// <deriveCompound feature="gender" structure="times" value="1"/>
728//
729// Fake example:
730// <deriveCompound feature="gender" structure="power" value="feminine"/>
731//
732// data0 and data1 should be pattern arrays (UnicodeString[ARRAY_SIZE]) that
733// correspond to value="0" and value="1".
734//
735// Pass a nullptr to data1 if the structure has no concept of value="1" (e.g.
736// "prefix" doesn't).
737UnicodeString getDerivedGender(Locale locale,
738 const char *structure,
739 UnicodeString *data0,
740 UnicodeString *data1,
741 UErrorCode &status) {
742 UnicodeString val = getDeriveCompoundRule(locale, "gender", structure, status);
743 if (val.length() == 1) {
744 switch (val[0]) {
745 case u'0':
746 return data0[GENDER_INDEX];
747 case u'1':
748 if (data1 == nullptr) {
749 return {};
750 }
751 return data1[GENDER_INDEX];
752 }
753 }
754 return val;
755}
756
757////////////////////////
758/// END DATA LOADING ///
759////////////////////////
760
761// TODO: promote this somewhere? It's based on patternprops.cpp' trimWhitespace
762const UChar *trimSpaceChars(const UChar *s, int32_t &length) {
763 if (length <= 0 || (!u_isJavaSpaceCharu_isJavaSpaceChar_71(s[0]) && !u_isJavaSpaceCharu_isJavaSpaceChar_71(s[length - 1]))) {
764 return s;
765 }
766 int32_t start = 0;
767 int32_t limit = length;
768 while (start < limit && u_isJavaSpaceCharu_isJavaSpaceChar_71(s[start])) {
769 ++start;
770 }
771 if (start < limit) {
772 // There is non-white space at start; we will not move limit below that,
773 // so we need not test start<limit in the loop.
774 while (u_isJavaSpaceCharu_isJavaSpaceChar_71(s[limit - 1])) {
775 --limit;
776 }
777 }
778 length = limit - start;
779 return s + start;
780}
781
782/**
783 * Calculates the gender of an arbitrary unit: this is the *second*
784 * implementation of an algorithm to do this:
785 *
786 * Gender is also calculated in "processPatternTimes": that code path is "bottom
787 * up", loading the gender for every component of a compound unit (at the same
788 * time as loading the Long Names formatting patterns), even if the gender is
789 * unneeded, then combining the single units' genders into the compound unit's
790 * gender, according to the rules. This algorithm does a lazier "top-down"
791 * evaluation, starting with the compound unit, calculating which single unit's
792 * gender is needed by breaking it down according to the rules, and then loading
793 * only the gender of the one single unit who's gender is needed.
794 *
795 * For future refactorings:
796 * 1. we could drop processPatternTimes' gender calculation and just call this
797 * function: for UNUM_UNIT_WIDTH_FULL_NAME, the unit gender is in the very
798 * same table as the formatting patterns, so loading it then may be
799 * efficient. For other unit widths however, it needs to be explicitly looked
800 * up anyway.
801 * 2. alternatively, if CLDR is providing all the genders we need such that we
802 * don't need to calculate them in ICU anymore, we could drop this function
803 * and keep only processPatternTimes' calculation. (And optimise it a bit?)
804 *
805 * @param locale The desired locale.
806 * @param unit The measure unit to calculate the gender for.
807 * @return The gender string for the unit, or an empty string if unknown or
808 * ungendered.
809 */
810UnicodeString calculateGenderForUnit(const Locale &locale, const MeasureUnit &unit, UErrorCode &status) {
811 MeasureUnitImpl impl;
812 const MeasureUnitImpl& mui = MeasureUnitImpl::forMeasureUnit(unit, impl, status);
813 int32_t singleUnitIndex = 0;
814 if (mui.complexity == UMEASURE_UNIT_COMPOUND) {
815 int32_t startSlice = 0;
816 // inclusive
817 int32_t endSlice = mui.singleUnits.length()-1;
818 U_ASSERT(endSlice > 0)(void)0; // Else it would not be COMPOUND
819 if (mui.singleUnits[endSlice]->dimensionality < 0) {
820 // We have a -per- construct
821 UnicodeString perRule = getDeriveCompoundRule(locale, "gender", "per", status);
822 if (perRule.length() != 1) {
823 // Fixed gender for -per- units
824 return perRule;
825 }
826 if (perRule[0] == u'1') {
827 // Find the start of the denominator. We already know there is one.
828 while (mui.singleUnits[startSlice]->dimensionality >= 0) {
829 startSlice++;
830 }
831 } else {
832 // Find the end of the numerator
833 while (endSlice >= 0 && mui.singleUnits[endSlice]->dimensionality < 0) {
834 endSlice--;
835 }
836 if (endSlice < 0) {
837 // We have only a denominator, e.g. "per-second".
838 // TODO(icu-units#28): find out what gender to use in the
839 // absence of a first value - mentioned in CLDR-14253.
840 return {};
841 }
842 }
843 }
844 if (endSlice > startSlice) {
845 // We have a -times- construct
846 UnicodeString timesRule = getDeriveCompoundRule(locale, "gender", "times", status);
847 if (timesRule.length() != 1) {
848 // Fixed gender for -times- units
849 return timesRule;
850 }
851 if (timesRule[0] == u'0') {
852 endSlice = startSlice;
Value stored to 'endSlice' is never read
853 } else {
854 // We assume timesRule[0] == u'1'
855 startSlice = endSlice;
856 }
857 }
858 U_ASSERT(startSlice == endSlice)(void)0;
859 singleUnitIndex = startSlice;
860 } else if (mui.complexity == UMEASURE_UNIT_MIXED) {
861 status = U_INTERNAL_PROGRAM_ERROR;
862 return {};
863 } else {
864 U_ASSERT(mui.complexity == UMEASURE_UNIT_SINGLE)(void)0;
865 U_ASSERT(mui.singleUnits.length() == 1)(void)0;
866 }
867
868 // Now we know which singleUnit's gender we want
869 const SingleUnitImpl *singleUnit = mui.singleUnits[singleUnitIndex];
870 // Check for any power-prefix gender override:
871 if (std::abs(singleUnit->dimensionality) != 1) {
872 UnicodeString powerRule = getDeriveCompoundRule(locale, "gender", "power", status);
873 if (powerRule.length() != 1) {
874 // Fixed gender for -powN- units
875 return powerRule;
876 }
877 // powerRule[0] == u'0'; u'1' not currently in spec.
878 }
879 // Check for any SI and binary prefix gender override:
880 if (std::abs(singleUnit->dimensionality) != 1) {
881 UnicodeString prefixRule = getDeriveCompoundRule(locale, "gender", "prefix", status);
882 if (prefixRule.length() != 1) {
883 // Fixed gender for -powN- units
884 return prefixRule;
885 }
886 // prefixRule[0] == u'0'; u'1' not currently in spec.
887 }
888 // Now we've boiled it down to the gender of one simple unit identifier:
889 return getGenderForBuiltin(locale, MeasureUnit::forIdentifier(singleUnit->getSimpleUnitID(), status),
890 status);
891}
892
893void maybeCalculateGender(const Locale &locale,
894 const MeasureUnit &unitRef,
895 UnicodeString *outArray,
896 UErrorCode &status) {
897 if (outArray[GENDER_INDEX].isBogus()) {
898 UnicodeString meterGender = getGenderForBuiltin(locale, MeasureUnit::getMeter(), status);
899 if (meterGender.isEmpty()) {
900 // No gender for meter: assume ungendered language
901 return;
902 }
903 // We have a gendered language, but are lacking gender for unitRef.
904 outArray[GENDER_INDEX] = calculateGenderForUnit(locale, unitRef, status);
905 }
906}
907
908} // namespace
909
910void LongNameHandler::forMeasureUnit(const Locale &loc,
911 const MeasureUnit &unitRef,
912 const UNumberUnitWidth &width,
913 const char *unitDisplayCase,
914 const PluralRules *rules,
915 const MicroPropsGenerator *parent,
916 LongNameHandler *fillIn,
917 UErrorCode &status) {
918 // From https://unicode.org/reports/tr35/tr35-general.html#compound-units -
919 // Points 1 and 2 are mostly handled by MeasureUnit:
920 //
921 // 1. If the unitId is empty or invalid, fail
922 // 2. Put the unitId into normalized order
923 U_ASSERT(fillIn != nullptr)(void)0;
924
925 if (uprv_strcmp(unitRef.getType(), ""):: strcmp(unitRef.getType(), "") != 0) {
926 // Handling built-in units:
927 //
928 // 3. Set result to be getValue(unitId with length, pluralCategory, caseVariant)
929 // - If result is not empty, return it
930 UnicodeString simpleFormats[ARRAY_LENGTH];
931 getMeasureData(loc, unitRef, width, unitDisplayCase, simpleFormats, status);
932 maybeCalculateGender(loc, unitRef, simpleFormats, status);
933 if (U_FAILURE(status)) {
934 return;
935 }
936 fillIn->rules = rules;
937 fillIn->parent = parent;
938 fillIn->simpleFormatsToModifiers(simpleFormats,
939 {UFIELD_CATEGORY_NUMBER, UNUM_MEASURE_UNIT_FIELD}, status);
940 if (!simpleFormats[GENDER_INDEX].isBogus()) {
941 fillIn->gender = getGenderString(simpleFormats[GENDER_INDEX], status);
942 }
943 return;
944
945 // TODO(icu-units#145): figure out why this causes a failure in
946 // format/MeasureFormatTest/TestIndividualPluralFallback and other
947 // tests, when it should have been an alternative for the lines above:
948
949 // forArbitraryUnit(loc, unitRef, width, unitDisplayCase, fillIn, status);
950 // fillIn->rules = rules;
951 // fillIn->parent = parent;
952 // return;
953 } else {
954 // Check if it is a MeasureUnit this constructor handles: this
955 // constructor does not handle mixed units
956 U_ASSERT(unitRef.getComplexity(status) != UMEASURE_UNIT_MIXED)(void)0;
957 forArbitraryUnit(loc, unitRef, width, unitDisplayCase, fillIn, status);
958 fillIn->rules = rules;
959 fillIn->parent = parent;
960 return;
961 }
962}
963
964void LongNameHandler::forArbitraryUnit(const Locale &loc,
965 const MeasureUnit &unitRef,
966 const UNumberUnitWidth &width,
967 const char *unitDisplayCase,
968 LongNameHandler *fillIn,
969 UErrorCode &status) {
970 if (U_FAILURE(status)) {
971 return;
972 }
973 if (fillIn == nullptr) {
974 status = U_INTERNAL_PROGRAM_ERROR;
975 return;
976 }
977
978 // Numbered list items are from the algorithms at
979 // https://unicode.org/reports/tr35/tr35-general.html#compound-units:
980 //
981 // 4. Divide the unitId into numerator (the part before the "-per-") and
982 // denominator (the part after the "-per-). If both are empty, fail
983 MeasureUnitImpl unit;
984 MeasureUnitImpl perUnit;
985 {
986 MeasureUnitImpl fullUnit = MeasureUnitImpl::forMeasureUnitMaybeCopy(unitRef, status);
987 if (U_FAILURE(status)) {
988 return;
989 }
990 for (int32_t i = 0; i < fullUnit.singleUnits.length(); i++) {
991 SingleUnitImpl *subUnit = fullUnit.singleUnits[i];
992 if (subUnit->dimensionality > 0) {
993 unit.appendSingleUnit(*subUnit, status);
994 } else {
995 subUnit->dimensionality *= -1;
996 perUnit.appendSingleUnit(*subUnit, status);
997 }
998 }
999 }
1000
1001 // TODO(icu-units#28): check placeholder logic, see if it needs to be
1002 // present here instead of only in processPatternTimes:
1003 //
1004 // 5. Set both globalPlaceholder and globalPlaceholderPosition to be empty
1005
1006 DerivedComponents derivedPerCases(loc, "case", "per");
1007
1008 // 6. numeratorUnitString
1009 UnicodeString numeratorUnitData[ARRAY_LENGTH];
1010 processPatternTimes(std::move(unit), loc, width, derivedPerCases.value0(unitDisplayCase),
1011 numeratorUnitData, status);
1012
1013 // 7. denominatorUnitString
1014 UnicodeString denominatorUnitData[ARRAY_LENGTH];
1015 processPatternTimes(std::move(perUnit), loc, width, derivedPerCases.value1(unitDisplayCase),
1016 denominatorUnitData, status);
1017
1018 // TODO(icu-units#139):
1019 // - implement DerivedComponents for "plural/times" and "plural/power":
1020 // French has different rules, we'll be producing the wrong results
1021 // currently. (Prove via tests!)
1022 // - implement DerivedComponents for "plural/per", "plural/prefix",
1023 // "case/times", "case/power", and "case/prefix" - although they're
1024 // currently hardcoded. Languages with different rules are surely on the
1025 // way.
1026 //
1027 // Currently we only use "case/per", "plural/times", "case/times", and
1028 // "case/power".
1029 //
1030 // This may have impact on multiSimpleFormatsToModifiers(...) below too?
1031 // These rules are currently (ICU 69) all the same and hard-coded below.
1032 UnicodeString perUnitPattern;
1033 if (!denominatorUnitData[PER_INDEX].isBogus()) {
1034 // If we have no denominator, we obtain the empty string:
1035 perUnitPattern = denominatorUnitData[PER_INDEX];
1036 } else {
1037 // 8. Set perPattern to be getValue([per], locale, length)
1038 UnicodeString rawPerUnitFormat = getCompoundValue("per", loc, width, status);
1039 // rawPerUnitFormat is something like "{0} per {1}"; we need to substitute in the secondary unit.
1040 SimpleFormatter perPatternFormatter(rawPerUnitFormat, 2, 2, status);
1041 if (U_FAILURE(status)) {
1042 return;
1043 }
1044 // Plural and placeholder handling for 7. denominatorUnitString:
1045 // TODO(icu-units#139): hardcoded:
1046 // <deriveComponent feature="plural" structure="per" value0="compound" value1="one"/>
1047 UnicodeString denominatorFormat =
1048 getWithPlural(denominatorUnitData, StandardPlural::Form::ONE, status);
1049 // Some "one" pattern may not contain "{0}". For example in "ar" or "ne" locale.
1050 SimpleFormatter denominatorFormatter(denominatorFormat, 0, 1, status);
1051 if (U_FAILURE(status)) {
1052 return;
1053 }
1054 UnicodeString denominatorPattern = denominatorFormatter.getTextWithNoArguments();
1055 int32_t trimmedLen = denominatorPattern.length();
1056 const UChar *trimmed = trimSpaceChars(denominatorPattern.getBuffer(), trimmedLen);
1057 UnicodeString denominatorString(false, trimmed, trimmedLen);
1058 // 9. If the denominatorString is empty, set result to
1059 // [numeratorString], otherwise set result to format(perPattern,
1060 // numeratorString, denominatorString)
1061 //
1062 // TODO(icu-units#28): Why does UnicodeString need to be explicit in the
1063 // following line?
1064 perPatternFormatter.format(UnicodeString(u"{0}"), denominatorString, perUnitPattern, status);
1065 if (U_FAILURE(status)) {
1066 return;
1067 }
1068 }
1069 if (perUnitPattern.length() == 0) {
1070 fillIn->simpleFormatsToModifiers(numeratorUnitData,
1071 {UFIELD_CATEGORY_NUMBER, UNUM_MEASURE_UNIT_FIELD}, status);
1072 } else {
1073 fillIn->multiSimpleFormatsToModifiers(numeratorUnitData, perUnitPattern,
1074 {UFIELD_CATEGORY_NUMBER, UNUM_MEASURE_UNIT_FIELD}, status);
1075 }
1076
1077 // Gender
1078 //
1079 // TODO(icu-units#28): find out what gender to use in the absence of a first
1080 // value - e.g. what's the gender of "per-second"? Mentioned in CLDR-14253.
1081 //
1082 // gender/per deriveCompound rules don't say:
1083 // <deriveCompound feature="gender" structure="per" value="0"/> <!-- gender(gram-per-meter) ← gender(gram) -->
1084 fillIn->gender = getGenderString(
1085 getDerivedGender(loc, "per", numeratorUnitData, denominatorUnitData, status), status);
1086}
1087
1088void LongNameHandler::processPatternTimes(MeasureUnitImpl &&productUnit,
1089 Locale loc,
1090 const UNumberUnitWidth &width,
1091 const char *caseVariant,
1092 UnicodeString *outArray,
1093 UErrorCode &status) {
1094 if (U_FAILURE(status)) {
1095 return;
1096 }
1097 if (productUnit.complexity == UMEASURE_UNIT_MIXED) {
1098 // These are handled by MixedUnitLongNameHandler
1099 status = U_UNSUPPORTED_ERROR;
1100 return;
1101 }
1102
1103#if U_DEBUG0
1104 for (int32_t pluralIndex = 0; pluralIndex < ARRAY_LENGTH; pluralIndex++) {
1105 U_ASSERT(outArray[pluralIndex].length() == 0)(void)0;
1106 U_ASSERT(!outArray[pluralIndex].isBogus())(void)0;
1107 }
1108#endif
1109
1110 if (productUnit.identifier.isEmpty()) {
1111 // TODO(icu-units#28): consider when serialize should be called.
1112 // identifier might also be empty for MeasureUnit().
1113 productUnit.serialize(status);
1114 }
1115 if (U_FAILURE(status)) {
1116 return;
1117 }
1118 if (productUnit.identifier.length() == 0) {
1119 // MeasureUnit(): no units: return empty strings.
1120 return;
1121 }
1122
1123 MeasureUnit builtinUnit;
1124 if (MeasureUnit::findBySubType(productUnit.identifier.toStringPiece(), &builtinUnit)) {
1125 // TODO(icu-units#145): spec doesn't cover builtin-per-builtin, it
1126 // breaks them all down. Do we want to drop this?
1127 // - findBySubType isn't super efficient, if we skip it and go to basic
1128 // singles, we don't have to construct MeasureUnit's anymore.
1129 // - Check all the existing unit tests that fail without this: is it due
1130 // to incorrect fallback via getMeasureData?
1131 // - Do those unit tests cover this code path representatively?
1132 if (builtinUnit != MeasureUnit()) {
1133 getMeasureData(loc, builtinUnit, width, caseVariant, outArray, status);
1134 maybeCalculateGender(loc, builtinUnit, outArray, status);
1135 }
1136 return;
1137 }
1138
1139 // 2. Set timesPattern to be getValue(times, locale, length)
1140 UnicodeString timesPattern = getCompoundValue("times", loc, width, status);
1141 SimpleFormatter timesPatternFormatter(timesPattern, 2, 2, status);
1142 if (U_FAILURE(status)) {
1143 return;
1144 }
1145
1146 PlaceholderPosition globalPlaceholder[ARRAY_LENGTH];
1147 UChar globalJoinerChar = 0;
1148 // Numbered list items are from the algorithms at
1149 // https://unicode.org/reports/tr35/tr35-general.html#compound-units:
1150 //
1151 // pattern(...) point 5:
1152 // - Set both globalPlaceholder and globalPlaceholderPosition to be empty
1153 //
1154 // 3. Set result to be empty
1155 for (int32_t pluralIndex = 0; pluralIndex < ARRAY_LENGTH; pluralIndex++) {
1156 // Initial state: empty string pattern, via all falling back to OTHER:
1157 if (pluralIndex == StandardPlural::Form::OTHER) {
1158 outArray[pluralIndex].remove();
1159 } else {
1160 outArray[pluralIndex].setToBogus();
1161 }
1162 globalPlaceholder[pluralIndex] = PH_EMPTY;
1163 }
1164
1165 // Empty string represents "compound" (propagate the plural form).
1166 const char *pluralCategory = "";
1167 DerivedComponents derivedTimesPlurals(loc, "plural", "times");
1168 DerivedComponents derivedTimesCases(loc, "case", "times");
1169 DerivedComponents derivedPowerCases(loc, "case", "power");
1170
1171 // 4. For each single_unit in product_unit
1172 for (int32_t singleUnitIndex = 0; singleUnitIndex < productUnit.singleUnits.length();
1173 singleUnitIndex++) {
1174 SingleUnitImpl *singleUnit = productUnit.singleUnits[singleUnitIndex];
1175 const char *singlePluralCategory;
1176 const char *singleCaseVariant;
1177 // TODO(icu-units#28): ensure we have unit tests that change/fail if we
1178 // assign incorrect case variants here:
1179 if (singleUnitIndex < productUnit.singleUnits.length() - 1) {
1180 // 4.1. If hasMultiple
1181 singlePluralCategory = derivedTimesPlurals.value0(pluralCategory);
1182 singleCaseVariant = derivedTimesCases.value0(caseVariant);
1183 pluralCategory = derivedTimesPlurals.value1(pluralCategory);
1184 caseVariant = derivedTimesCases.value1(caseVariant);
1185 } else {
1186 singlePluralCategory = derivedTimesPlurals.value1(pluralCategory);
1187 singleCaseVariant = derivedTimesCases.value1(caseVariant);
1188 }
1189
1190 // 4.2. Get the gender of that single_unit
1191 MeasureUnit simpleUnit;
1192 if (!MeasureUnit::findBySubType(singleUnit->getSimpleUnitID(), &simpleUnit)) {
1193 // Ideally all simple units should be known, but they're not:
1194 // 100-kilometer is internally treated as a simple unit, but it is
1195 // not a built-in unit and does not have formatting data in CLDR 39.
1196 //
1197 // TODO(icu-units#28): test (desirable) invariants in unit tests.
1198 status = U_UNSUPPORTED_ERROR;
1199 return;
1200 }
1201 const char *gender = getGenderString(getGenderForBuiltin(loc, simpleUnit, status), status);
1202
1203 // 4.3. If singleUnit starts with a dimensionality_prefix, such as 'square-'
1204 U_ASSERT(singleUnit->dimensionality > 0)(void)0;
1205 int32_t dimensionality = singleUnit->dimensionality;
1206 UnicodeString dimensionalityPrefixPatterns[ARRAY_LENGTH];
1207 if (dimensionality != 1) {
1208 // 4.3.1. set dimensionalityPrefixPattern to be
1209 // getValue(that dimensionality_prefix, locale, length, singlePluralCategory, singleCaseVariant, gender),
1210 // such as "{0} kwadratowym"
1211 CharString dimensionalityKey("compound/power", status);
1212 dimensionalityKey.appendNumber(dimensionality, status);
1213 getInflectedMeasureData(dimensionalityKey.toStringPiece(), loc, width, gender,
1214 singleCaseVariant, dimensionalityPrefixPatterns, status);
1215 if (U_FAILURE(status)) {
1216 // At the time of writing, only pow2 and pow3 are supported.
1217 // Attempting to format other powers results in a
1218 // U_RESOURCE_TYPE_MISMATCH. We convert the error if we
1219 // understand it:
1220 if (status == U_RESOURCE_TYPE_MISMATCH && dimensionality > 3) {
1221 status = U_UNSUPPORTED_ERROR;
1222 }
1223 return;
1224 }
1225
1226 // TODO(icu-units#139):
1227 // 4.3.2. set singlePluralCategory to be power0(singlePluralCategory)
1228
1229 // 4.3.3. set singleCaseVariant to be power0(singleCaseVariant)
1230 singleCaseVariant = derivedPowerCases.value0(singleCaseVariant);
1231 // 4.3.4. remove the dimensionality_prefix from singleUnit
1232 singleUnit->dimensionality = 1;
1233 }
1234
1235 // 4.4. if singleUnit starts with an si_prefix, such as 'centi'
1236 UMeasurePrefix prefix = singleUnit->unitPrefix;
1237 UnicodeString prefixPattern;
1238 if (prefix != UMEASURE_PREFIX_ONE) {
1239 // 4.4.1. set siPrefixPattern to be getValue(that si_prefix, locale,
1240 // length), such as "centy{0}"
1241 CharString prefixKey;
1242 // prefixKey looks like "1024p3" or "10p-2":
1243 prefixKey.appendNumber(umeas_getPrefixBaseumeas_getPrefixBase_71(prefix), status);
1244 prefixKey.append('p', status);
1245 prefixKey.appendNumber(umeas_getPrefixPowerumeas_getPrefixPower_71(prefix), status);
1246 // Contains a pattern like "centy{0}".
1247 prefixPattern = getCompoundValue(prefixKey.toStringPiece(), loc, width, status);
1248
1249 // 4.4.2. set singlePluralCategory to be prefix0(singlePluralCategory)
1250 //
1251 // TODO(icu-units#139): that refers to these rules:
1252 // <deriveComponent feature="plural" structure="prefix" value0="one" value1="compound"/>
1253 // though I'm not sure what other value they might end up having.
1254 //
1255 // 4.4.3. set singleCaseVariant to be prefix0(singleCaseVariant)
1256 //
1257 // TODO(icu-units#139): that refers to:
1258 // <deriveComponent feature="case" structure="prefix" value0="nominative"
1259 // value1="compound"/> but the prefix (value0) doesn't have case, the rest simply
1260 // propagates.
1261
1262 // 4.4.4. remove the si_prefix from singleUnit
1263 singleUnit->unitPrefix = UMEASURE_PREFIX_ONE;
1264 }
1265
1266 // 4.5. Set corePattern to be the getValue(singleUnit, locale, length,
1267 // singlePluralCategory, singleCaseVariant), such as "{0} metrem"
1268 UnicodeString singleUnitArray[ARRAY_LENGTH];
1269 // At this point we are left with a Simple Unit:
1270 U_ASSERT(uprv_strcmp(singleUnit->build(status).getIdentifier(), singleUnit->getSimpleUnitID()) ==(void)0
1271 0)(void)0;
1272 getMeasureData(loc, singleUnit->build(status), width, singleCaseVariant, singleUnitArray,
1273 status);
1274 if (U_FAILURE(status)) {
1275 // Shouldn't happen if we have data for all single units
1276 return;
1277 }
1278
1279 // Calculate output gender
1280 if (!singleUnitArray[GENDER_INDEX].isBogus()) {
1281 U_ASSERT(!singleUnitArray[GENDER_INDEX].isEmpty())(void)0;
1282 UnicodeString uVal;
1283
1284 if (prefix != UMEASURE_PREFIX_ONE) {
1285 singleUnitArray[GENDER_INDEX] =
1286 getDerivedGender(loc, "prefix", singleUnitArray, nullptr, status);
1287 }
1288
1289 if (dimensionality != 1) {
1290 singleUnitArray[GENDER_INDEX] =
1291 getDerivedGender(loc, "power", singleUnitArray, nullptr, status);
1292 }
1293
1294 UnicodeString timesGenderRule = getDeriveCompoundRule(loc, "gender", "times", status);
1295 if (timesGenderRule.length() == 1) {
1296 switch (timesGenderRule[0]) {
1297 case u'0':
1298 if (singleUnitIndex == 0) {
1299 U_ASSERT(outArray[GENDER_INDEX].isBogus())(void)0;
1300 outArray[GENDER_INDEX] = singleUnitArray[GENDER_INDEX];
1301 }
1302 break;
1303 case u'1':
1304 if (singleUnitIndex == productUnit.singleUnits.length() - 1) {
1305 U_ASSERT(outArray[GENDER_INDEX].isBogus())(void)0;
1306 outArray[GENDER_INDEX] = singleUnitArray[GENDER_INDEX];
1307 }
1308 }
1309 } else {
1310 if (outArray[GENDER_INDEX].isBogus()) {
1311 outArray[GENDER_INDEX] = timesGenderRule;
1312 }
1313 }
1314 }
1315
1316 // Calculate resulting patterns for each plural form
1317 for (int32_t pluralIndex = 0; pluralIndex < StandardPlural::Form::COUNT; pluralIndex++) {
1318 StandardPlural::Form plural = static_cast<StandardPlural::Form>(pluralIndex);
1319
1320 // singleUnitArray[pluralIndex] looks something like "{0} Meter"
1321 if (outArray[pluralIndex].isBogus()) {
1322 if (singleUnitArray[pluralIndex].isBogus()) {
1323 // Let the usual plural fallback mechanism take care of this
1324 // plural form
1325 continue;
1326 } else {
1327 // Since our singleUnit can have a plural form that outArray
1328 // doesn't yet have (relying on fallback to OTHER), we start
1329 // by grabbing it with the normal plural fallback mechanism
1330 outArray[pluralIndex] = getWithPlural(outArray, plural, status);
1331 if (U_FAILURE(status)) {
1332 return;
1333 }
1334 }
1335 }
1336
1337 if (uprv_strcmp(singlePluralCategory, ""):: strcmp(singlePluralCategory, "") != 0) {
1338 plural = static_cast<StandardPlural::Form>(getIndex(singlePluralCategory, status));
1339 }
1340
1341 // 4.6. Extract(corePattern, coreUnit, placeholder, placeholderPosition) from that pattern.
1342 UnicodeString coreUnit;
1343 PlaceholderPosition placeholderPosition;
1344 UChar joinerChar;
1345 extractCorePattern(getWithPlural(singleUnitArray, plural, status), coreUnit,
1346 placeholderPosition, joinerChar);
1347
1348 // 4.7 If the position is middle, then fail
1349 if (placeholderPosition == PH_MIDDLE) {
1350 status = U_UNSUPPORTED_ERROR;
1351 return;
1352 }
1353
1354 // 4.8. If globalPlaceholder is empty
1355 if (globalPlaceholder[pluralIndex] == PH_EMPTY) {
1356 globalPlaceholder[pluralIndex] = placeholderPosition;
1357 globalJoinerChar = joinerChar;
1358 } else {
1359 // Expect all units involved to have the same placeholder position
1360 U_ASSERT(globalPlaceholder[pluralIndex] == placeholderPosition)(void)0;
1361 // TODO(icu-units#28): Do we want to add a unit test that checks
1362 // for consistent joiner chars? Probably not, given how
1363 // inconsistent they are. File a CLDR ticket with examples?
1364 }
1365 // Now coreUnit would be just "Meter"
1366
1367 // 4.9. If siPrefixPattern is not empty
1368 if (prefix != UMEASURE_PREFIX_ONE) {
1369 SimpleFormatter prefixCompiled(prefixPattern, 1, 1, status);
1370 if (U_FAILURE(status)) {
1371 return;
1372 }
1373
1374 // 4.9.1. Set coreUnit to be the combineLowercasing(locale, length, siPrefixPattern,
1375 // coreUnit)
1376 UnicodeString tmp;
1377 // combineLowercasing(locale, length, prefixPattern, coreUnit)
1378 //
1379 // TODO(icu-units#28): run this only if prefixPattern does not
1380 // contain space characters - do languages "as", "bn", "hi",
1381 // "kk", etc have concepts of upper and lower case?:
1382 if (width == UNUM_UNIT_WIDTH_FULL_NAME) {
1383 coreUnit.toLower(loc);
1384 }
1385 prefixCompiled.format(coreUnit, tmp, status);
1386 if (U_FAILURE(status)) {
1387 return;
1388 }
1389 coreUnit = tmp;
1390 }
1391
1392 // 4.10. If dimensionalityPrefixPattern is not empty
1393 if (dimensionality != 1) {
1394 SimpleFormatter dimensionalityCompiled(
1395 getWithPlural(dimensionalityPrefixPatterns, plural, status), 1, 1, status);
1396 if (U_FAILURE(status)) {
1397 return;
1398 }
1399
1400 // 4.10.1. Set coreUnit to be the combineLowercasing(locale, length,
1401 // dimensionalityPrefixPattern, coreUnit)
1402 UnicodeString tmp;
1403 // combineLowercasing(locale, length, prefixPattern, coreUnit)
1404 //
1405 // TODO(icu-units#28): run this only if prefixPattern does not
1406 // contain space characters - do languages "as", "bn", "hi",
1407 // "kk", etc have concepts of upper and lower case?:
1408 if (width == UNUM_UNIT_WIDTH_FULL_NAME) {
1409 coreUnit.toLower(loc);
1410 }
1411 dimensionalityCompiled.format(coreUnit, tmp, status);
1412 if (U_FAILURE(status)) {
1413 return;
1414 }
1415 coreUnit = tmp;
1416 }
1417
1418 if (outArray[pluralIndex].length() == 0) {
1419 // 4.11. If the result is empty, set result to be coreUnit
1420 outArray[pluralIndex] = coreUnit;
1421 } else {
1422 // 4.12. Otherwise set result to be format(timesPattern, result, coreUnit)
1423 UnicodeString tmp;
1424 timesPatternFormatter.format(outArray[pluralIndex], coreUnit, tmp, status);
1425 outArray[pluralIndex] = tmp;
1426 }
1427 }
1428 }
1429 for (int32_t pluralIndex = 0; pluralIndex < StandardPlural::Form::COUNT; pluralIndex++) {
1430 if (globalPlaceholder[pluralIndex] == PH_BEGINNING) {
1431 UnicodeString tmp;
1432 tmp.append(u"{0}", 3);
1433 if (globalJoinerChar != 0) {
1434 tmp.append(globalJoinerChar);
1435 }
1436 tmp.append(outArray[pluralIndex]);
1437 outArray[pluralIndex] = tmp;
1438 } else if (globalPlaceholder[pluralIndex] == PH_END) {
1439 if (globalJoinerChar != 0) {
1440 outArray[pluralIndex].append(globalJoinerChar);
1441 }
1442 outArray[pluralIndex].append(u"{0}", 3);
1443 }
1444 }
1445}
1446
1447UnicodeString LongNameHandler::getUnitDisplayName(
1448 const Locale& loc,
1449 const MeasureUnit& unit,
1450 UNumberUnitWidth width,
1451 UErrorCode& status) {
1452 if (U_FAILURE(status)) {
1453 return ICU_Utility::makeBogusString();
1454 }
1455 UnicodeString simpleFormats[ARRAY_LENGTH];
1456 getMeasureData(loc, unit, width, "", simpleFormats, status);
1457 return simpleFormats[DNAM_INDEX];
1458}
1459
1460UnicodeString LongNameHandler::getUnitPattern(
1461 const Locale& loc,
1462 const MeasureUnit& unit,
1463 UNumberUnitWidth width,
1464 StandardPlural::Form pluralForm,
1465 UErrorCode& status) {
1466 if (U_FAILURE(status)) {
1467 return ICU_Utility::makeBogusString();
1468 }
1469 UnicodeString simpleFormats[ARRAY_LENGTH];
1470 getMeasureData(loc, unit, width, "", simpleFormats, status);
1471 // The above already handles fallback from other widths to short
1472 if (U_FAILURE(status)) {
1473 return ICU_Utility::makeBogusString();
1474 }
1475 // Now handle fallback from other plural forms to OTHER
1476 return (!(simpleFormats[pluralForm]).isBogus())? simpleFormats[pluralForm]:
1477 simpleFormats[StandardPlural::Form::OTHER];
1478}
1479
1480LongNameHandler* LongNameHandler::forCurrencyLongNames(const Locale &loc, const CurrencyUnit &currency,
1481 const PluralRules *rules,
1482 const MicroPropsGenerator *parent,
1483 UErrorCode &status) {
1484 auto* result = new LongNameHandler(rules, parent);
1485 if (result == nullptr) {
1486 status = U_MEMORY_ALLOCATION_ERROR;
1487 return nullptr;
1488 }
1489 UnicodeString simpleFormats[ARRAY_LENGTH];
1490 getCurrencyLongNameData(loc, currency, simpleFormats, status);
1491 if (U_FAILURE(status)) { return nullptr; }
1492 result->simpleFormatsToModifiers(simpleFormats, {UFIELD_CATEGORY_NUMBER, UNUM_CURRENCY_FIELD}, status);
1493 // TODO(icu-units#28): currency gender?
1494 return result;
1495}
1496
1497void LongNameHandler::simpleFormatsToModifiers(const UnicodeString *simpleFormats, Field field,
1498 UErrorCode &status) {
1499 for (int32_t i = 0; i < StandardPlural::Form::COUNT; i++) {
1500 StandardPlural::Form plural = static_cast<StandardPlural::Form>(i);
1501 UnicodeString simpleFormat = getWithPlural(simpleFormats, plural, status);
1502 if (U_FAILURE(status)) { return; }
1503 SimpleFormatter compiledFormatter(simpleFormat, 0, 1, status);
1504 if (U_FAILURE(status)) { return; }
1505 fModifiers[i] = SimpleModifier(compiledFormatter, field, false, {this, SIGNUM_POS_ZERO, plural});
1506 }
1507}
1508
1509void LongNameHandler::multiSimpleFormatsToModifiers(const UnicodeString *leadFormats, UnicodeString trailFormat,
1510 Field field, UErrorCode &status) {
1511 SimpleFormatter trailCompiled(trailFormat, 1, 1, status);
1512 if (U_FAILURE(status)) { return; }
1513 for (int32_t i = 0; i < StandardPlural::Form::COUNT; i++) {
1514 StandardPlural::Form plural = static_cast<StandardPlural::Form>(i);
1515 UnicodeString leadFormat = getWithPlural(leadFormats, plural, status);
1516 if (U_FAILURE(status)) { return; }
1517 UnicodeString compoundFormat;
1518 if (leadFormat.length() == 0) {
1519 compoundFormat = trailFormat;
1520 } else {
1521 trailCompiled.format(leadFormat, compoundFormat, status);
1522 if (U_FAILURE(status)) { return; }
1523 }
1524 SimpleFormatter compoundCompiled(compoundFormat, 0, 1, status);
1525 if (U_FAILURE(status)) { return; }
1526 fModifiers[i] = SimpleModifier(compoundCompiled, field, false, {this, SIGNUM_POS_ZERO, plural});
1527 }
1528}
1529
1530void LongNameHandler::processQuantity(DecimalQuantity &quantity, MicroProps &micros,
1531 UErrorCode &status) const {
1532 if (parent != NULL__null) {
1533 parent->processQuantity(quantity, micros, status);
1534 }
1535 StandardPlural::Form pluralForm = utils::getPluralSafe(micros.rounder, rules, quantity, status);
1536 micros.modOuter = &fModifiers[pluralForm];
1537 micros.gender = gender;
1538}
1539
1540const Modifier* LongNameHandler::getModifier(Signum /*signum*/, StandardPlural::Form plural) const {
1541 return &fModifiers[plural];
1542}
1543
1544void MixedUnitLongNameHandler::forMeasureUnit(const Locale &loc,
1545 const MeasureUnit &mixedUnit,
1546 const UNumberUnitWidth &width,
1547 const char *unitDisplayCase,
1548 const PluralRules *rules,
1549 const MicroPropsGenerator *parent,
1550 MixedUnitLongNameHandler *fillIn,
1551 UErrorCode &status) {
1552 U_ASSERT(mixedUnit.getComplexity(status) == UMEASURE_UNIT_MIXED)(void)0;
1553 U_ASSERT(fillIn != nullptr)(void)0;
1554 if (U_FAILURE(status)) {
1555 return;
1556 }
1557
1558 MeasureUnitImpl temp;
1559 const MeasureUnitImpl &impl = MeasureUnitImpl::forMeasureUnit(mixedUnit, temp, status);
1560 // Defensive, for production code:
1561 if (impl.complexity != UMEASURE_UNIT_MIXED) {
1562 // Should be using the normal LongNameHandler
1563 status = U_UNSUPPORTED_ERROR;
1564 return;
1565 }
1566
1567 fillIn->fMixedUnitCount = impl.singleUnits.length();
1568 fillIn->fMixedUnitData.adoptInstead(new UnicodeString[fillIn->fMixedUnitCount * ARRAY_LENGTH]);
1569 for (int32_t i = 0; i < fillIn->fMixedUnitCount; i++) {
1570 // Grab data for each of the components.
1571 UnicodeString *unitData = &fillIn->fMixedUnitData[i * ARRAY_LENGTH];
1572 // TODO(CLDR-14502): check from the CLDR-14502 ticket whether this
1573 // propagation of unitDisplayCase is correct:
1574 getMeasureData(loc, impl.singleUnits[i]->build(status), width, unitDisplayCase, unitData,
1575 status);
1576 // TODO(ICU-21494): if we add support for gender for mixed units, we may
1577 // need maybeCalculateGender() here.
1578 }
1579
1580 // TODO(icu-units#120): Make sure ICU doesn't output zero-valued
1581 // high-magnitude fields
1582 // * for mixed units count N, produce N listFormatters, one for each subset
1583 // that might be formatted.
1584 UListFormatterWidth listWidth = ULISTFMT_WIDTH_SHORT;
1585 if (width == UNUM_UNIT_WIDTH_NARROW) {
1586 listWidth = ULISTFMT_WIDTH_NARROW;
1587 } else if (width == UNUM_UNIT_WIDTH_FULL_NAME) {
1588 // This might be the same as SHORT in most languages:
1589 listWidth = ULISTFMT_WIDTH_WIDE;
1590 }
1591 fillIn->fListFormatter.adoptInsteadAndCheckErrorCode(
1592 ListFormatter::createInstance(loc, ULISTFMT_TYPE_UNITS, listWidth, status), status);
1593 // TODO(ICU-21494): grab gender of each unit, calculate the gender
1594 // associated with this list formatter, save it for later.
1595 fillIn->rules = rules;
1596 fillIn->parent = parent;
1597
1598 // We need a localised NumberFormatter for the numbers of the bigger units
1599 // (providing Arabic numerals, for example).
1600 fillIn->fNumberFormatter = NumberFormatter::withLocale(loc);
1601}
1602
1603void MixedUnitLongNameHandler::processQuantity(DecimalQuantity &quantity, MicroProps &micros,
1604 UErrorCode &status) const {
1605 U_ASSERT(fMixedUnitCount > 1)(void)0;
1606 if (parent != nullptr) {
1607 parent->processQuantity(quantity, micros, status);
1608 }
1609 micros.modOuter = getMixedUnitModifier(quantity, micros, status);
1610}
1611
1612const Modifier *MixedUnitLongNameHandler::getMixedUnitModifier(DecimalQuantity &quantity,
1613 MicroProps &micros,
1614 UErrorCode &status) const {
1615 if (micros.mixedMeasuresCount == 0) {
1616 U_ASSERT(micros.mixedMeasuresCount > 0)(void)0; // Mixed unit: we must have more than one unit value
1617 status = U_UNSUPPORTED_ERROR;
1618 return &micros.helpers.emptyWeakModifier;
1619 }
1620
1621 // Algorithm:
1622 //
1623 // For the mixed-units measurement of: "3 yard, 1 foot, 2.6 inch", we should
1624 // find "3 yard" and "1 foot" in micros.mixedMeasures.
1625 //
1626 // Obtain long-names with plural forms corresponding to measure values:
1627 // * {0} yards, {0} foot, {0} inches
1628 //
1629 // Format the integer values appropriately and modify with the format
1630 // strings:
1631 // - 3 yards, 1 foot
1632 //
1633 // Use ListFormatter to combine, with one placeholder:
1634 // - 3 yards, 1 foot and {0} inches
1635 //
1636 // Return a SimpleModifier for this pattern, letting the rest of the
1637 // pipeline take care of the remaining inches.
1638
1639 LocalArray<UnicodeString> outputMeasuresList(new UnicodeString[fMixedUnitCount], status);
1640 if (U_FAILURE(status)) {
1641 return &micros.helpers.emptyWeakModifier;
1642 }
1643
1644 StandardPlural::Form quantityPlural = StandardPlural::Form::OTHER;
1645 for (int32_t i = 0; i < micros.mixedMeasuresCount; i++) {
1646 DecimalQuantity fdec;
1647
1648 // If numbers are negative, only the first number needs to have its
1649 // negative sign formatted.
1650 int64_t number = i > 0 ? std::abs(micros.mixedMeasures[i]) : micros.mixedMeasures[i];
1651
1652 if (micros.indexOfQuantity == i) { // Insert placeholder for `quantity`
1653 // If quantity is not the first value and quantity is negative
1654 if (micros.indexOfQuantity > 0 && quantity.isNegative()) {
1655 quantity.negate();
1656 }
1657
1658 StandardPlural::Form quantityPlural =
1659 utils::getPluralSafe(micros.rounder, rules, quantity, status);
1660 UnicodeString quantityFormatWithPlural =
1661 getWithPlural(&fMixedUnitData[i * ARRAY_LENGTH], quantityPlural, status);
1662 SimpleFormatter quantityFormatter(quantityFormatWithPlural, 0, 1, status);
1663 quantityFormatter.format(UnicodeString(u"{0}"), outputMeasuresList[i], status);
1664 } else {
1665 fdec.setToLong(number);
1666 StandardPlural::Form pluralForm = utils::getStandardPlural(rules, fdec);
1667 UnicodeString simpleFormat =
1668 getWithPlural(&fMixedUnitData[i * ARRAY_LENGTH], pluralForm, status);
1669 SimpleFormatter compiledFormatter(simpleFormat, 0, 1, status);
1670 UnicodeString num;
1671 auto appendable = UnicodeStringAppendable(num);
1672
1673 fNumberFormatter.formatDecimalQuantity(fdec, status).appendTo(appendable, status);
1674 compiledFormatter.format(num, outputMeasuresList[i], status);
1675 }
1676 }
1677
1678 // TODO(ICU-21494): implement gender for lists of mixed units. Presumably we
1679 // can set micros.gender to the gender associated with the list formatter in
1680 // use below (once we have correct support for that). And then document this
1681 // appropriately? "getMixedUnitModifier" doesn't sound like it would do
1682 // something like this.
1683
1684 // Combine list into a "premixed" pattern
1685 UnicodeString premixedFormatPattern;
1686 fListFormatter->format(outputMeasuresList.getAlias(), fMixedUnitCount, premixedFormatPattern,
1687 status);
1688 SimpleFormatter premixedCompiled(premixedFormatPattern, 0, 1, status);
1689 if (U_FAILURE(status)) {
1690 return &micros.helpers.emptyWeakModifier;
1691 }
1692
1693 micros.helpers.mixedUnitModifier =
1694 SimpleModifier(premixedCompiled, kUndefinedField, false, {this, SIGNUM_POS_ZERO, quantityPlural});
1695 return &micros.helpers.mixedUnitModifier;
1696}
1697
1698const Modifier *MixedUnitLongNameHandler::getModifier(Signum /*signum*/,
1699 StandardPlural::Form /*plural*/) const {
1700 // TODO(icu-units#28): investigate this method when investigating where
1701 // ModifierStore::getModifier() gets used. To be sure it remains
1702 // unreachable:
1703 UPRV_UNREACHABLE_EXITabort();
1704 return nullptr;
1705}
1706
1707LongNameMultiplexer *LongNameMultiplexer::forMeasureUnits(const Locale &loc,
1708 const MaybeStackVector<MeasureUnit> &units,
1709 const UNumberUnitWidth &width,
1710 const char *unitDisplayCase,
1711 const PluralRules *rules,
1712 const MicroPropsGenerator *parent,
1713 UErrorCode &status) {
1714 LocalPointer<LongNameMultiplexer> result(new LongNameMultiplexer(parent), status);
1715 if (U_FAILURE(status)) {
1716 return nullptr;
1717 }
1718 U_ASSERT(units.length() > 0)(void)0;
1719 if (result->fHandlers.resize(units.length()) == nullptr) {
1720 status = U_MEMORY_ALLOCATION_ERROR;
1721 return nullptr;
1722 }
1723 result->fMeasureUnits.adoptInstead(new MeasureUnit[units.length()]);
1724 for (int32_t i = 0, length = units.length(); i < length; i++) {
1725 const MeasureUnit &unit = *units[i];
1726 result->fMeasureUnits[i] = unit;
1727 if (unit.getComplexity(status) == UMEASURE_UNIT_MIXED) {
1728 MixedUnitLongNameHandler *mlnh = result->fMixedUnitHandlers.createAndCheckErrorCode(status);
1729 MixedUnitLongNameHandler::forMeasureUnit(loc, unit, width, unitDisplayCase, rules, NULL__null,
1730 mlnh, status);
1731 result->fHandlers[i] = mlnh;
1732 } else {
1733 LongNameHandler *lnh = result->fLongNameHandlers.createAndCheckErrorCode(status);
1734 LongNameHandler::forMeasureUnit(loc, unit, width, unitDisplayCase, rules, NULL__null, lnh, status);
1735 result->fHandlers[i] = lnh;
1736 }
1737 if (U_FAILURE(status)) {
1738 return nullptr;
1739 }
1740 }
1741 return result.orphan();
1742}
1743
1744void LongNameMultiplexer::processQuantity(DecimalQuantity &quantity, MicroProps &micros,
1745 UErrorCode &status) const {
1746 // We call parent->processQuantity() from the Multiplexer, instead of
1747 // letting LongNameHandler handle it: we don't know which LongNameHandler to
1748 // call until we've called the parent!
1749 fParent->processQuantity(quantity, micros, status);
1750
1751 // Call the correct LongNameHandler based on outputUnit
1752 for (int i = 0; i < fHandlers.getCapacity(); i++) {
1753 if (fMeasureUnits[i] == micros.outputUnit) {
1754 fHandlers[i]->processQuantity(quantity, micros, status);
1755 return;
1756 }
1757 }
1758 if (U_FAILURE(status)) {
1759 return;
1760 }
1761 // We shouldn't receive any outputUnit for which we haven't already got a
1762 // LongNameHandler:
1763 status = U_INTERNAL_PROGRAM_ERROR;
1764}
1765
1766#endif /* #if !UCONFIG_NO_FORMATTING */