Standard C/C++

A C locale provides low-level information on how to format monetary amounts. The C++ locale facet moneypunct supplies the same information at a somewhat higher level of abstraction.


March 01, 1998
URL:http://www.drdobbs.com/standard-cc/184403475

March 1998/Standard C/C++/Listing 1

Listing 1: Public view of template class moneypunct

                // STRUCT money_base
// Base of the monetary punctuation facets: it contributes only the
// format-part codes and the four-element pattern type.
struct money_base
        : public locale::facet {
        // Codes for the parts of a monetary format. Each enumerator
        // has the value of a mnemonic character.
        enum part {
                symbol = '$',
                sign = '+',
                space = ' ',
                value = 'v',
                none = 'x'
                };
        // A format, held as four part codes in order.
        struct pattern {
                char field[4];
                };
        // NOTE(review): refs is presumably the reference-count
        // argument handed on to locale::facet -- the definition
        // is not shown here, so confirm.
        money_base(size_t refs = 0);
        };
// Public view of the monetary punctuation facet.
//   E    - element (character) type of the text being described
//   Intl - true selects the international format, false the format
//          used locally; it is a Boolean value, not a type
template<class E, bool Intl>
        class moneypunct : public money_base {
public:
        typedef E char_type;
        typedef basic_string<E, char_traits<E>, allocator<E> >
                string_type;
                // Punctuation and grouping of the digits.
        E decimal_point() const;
        E thousands_sep() const;
        string grouping() const;
                // Currency symbol and sign strings.
        string_type curr_symbol() const;
        string_type positive_sign() const;
        string_type negative_sign() const;
                // Digits to display right of the decimal point.
        int frac_digits() const;
                // Arrangement of the parts of an amount.
        pattern pos_format() const;
        pattern neg_format() const;
                // The constructor carries the class name; the choice
                // of international format comes from Intl, so only
                // the reference-count argument is passed.
        explicit moneypunct(size_t refs = 0);
protected:
                // Virtual counterparts of the public members above.
        virtual E do_decimal_point() const;
        virtual E do_thousands_sep() const;
        virtual string do_grouping() const;
        virtual string_type do_curr_symbol() const;
        virtual string_type do_positive_sign() const;
        virtual string_type do_negative_sign() const;
        virtual int do_frac_digits() const;
        virtual pattern do_pos_format() const;
        virtual pattern do_neg_format() const;
        };
//End of File

March 1998/Standard C/C++/Listing 2

Listing 2: Template class _Mpunct

               // TEMPLATE CLASS _Mpunct
// Implementation base for the moneypunct facets. It holds the code
// common to moneypunct<_E, false> and moneypunct<_E, true>: the
// "international" choice is a constructor argument stored in _Ifl,
// so either flavor can be used through one _Mpunct<_E> reference.
template<class _E>
        class _Mpunct : public money_base {
public:
        typedef _E char_type;
        typedef basic_string<_E, char_traits<_E>, allocator<_E> >
                string_type;
                // Each public accessor forwards to the protected
                // virtual do_ member of the same name.
        _E decimal_point() const
                {return (do_decimal_point()); }
        _E thousands_sep() const
                {return (do_thousands_sep()); }
        string grouping() const
                {return (do_grouping()); }
        string_type curr_symbol() const
                {return (do_curr_symbol()); }
        string_type positive_sign() const
                {return (do_positive_sign()); }
        string_type negative_sign() const
                {return (do_negative_sign()); }
        int frac_digits() const
                {return (do_frac_digits()); }
        pattern pos_format() const
                {return (do_pos_format()); }
        pattern neg_format() const
                {return (do_neg_format()); }
                // _R is passed to money_base. _Intl is stored in
                // _Ifl before _Init runs, since _Init and _Makpat
                // both read it.
        explicit _Mpunct(size_t _R, bool _Intl)
                : money_base(_R), _Ifl(_Intl) {_Init(); }
protected:
                // Release the four strings obtained in _Init.
        virtual ~_Mpunct()
                {delete[] _Mgr;
                delete[] _Mcs;
                delete[] _Mps;
                delete[] _Mns; }
                // The do_ members return the values stored by _Init;
                // string results are copied out of the stored arrays.
        virtual _E do_decimal_point() const
                {return (_Mdp); }
        virtual _E do_thousands_sep() const
                {return (_Mks); }
        virtual string do_grouping() const
                {return (string(_Mgr)); }
        virtual string_type do_curr_symbol() const
                {return (string_type(_Mcs)); }
        virtual string_type do_positive_sign() const
                {return (string_type(_Mps)); }
        virtual string_type do_negative_sign() const
                {return (string_type(_Mns)); }
        virtual int do_frac_digits() const
                {return (_Mfd); }
        virtual pattern do_pos_format() const
                {return (_Mpf); }
        virtual pattern do_neg_format() const
                {return (_Mnf); }
private:
                // Fill in every stored value from the lconv object
                // returned by localeconv().
                // NOTE(review): _WIDEN and _MAKLOCSTR are defined
                // elsewhere; presumably _WIDEN converts one char to
                // _E and _MAKLOCSTR returns a new[] copy converted to
                // the named element type (the destructor uses
                // delete[] on the results) -- confirm.
        void _Init()
                {const lconv *_P = localeconv();        // SIMPLIFIED
                _Mdp = _WIDEN(_E, _P->mon_decimal_point[0]);
                _Mks = _WIDEN(_E, _P->mon_thousands_sep[0]);
                _Mgr = _MAKLOCSTR(char, _P->mon_grouping);
                        // international or local currency symbol
                _Mcs = _MAKLOCSTR(_E, _Ifl ? _P->int_curr_symbol
                        : _P->currency_symbol);
                        // a sign position above 4 selects the
                        // fallback sign strings "" and "-"
                _Mps = _MAKLOCSTR(_E, 4 < (unsigned int)_P->p_sign_posn
                        ? "" : _P->positive_sign);
                _Mns = _MAKLOCSTR(_E, 4 < (unsigned int)_P->n_sign_posn
                        ? "-" : _P->negative_sign);
                _Mfd = _Ifl ? _P->int_frac_digits
                        : _P->frac_digits;
                        // negative or CHAR_MAX-and-up counts become 0
                if (_Mfd < 0 || CHAR_MAX <= _Mfd)
                        _Mfd = 0;
                _Makpat(_Mpf, _P->p_sep_by_space,
                        _P->p_cs_precedes, _P->p_sign_posn);
                _Makpat(_Mnf, _P->n_sep_by_space,
                        _P->n_cs_precedes, _P->n_sign_posn); }
                // Build a pattern from three lconv parameters:
                //   _Sep - the sep_by_space value
                //   _Pre - the cs_precedes value
                //   _Pos - the sign_posn value
                // The international format, or any parameter outside
                // its valid range (_Sep and _Pre other than 0 or 1,
                // _Pos above 4), yields the pattern "$+vx".
        void _Makpat(pattern& _Pat, char _Sep, char _Pre, char _Pos)
                {       // Twenty patterns of four characters each:
                        // rows of five are selected by _Pre and _Sep,
                        // entries within a row by _Pos.
                const char *_Tab =
                        "+v$x" "+v$x" "v$+x" "v+$x" "v$+x"
                        "+$vx" "+$vx" "$v+x" "+$vx" "$+vx"
                        "+v $" "+v $" "v $+" "v+ $" "v $+"
                        "+$ v" "+$ v" "$ v+" "+$ v" "$ +v";
                const char *_S = "$+vx";
                if (!_Ifl && (_Sep & ~1) == 0 && (_Pre & ~1) == 0
                        && (unsigned int)_Pos <= 4)
                        {       // Offsets are in characters, so the
                                // entry index _Pos is scaled by the
                                // pattern length; 20 and 40 are
                                // already whole rows.
                        _S = _Tab + (_Pos * 4
                                + (_Pre == 1 ? 20 : 0)
                                + (_Sep == 1 ? 40 : 0));
                        }
                memcpy(_Pat.field, _S, sizeof (_Pat.field)); }
        char *_Mgr;                             // grouping string
        _E _Mdp, _Mks, *_Mcs, *_Mps, *_Mns;     // point, separator,
                                                // symbol, signs
        int _Mfd;                               // fraction digits
        pattern _Mpf, _Mnf;                     // positive/negative
        bool _Ifl;                              // international flag
        };
//End of File

March 1998/Standard C/C++/Listing 3

Listing 3: Template class moneypunct and friends

               // TEMPLATE CLASS moneypunct
// The facet itself. All the work is inherited from _Mpunct<_E>;
// this class passes its Boolean template argument to the base
// constructor and declares the static members intl and id.
template<class _E, bool _Intl = false>
        class moneypunct
                : public _Mpunct<_E> {
public:
        explicit moneypunct(size_t _R = 0)
                : _Mpunct<_E>(_R, _Intl)
                {       // nothing further to construct
                }
        static locale::id id;
        static const bool intl;
        };
// Definitions of the static members of moneypunct.
template<class _Ty, bool _Flag>
        locale::id moneypunct<_Ty, _Flag>::id;
// intl takes the value of the Boolean template argument.
template<class _Ty, bool _Flag>
        const bool moneypunct<_Ty, _Flag>::intl = _Flag;
                // TEMPLATE CLASS moneypunct_byname
// A moneypunct facet constructed from a locale name.
//   _S - the locale name, as a C string
//   _R - forwarded reference-count argument, 0 by default
// Only the declarations appear here; the member definitions
// are not shown.
template<class _E, bool _Intl = false>
        class moneypunct_byname
                : public moneypunct<_E, _Intl> {
public:
        explicit moneypunct_byname(const char *_S,
                size_t _R = 0);
protected:
        virtual ~moneypunct_byname();
        };

//End of File

March 1998/Standard C/C++

Standard C/C++: The Facet moneypunct

P. J. Plauger

A C locale provides low-level information on how to format monetary amounts. The C++ locale facet moneypunct supplies the same information at a somewhat higher level of abstraction.


Introduction

Internationalization of software is an open-ended topic. Some things you have to worry about are fairly obvious. We English speakers are quite comfortable with our 26 letters, in two cases. But practically all other languages need additional characters, accent marks, and more elaborate rules than you can imagine for shifting between cases.

Maybe you know that most Europeans write a comma where we use a dot as a decimal point. Even venerable COBOL was prepared for that particular cultural adaptation. But you might not know that 1,000,000 might be written quite differently — with different "thousands" separators and different groupings of digits — elsewhere in the world.

One of the few real inventions added during the standardization of C was the concept of a locale. A locale is a collection of information peculiar to some culture. It is not necessarily (just) language oriented — accountants have a different culture from engineers, in several important ways. In any event, locales were added to C to capture at least some aspects of cultural dependency that may be important in adapting a computer program distributed in an international market.

(The idea is that you internationalize a program by removing all the cultural dependencies you can. The remaining dependencies you parameterize. A locale tells you what parameter values to use for a given culture. So you localize the program by binding it to a specific locale.)

The locale machinery in the Standard C library is quite simple. You can alter the default "C" locale by calling the function setlocale, declared in <locale.h>. (The names of locales are not standardized, any more than the names of valid files are standardized.) A successful change of locale can alter the character used for a decimal point in the scan and print functions. It can also alter the set of space characters tested for in several library functions.

Within the Standard C library, the only other effect of a change in locale is on the contents of an object of type struct lconv returned by the function localeconv, also declared in <locale.h>. It contains information that is largely advisory — it affects the program only if the code explicitly tests the contents of the returned object. For example, the object tells you all sorts of interesting things about how to display monetary amounts. What you, as programmer, do with this information is up to you.

For the past several months, I've been describing some of the machinery in the Standard C++ library that is based on the locales of the C Standard. C has one locale that's global to a program. C++ lets you construct any number of locale objects, each of which can encapsulate a distinct locale. C has locale categories that describe different aspects of a locale. C++ locale objects store references to any number of objects called locale facets, each of which carries out some locale-specific set of duties when you call upon its member functions. (See "Standard C/C++: Introduction to Locales," CUJ, October 1997.)

The facets I've described so far affect the behavior of other library classes:

I now shift to the more exotic facets. These are template classes supplied as part of the Standard C++ library. Each has two or more specializations defined in the library, typically for sequences with elements of type char and wchar_t. Practically every locale object contains references to objects of these specialized types. But the library does not otherwise make any use of these locale facets.

Just so you know where I'm going, here is a list of the facets I classify as exotic. I list only the char specializations. An analogous wchar_t specialization accompanies each of these in every locale object:

moneypunct<char, false>
moneypunct<char, true>
money_put<char,
        ostreambuf_iterator<char,
                char_traits<char> > >
money_get<char,
        istreambuf_iterator<char,
                char_traits<char> > >
time_put<char,
        ostreambuf_iterator<char,
                char_traits<char> > >
time_get<char,
        istreambuf_iterator<char,
                char_traits<char> > >
collate<char>
messages<char>

The topic for this month is the first two facets in the list. You use them to obtain information on how to convert a monetary amount to a text representation, or vice versa, by culture-specific rules.

Locale Category LC_MONETARY

To begin with, it helps to know what a C locale tells you about formatting monetary amounts. The C Standards committee X3J11 based its design heavily on recommendations from IBM Corporation. They provided a detailed study of how all the various cultures around the world display such amounts. (Well, at least they studied all the potential customers for IBM computers, I imagine.) The results show an astonishing variety, which is not easily captured in just a handful of parameters. But we tried anyway.

Here are the fields of the structure lconv that are affected by the locale category LC_MONETARY. The comment following each field shows its default value for the "C" locale:

char *currency_symbol;    // ""
char *int_curr_symbol;    // ""
char *mon_decimal_point;  // ""
char *mon_grouping;       // ""
char *mon_thousands_sep;  // ""
char *negative_sign;      // ""
char *positive_sign;      // ""
char frac_digits;         // CHAR_MAX
char int_frac_digits;     // CHAR_MAX
char n_cs_precedes;       // CHAR_MAX
char n_sep_by_space;      // CHAR_MAX
char n_sign_posn;         // CHAR_MAX
char p_cs_precedes;       // CHAR_MAX
char p_sep_by_space;      // CHAR_MAX
char p_sign_posn;         // CHAR_MAX

Members of type pointer to char all point to null-terminated strings. Members of type char all have nonnegative values. A value of CHAR_MAX indicates that a meaningful value is not available in the current locale.

A brief description of each of these members follows, with an example in parentheses that would be suitable for a USA locale:

currency_symbol — the local currency symbol ("$")

int_curr_symbol — the international currency symbol specified by ISO 4217 ("USD")

mon_decimal_point — the decimal point for monetary values (".")

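mon_grouping — the sizes of digit groups for monetary values. Successive elements of the string describe groups going away from the decimal point: an element equal to CHAR_MAX means no further grouping is performed, an element equal to zero means the previous element is reused for all remaining digits, and any other value is the number of digits in the current group.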

Thus, the array {3, 2, CHAR_MAX} calls for a group of three digits, then two, then whatever remains, as in 9876,54,321, while "\3" calls for repeated groups of three digits, as in 987,654,321. ("\3")

mon_thousands_sep — the separator for digit groups to the left of the decimal point for monetary values (",")

negative_sign — the negative sign for monetary values ("-")

positive_sign — the positive sign for monetary values ("")

frac_digits — the number of digits to display to the right of the decimal point for monetary values (2)

int_frac_digits — the number of digits to display to the right of the decimal point for international monetary values (2)

n_cs_precedes — whether the currency symbol precedes or follows the value for negative monetary values: 1 if the symbol precedes the value, 0 if it follows the value.

n_sep_by_space — whether the currency symbol is separated by a space or by no space from the value for negative monetary values: 1 if a space separates them, 0 if no space does.

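n_sign_posn — the format for negative monetary values: 0 means parentheses surround the value and the currency symbol, 1 means the sign precedes both, 2 means the sign follows both, 3 means the sign immediately precedes the currency symbol, and 4 means the sign immediately follows the currency symbol.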

p_cs_precedes — whether the currency symbol precedes or follows the value for positive monetary values: 1 if the symbol precedes the value, 0 if it follows the value.

p_sep_by_space — whether the currency symbol is separated by a space or by no space from the value for positive monetary values: 1 if a space separates them, 0 if no space does.

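p_sign_posn — the format for positive monetary values: 0 means parentheses surround the value and the currency symbol, 1 means the sign precedes both, 2 means the sign follows both, 3 means the sign immediately precedes the currency symbol, and 4 means the sign immediately follows the currency symbol.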

The good news is that these parameters describe a broad range of monetary formats — all the ones in the world, if you believe IBM's report. The bad news is that they describe formats at a very low level.

Many years ago, I wrote a function called _Fmtval that applied all this information to the business of converting a monetary amount to text. (See my book, The Standard C Library, Prentice-Hall, 1992 for a listing of this function.) The function was messy and hard to read. The published version even contained an error or two. But it basically did the job. And it really underscored the need to view monetary formatting at a higher level than is provided by C locales.

Template Class moneypunct

In the world of the Standard C++ library, that same messy monetary formatting job is divvied up among two locale facets. Template class moneypunct delivers up much the same information as is stored in an lconv object, but in a slightly more structured form. Template class money_put converts a monetary amount to text, using the formatting information it obtains from a companion moneypunct facet. And as you might guess, template class money_get performs a related task. It converts a text sequence to a monetary amount, also using the formatting information it obtains from a companion moneypunct facet.

Listing 1 shows just the public interface for template class moneypunct, to give you an idea of what sorts of information it supplies. It derives from class money_base, which merely defines the member type pattern and some enumeration constants. The names of the member functions are pretty self-explanatory. As you might guess, a pattern object provides a kind of format string that summarizes several of the low-level parameters from the C locale.

Every locale object should store references to the facets moneypunct<char, false>, moneypunct<char, true>, moneypunct<wchar_t, false>, and moneypunct<wchar_t, true>, for use by the char and wchar_t specializations of money_get and money_put.

I've spent the past two months reviewing a triad similar to moneypunct, money_get, and money_put — for converting between numeric values and text. To convert an integer to a sequence of char elements, for example, you call one of the member functions named put in class num_put<char, ostreambuf_iterator<char, char_traits<char> > >. That member function eventually calls a function that obtains a reference to the corresponding facet numpunct<char>, as in:

const numpunct<char>& fac =
        use_facet<numpunct<char> >(loc);

The function can then call, say, fac.decimal_point() to determine the decimal point appropriate for the locale object loc. You will find similar code in member functions within class num_get<char, istreambuf_iterator<char, char_traits<char> > >.

The monetary facets behave much the same way, but with an important difference. Take a closer look at Listing 1. For reasons that probably aren't very good, template class moneypunct has two parameters, not just one. The full set of numeric and monetary facet templates thus reads:

template<class E> class num_get;
template<class E> class num_put;
template<class E> class numpunct;
template<class E> class money_get;
template<class E> class money_put;
template<class E, bool Intl> class moneypunct;

In all cases, the type parameter E determines the "element," or character, type. Template class moneypunct alone has an added Boolean parameter, which is true to specify an international format for monetary amounts (as in USD 1730), or false to specify the format presumably favored by the locals (as in $1,730.00). This presents an interesting coding problem.

How does, say, a money_put facet decide which flavor of moneypunct facet to select? Well, that turns out to be one of the arguments you supply when you call one of its put member functions. Depending on the value of this argument, you want to obtain a reference either to a moneypunct<E, false> object or a moneypunct<E, true> object. But these are different types. You have to write something like:

const moneypunct<char, false>& fac0 =
        use_facet<moneypunct<char, false> >(loc);
const moneypunct<char, true>& fac1 =
        use_facet<moneypunct<char, true> >(loc);

The function then must evaluate an expression such as:

intl ? fac1.decimal_point() : fac0.decimal_point()

each time it needs to determine the decimal point appropriate for the locale object loc. If such expressions occurred in just one or two places, the problem wouldn't be so bad, but in fact the code for money_get and money_put contains numerous calls to moneypunct member functions. Yuk.

I avoided this problem by defining a common base class for the two related flavors of moneypunct. Listing 2 shows template class _Mpunct, which implements the code common to moneypunct<E, false> and moneypunct<E, true>. The Boolean "international" flag is stored as a member object when an object of class _Mpunct<E> is constructed. This permits the code above to be rewritten as:

// Each arm is cast to the common base: the two facet types are
// unrelated to each other, so the conditional operator cannot
// settle on a result type by itself.
const _Mpunct<char>& fac = intl
        ? static_cast<const _Mpunct<char>&>(
                use_facet<moneypunct<char, true> >(loc))
        : static_cast<const _Mpunct<char>&>(
                use_facet<moneypunct<char, false> >(loc));

You can then determine the appropriate decimal point by calling fac.decimal_point(), as with the numeric facets.

As with the facets I've discussed earlier, initializing an object of class moneypunct<_E> requires some amount of implementation-specific magic. I sidestep much of that here, but I do show a bit more of the process than I have for other facets. In particular, I sketch out how the stored parameter values might be initialized from the C locale machinery.

C locales deal purely with elements of type char. The Standard C++ library must also supply wchar_t versions of the same information. Template class moneypunct must be prepared to convert char elements to elements of some fairly arbitrary type E. Thus, you will find a bit of magic code within the member function _Init:

_Init calls the member function _Makpat to convert several of the parameter values returned by localeconv to a pattern object. The conditional expression that does all the work intentionally favors speed and compactness over readability. I make no apologies for it.

Listing 3 shows template class moneypunct and its related templates. All the complexity is captured in the base class _Mpunct<E>, so there is little left to supply.

Listing 3 also shows the template class moneypunct_byname. It provides a way to create a moneypunct<_E> object whose behavior is consistent with a locale whose name you know. It joins the (incomplete) ranks of similar template classes, such as ctype_byname and numpunct_byname. You use them to handcraft a locale object that captures at least some of the properties of a named locale you presumably wish to make use of.

Coming Attractions

Template class moneypunct is not terribly exciting in its own right. Its primary duty, as I indicated above, is to supply information for use by the template classes money_put and money_get. The real action starts next month.

P.J. Plauger is Senior Editor of C/C++ Users Journal and President of Dinkumware, Ltd. He is the author of the Standard C++ Library shipped with Microsoft's Visual C++, v5.0. For eight years, he served as convener of the ISO C standards committee, WG14. He remains active on the C++ committee, J16. His latest books are The Draft Standard C++ Library, Programming on Purpose (three volumes), and Standard C (with Jim Brodie), all published by Prentice-Hall. You can reach him at [email protected].

Terms of Service | Privacy Statement | Copyright © 2024 UBM Tech, All rights reserved.