Loading...
Searching...
No Matches
unistring.h
Go to the documentation of this file.
1/*
2 * Copyright (c) 2003-2023, John Wiegley. All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions are
6 * met:
7 *
8 * - Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 *
11 * - Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 *
15 * - Neither the name of New Artisans LLC nor the names of its
16 * contributors may be used to endorse or promote products derived from
17 * this software without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 */
31
42#pragma once
43
44namespace ledger {
45
46int mk_wcwidth(boost::uint32_t ucs);
47
57{
58public:
59 static const std::size_t npos = static_cast<std::size_t>(-1);
60
61 std::vector<boost::uint32_t> utf32chars;
62
65 }
66 unistring(const std::string& input)
67 {
68 const char * p = input.c_str();
69 std::size_t len = input.length();
70
71 // This size should be at least as large as MAX_LINE in context.h
72 assert(len < 4096);
73 VERIFY(utf8::is_valid(p, p + len));
74 utf8::unchecked::utf8to32(p, p + len, std::back_inserter(utf32chars));
75
76 TRACE_CTOR(unistring, "std::string");
77 }
81
82 std::size_t length() const {
83 return utf32chars.size();
84 }
85
86 std::size_t width() const {
87 std::size_t width = 0;
88 foreach (const boost::uint32_t& ch, utf32chars) {
89 width += mk_wcwidth(ch);
90 }
91 return width;
92 }
93
94 std::string extract(const std::string::size_type begin = 0,
95 const std::string::size_type len = 0) const
96 {
97 std::string utf8result;
98 std::string::size_type this_len = length();
99
100 assert(begin <= this_len);
101 assert(begin + len <= this_len);
102
103 if (this_len)
104 utf8::unchecked::utf32to8
105 (utf32chars.begin() + static_cast<std::string::difference_type>(begin),
106 utf32chars.begin() + static_cast<std::string::difference_type>(begin) +
107 static_cast<std::string::difference_type>
108 (len ? (len > this_len ? this_len : len) : this_len),
109 std::back_inserter(utf8result));
110
111 return utf8result;
112 }
113
114 std::string extract_by_width(std::string::size_type begin,
115 std::size_t len) const
116 {
117 std::string utf8result;
118 std::size_t this_width = width();
119 std::string::size_type this_len = length();
120
121 assert(begin <= this_width);
122 if (begin + len > this_width)
123 len = this_width - begin;
124
125 std::size_t pos = 0;
126 std::size_t begin_idx = 0, end_idx = 0;
127 std::size_t head = 0, tail = 0;
128 for (std::size_t idx = 0; idx < this_len; ++idx) {
129 std::size_t w = mk_wcwidth(utf32chars[idx]);
130
131 if (pos < begin) {
132 if (pos + w >= begin) {
133 head = std::min(pos + w, begin + len) - begin;
134 begin_idx = idx + 1;
135 }
136 } else if (pos < begin + len) {
137 if (pos + w > begin + len) {
138 tail = begin + len - pos;
139 end_idx = idx;
140 }
141 if (pos + w == begin + len) {
142 tail = 0;
143 end_idx = idx + 1;
144 }
145 }
146 pos += w;
147 }
148
149 utf8result += std::string(head, '.');
150
151 if (begin_idx < end_idx)
152 utf8::unchecked::utf32to8
153 (utf32chars.begin() + static_cast<std::string::difference_type>(begin_idx),
154 utf32chars.begin() + static_cast<std::string::difference_type>(end_idx),
155 std::back_inserter(utf8result));
156
157 utf8result += std::string(tail, '.');
158
159 return utf8result;
160 }
161
162 std::size_t find(const boost::uint32_t __s, std::size_t __pos = 0) const {
163 std::size_t idx = 0;
164 foreach (const boost::uint32_t& ch, utf32chars) {
165 if (idx >= __pos && ch == __s)
166 return idx;
167 idx++;
168 }
169 return npos;
170 }
171
172 boost::uint32_t& operator[](const std::size_t index) {
173 return utf32chars[index];
174 }
175 const boost::uint32_t& operator[](const std::size_t index) const {
176 return utf32chars[index];
177 }
178};
179
180inline void justify(std::ostream& out,
181 const std::string& str,
182 int width,
183 bool right = false,
184 bool redden = false)
185{
186 if (! right) {
187 if (redden) out << "\033[31m";
188 out << str;
189 if (redden) out << "\033[0m";
190 }
191
192 unistring temp(str);
193
194 int spacing = width - int(temp.width());
195 while (spacing-- > 0)
196 out << ' ';
197
198 if (right) {
199 if (redden) out << "\033[31m";
200 out << str;
201 if (redden) out << "\033[0m";
202 }
203}
204
205} // namespace ledger
#define VERIFY(x)
Definition utils.h:141
#define TRACE_DTOR(cls)
Definition utils.h:144
#define TRACE_CTOR(cls, args)
Definition utils.h:143
#define assert(x)
Definition utils.h:92
void justify(std::ostream &out, const std::string &str, int width, bool right=false, bool redden=false)
Definition unistring.h:180
int mk_wcwidth(boost::uint32_t ucs)
T & downcast(U &object)
Definition utils.h:468
Abstract working with UTF-32 encoded Unicode strings.
Definition unistring.h:57
std::size_t width() const
Definition unistring.h:86
std::size_t length() const
Definition unistring.h:82
std::string extract(const std::string::size_type begin=0, const std::string::size_type len=0) const
Definition unistring.h:94
std::size_t find(const boost::uint32_t __s, std::size_t __pos=0) const
Definition unistring.h:162
static const std::size_t npos
Definition unistring.h:59
const boost::uint32_t & operator[](const std::size_t index) const
Definition unistring.h:175
std::string extract_by_width(std::string::size_type begin, std::size_t len) const
Definition unistring.h:114
std::vector< boost::uint32_t > utf32chars
Definition unistring.h:61
unistring(const std::string &input)
Definition unistring.h:66
boost::uint32_t & operator[](const std::size_t index)
Definition unistring.h:172