| [1657] | 1 | #include <osgText/String> |
|---|
| 2 | |
|---|
| [3230] | 3 | #include <osg/Notify> |
|---|
| [1657] | 4 | #include <osg/Math> |
|---|
| 5 | |
|---|
| 6 | using namespace osgText; |
|---|
| 7 | |
|---|
| 8 | |
|---|
| 9 | |
|---|
| [7648] | 10 | |
|---|
| [1657] | 11 | |
|---|
/**
 * Forward cursor over the bytes of a std::string that is safe to read past
 * the end: any access beyond the last byte yields _nullCharacter (0) instead
 * of touching out-of-range memory, and the cursor never advances beyond
 * _string.length().
 *
 * The iterator only holds a reference to the string, so the string must
 * outlive the iterator.
 */
struct look_ahead_iterator
{
    look_ahead_iterator(const std::string& string):
        _string(string),
        _index(0),
        _nullCharacter(0) {}

    /** True while the cursor still points at a byte inside the string. */
    bool valid() const { return _index<_string.length(); }

    /** Pre-increment: step one byte forward, stopping at the end. */
    look_ahead_iterator& operator ++ ()
    {
        if (valid()) _index += 1;
        return *this;
    }

    /** Post-increment: step one byte forward and return the previous position. */
    look_ahead_iterator operator ++ (int)
    {
        look_ahead_iterator previous(*this);
        ++(*this);
        return previous;
    }

    /** Advance by offset bytes, clamping the cursor to the end of the string. */
    look_ahead_iterator& operator += (int offset)
    {
        // Once at the end the cursor stays there.
        if (!valid()) return *this;

        const unsigned int length = (unsigned int)_string.length();
        const unsigned int target = (unsigned int)(_index+offset);
        _index = (target<length) ? target : length;
        return *this;
    }

    /** Byte under the cursor, or _nullCharacter when the cursor is at the end. */
    unsigned char operator * () const
    {
        if (!valid()) return _nullCharacter;
        return _string[_index];
    }

    /** Byte offset positions ahead of the cursor, or _nullCharacter when that is past the end. */
    unsigned char operator [] (unsigned int offset) const
    {
        const unsigned int position = _index+offset;
        if (position>=_string.length()) return _nullCharacter;
        return _string[position];
    }

    const std::string&      _string;
    unsigned int            _index;
    unsigned char           _nullCharacter;

protected:

    // Assignment is a deliberate no-op: the reference member cannot be
    // reseated, so the operator is hidden from outside callers.
    look_ahead_iterator& operator = (const look_ahead_iterator&) { return *this; }
};
|---|
| 62 | |
|---|
| 63 | String::Encoding findEncoding(look_ahead_iterator& charString,String::Encoding overrideEncoding) |
|---|
| 64 | { |
|---|
| 65 | switch (charString[0]) |
|---|
| 66 | { |
|---|
| 67 | case 0xEF: |
|---|
| 68 | { |
|---|
| 69 | |
|---|
| 70 | if ((charString[1]==0xBB) && (charString[2]==0xBF)) |
|---|
| 71 | { |
|---|
| 72 | charString+=3; |
|---|
| 73 | return String::ENCODING_UTF8; |
|---|
| 74 | } |
|---|
| 75 | break; |
|---|
| 76 | } |
|---|
| 77 | case 0xFE: |
|---|
| 78 | { |
|---|
| 79 | |
|---|
| 80 | if (charString[1]==0xFF) |
|---|
| 81 | { |
|---|
| 82 | charString+=2; |
|---|
| 83 | return String::ENCODING_UTF16_BE; |
|---|
| 84 | } |
|---|
| 85 | break; |
|---|
| 86 | } |
|---|
| 87 | case 0xFF: |
|---|
| 88 | { |
|---|
| 89 | |
|---|
| 90 | |
|---|
| 91 | if (charString[1]==0xFE) |
|---|
| 92 | { |
|---|
| 93 | |
|---|
| 94 | |
|---|
| 95 | if ((charString[2]==0) && (charString[3]==0) && (overrideEncoding != String::ENCODING_UTF16)) |
|---|
| 96 | { |
|---|
| 97 | charString+=4; |
|---|
| 98 | return String::ENCODING_UTF32_LE; |
|---|
| 99 | } |
|---|
| 100 | else |
|---|
| 101 | { |
|---|
| 102 | charString+=2; |
|---|
| 103 | return String::ENCODING_UTF16_LE; |
|---|
| 104 | } |
|---|
| 105 | } |
|---|
| 106 | break; |
|---|
| 107 | } |
|---|
| 108 | case 0x00: |
|---|
| 109 | { |
|---|
| 110 | |
|---|
| 111 | if ((charString[1]==0x00) && (charString[2]==0xFE) && (charString[3]==0xFF)) |
|---|
| 112 | { |
|---|
| 113 | charString+=4; |
|---|
| 114 | return String::ENCODING_UTF32_BE; |
|---|
| 115 | } |
|---|
| 116 | break; |
|---|
| 117 | } |
|---|
| 118 | } |
|---|
| 119 | return String::ENCODING_ASCII; |
|---|
| 120 | } |
|---|
| 121 | |
|---|
| 122 | |
|---|
| 123 | unsigned int getNextCharacter(look_ahead_iterator& charString,String::Encoding encoding) |
|---|
| 124 | { |
|---|
| 125 | |
|---|
| 126 | |
|---|
| 127 | switch(encoding) |
|---|
| 128 | { |
|---|
| 129 | case String::ENCODING_ASCII: |
|---|
| 130 | { |
|---|
| 131 | return *charString++; |
|---|
| 132 | } |
|---|
| 133 | case String::ENCODING_UTF8: |
|---|
| 134 | { |
|---|
| 135 | int char0 = *charString++; |
|---|
| 136 | if (char0 < 0x80) |
|---|
| 137 | { |
|---|
| 138 | return char0; |
|---|
| 139 | } |
|---|
| 140 | int char1 = *charString++; |
|---|
| 141 | if (char0<0xe0) |
|---|
| 142 | { |
|---|
| 143 | return ((char0&0x1f)<<6) | (char1&0x3f); |
|---|
| 144 | } |
|---|
| 145 | int char2 = *charString++; |
|---|
| 146 | if (char0<0xf0) |
|---|
| 147 | { |
|---|
| 148 | return ((char0&0xf)<<12) | ((char1&0x3f)<<6) | (char2&0x3f); |
|---|
| 149 | } |
|---|
| 150 | int char3 = *charString++; |
|---|
| 151 | if (char0<0xf8) |
|---|
| 152 | { |
|---|
| 153 | return ((char0&0x7)<<18) | ((char1&0x3f)<<12) | ((char2&0x3f)<<6) | (char3&0x3f); |
|---|
| 154 | } |
|---|
| 155 | break; |
|---|
| 156 | } |
|---|
| 157 | case String::ENCODING_UTF16_BE: |
|---|
| 158 | { |
|---|
| 159 | int char0 = *charString++; |
|---|
| 160 | int char1 = *charString++; |
|---|
| 161 | if ((char0<=0xD7) || (char0>=0xE0)) |
|---|
| 162 | { |
|---|
| 163 | return (char0<<8) | char1; |
|---|
| 164 | } |
|---|
| 165 | else if ((char0>=0xD8)&&(char0<=0xDB)) |
|---|
| 166 | { |
|---|
| 167 | int char2 = *charString++; |
|---|
| 168 | int char3 = *charString++; |
|---|
| 169 | int highSurrogate = (char0<<8) | char1; |
|---|
| 170 | int lowSurrogate = (char2<<8) | char3; |
|---|
| 171 | if ((char2>=0xDC)&&(char2<=0xDF)) |
|---|
| 172 | { |
|---|
| 173 | |
|---|
| 174 | return ((highSurrogate-0xD800)*0x400) + (lowSurrogate-0xD800) + 0x10000; |
|---|
| 175 | } |
|---|
| 176 | } |
|---|
| 177 | break; |
|---|
| 178 | } |
|---|
| 179 | case String::ENCODING_UTF16_LE: |
|---|
| 180 | { |
|---|
| 181 | int char1 = *charString++; |
|---|
| 182 | int char0 = *charString++; |
|---|
| 183 | if ((char0<=0xD7) || (char0>=0xE0)) |
|---|
| 184 | { |
|---|
| 185 | return (char0<<8) | char1; |
|---|
| 186 | } |
|---|
| 187 | else if ((char0>=0xD8)&&(char0<=0xDB)) |
|---|
| 188 | { |
|---|
| 189 | int char3 = *charString++; |
|---|
| 190 | int char2 = *charString++; |
|---|
| 191 | int highSurrogate = (char0<<8) | char1; |
|---|
| 192 | int lowSurrogate = (char2<<8) | char3; |
|---|
| 193 | if ((char2>=0xDC)&&(char2<=0xDF)) |
|---|
| 194 | { |
|---|
| 195 | |
|---|
| 196 | return ((highSurrogate-0xD800)*0x400) + (lowSurrogate-0xD800) + 0x10000; |
|---|
| 197 | } |
|---|
| 198 | } |
|---|
| 199 | break; |
|---|
| 200 | } |
|---|
| 201 | case String::ENCODING_UTF32_BE: |
|---|
| 202 | { |
|---|
| 203 | int character = ((((int)charString[0])<<24) | (((int)charString[1])<<16) | |
|---|
| 204 | (((int)charString[2])<<8) | charString[3]); |
|---|
| 205 | charString+=4; |
|---|
| 206 | if (character<0x110000) |
|---|
| 207 | { |
|---|
| 208 | |
|---|
| 209 | return character; |
|---|
| 210 | } |
|---|
| 211 | break; |
|---|
| 212 | } |
|---|
| 213 | case String::ENCODING_UTF32_LE: |
|---|
| 214 | { |
|---|
| 215 | int character = ((((int)charString[3])<<24) | (((int)charString[2])<<16) | |
|---|
| 216 | (((int)charString[1])<<8) | charString[0]); |
|---|
| 217 | charString+=4; |
|---|
| 218 | if (character<0x110000) |
|---|
| 219 | { |
|---|
| 220 | |
|---|
| 221 | return character; |
|---|
| 222 | } |
|---|
| 223 | break; |
|---|
| 224 | } |
|---|
| 225 | default: |
|---|
| 226 | { |
|---|
| 227 | |
|---|
| 228 | |
|---|
| 229 | osg::notify(osg::FATAL)<<"Error: Invalid string encoding"<<std::endl; |
|---|
| 230 | break; |
|---|
| 231 | } |
|---|
| 232 | } |
|---|
| 233 | return 0; |
|---|
| 234 | } |
|---|
| 235 | |
|---|
| 236 | |
|---|
| 237 | |
|---|
| 238 | |
|---|
| 239 | |
|---|
| 240 | |
|---|
| 241 | |
|---|
| 242 | String::String(const String& str): |
|---|
| [4401] | 243 | vector_type(str) |
|---|
| [1657] | 244 | { |
|---|
| 245 | } |
|---|
| 246 | |
|---|
| 247 | String& String::operator = (const String& str) |
|---|
| 248 | { |
|---|
| 249 | if (&str==this) return *this; |
|---|
| 250 | |
|---|
| 251 | clear(); |
|---|
| 252 | std::copy(str.begin(),str.end(),std::back_inserter(*this)); |
|---|
| 253 | |
|---|
| 254 | return *this; |
|---|
| 255 | } |
|---|
| 256 | |
|---|
| 257 | void String::set(const std::string& text) |
|---|
| 258 | { |
|---|
| 259 | clear(); |
|---|
| [1687] | 260 | for ( std::string::const_iterator it = text.begin(); it != text.end(); ++it ) |
|---|
| 261 | { |
|---|
| 262 | unsigned int charcode = (unsigned char )*it; |
|---|
| 263 | push_back( charcode ); |
|---|
| 264 | } |
|---|
| [1657] | 265 | } |
|---|
| 266 | |
|---|
| 267 | void String::set(const wchar_t* text) |
|---|
| 268 | { |
|---|
| 269 | clear(); |
|---|
| 270 | while(*text) |
|---|
| 271 | { |
|---|
| 272 | push_back(*text++); |
|---|
| 273 | } |
|---|
| 274 | } |
|---|
| 275 | |
|---|
| 276 | void String::set(const std::string& text,Encoding encoding) |
|---|
| 277 | { |
|---|
| 278 | clear(); |
|---|
| 279 | |
|---|
| 280 | look_ahead_iterator itr(text); |
|---|
| 281 | |
|---|
| 282 | if ((encoding == ENCODING_SIGNATURE) || |
|---|
| 283 | (encoding == ENCODING_UTF16) || |
|---|
| 284 | (encoding == ENCODING_UTF32)) |
|---|
| 285 | { |
|---|
| 286 | encoding = findEncoding(itr,encoding); |
|---|
| 287 | } |
|---|
| 288 | |
|---|
| 289 | while(itr.valid()) |
|---|
| 290 | { |
|---|
| 291 | unsigned int c = getNextCharacter(itr,encoding); |
|---|
| 292 | if (c) push_back(c); |
|---|
| 293 | } |
|---|
| 294 | } |
|---|
| 295 | |
|---|
| 296 | std::string String::createUTF8EncodedString() const |
|---|
| 297 | { |
|---|
| 298 | std::string utf8string; |
|---|
| 299 | for(const_iterator itr=begin(); |
|---|
| 300 | itr!=end(); |
|---|
| 301 | ++itr) |
|---|
| 302 | { |
|---|
| 303 | unsigned int currentChar = *itr; |
|---|
| 304 | if (currentChar < 0x80) |
|---|
| 305 | { |
|---|
| 306 | utf8string+=(char)currentChar; |
|---|
| 307 | } |
|---|
| 308 | else if (currentChar < 0x800) |
|---|
| 309 | { |
|---|
| 310 | utf8string+=(char)(0xc0 | (currentChar>>6)); |
|---|
| [9376] | 311 | utf8string+=(char)(0x80 | (currentChar & 0x3f)); |
|---|
| [1657] | 312 | } |
|---|
| 313 | else |
|---|
| 314 | { |
|---|
| 315 | utf8string+=(char)(0xe0 | (currentChar>>12)); |
|---|
| [9376] | 316 | utf8string+=(char)(0x80 | ((currentChar>>6) & 0x3f)); |
|---|
| 317 | utf8string+=(char)(0x80 | (currentChar & 0x3f)); |
|---|
| [1657] | 318 | } |
|---|
| 319 | } |
|---|
| 320 | return utf8string; |
|---|
| 321 | } |
|---|