- Renamed text.cpp/characters to utf8.cpp/utf8_length, which is more
  appropriate.
This commit is contained in:
Paul Beckingham
2011-05-01 11:10:32 -04:00
parent 0b3281d01d
commit b0ff1ff55b
7 changed files with 40 additions and 38 deletions

View File

@@ -166,3 +166,22 @@ int utf8_sequence (unsigned int character)
}
////////////////////////////////////////////////////////////////////////////////
int utf8_length (const std::string& str)
{
  // Returns the number of UTF-8 encoded characters in str, as opposed to the
  // number of bytes.  Every character contributes exactly one leading byte,
  // followed by zero or more continuation bytes of the form 0b10??????, so
  // the character count is the byte count minus the continuation bytes.
  const int total = str.length ();

  int continuations = 0;
  for (int pos = 0; pos < total; ++pos)
  {
    // Mask off everything but the top two bits; a continuation byte has
    // exactly the pattern 10 there.
    const bool isContinuation = (str[pos] & 0xC0) == 0x80;
    if (isContinuation)
      ++continuations;
  }

  return total - continuations;
}
////////////////////////////////////////////////////////////////////////////////