I18N
- Renamed text.cpp/characters to utf8.cpp/utf8_length, which is more appropriate.
This commit is contained in:
19
src/utf8.cpp
19
src/utf8.cpp
@@ -166,3 +166,22 @@ int utf8_sequence (unsigned int character)
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// Returns the number of characters encoded in the UTF-8 string `str`, as
// opposed to the number of bytes it occupies.
//
// Every UTF-8 sequence consists of exactly one lead byte followed by zero or
// more continuation bytes of the form 0b10??????.  Counting only the bytes
// that are NOT continuation bytes therefore yields one count per sequence.
// The input is not validated: a stray continuation byte is simply not counted.
int utf8_length (const std::string& str)
{
  const int total = str.length ();
  const char* cursor = str.data ();
  const char* const finish = cursor + total;

  int leadBytes = 0;
  while (cursor != finish)
  {
    // Mask off everything but the top two bits; anything other than 10 there
    // marks the start of a new sequence (or a plain ASCII byte).
    if ((*cursor & 0xC0) != 0x80)
      ++leadBytes;

    ++cursor;
  }

  return leadBytes;
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
Reference in New Issue
Block a user