@ -25,6 +25,7 @@ distribution.
# include "Internal.h"
# include "Internal.h"
# include "Export.h"
# include "Export.h"
# include "MiscUtils.h"
# include "MiscUtils.h"
# include "Error.h"
# ifndef LINUX_BUILD
# ifndef LINUX_BUILD
# include <Windows.h>
# include <Windows.h>
@ -37,6 +38,11 @@ distribution.
# include <stdarg.h>
# include <stdarg.h>
# include <sstream>
# include <sstream>
# include <map>
// Returns the fixed, human-readable description carried by
// DFHack::Error::NullPointer. The result points at a string literal, so the
// caller must neither modify nor free it. Declared throw(): never throws.
const char * DFHack : : Error : : NullPointer : : what ( ) const throw ( ) {
return " NULL pointer access " ;
}
std : : string stl_sprintf ( const char * fmt , . . . ) {
std : : string stl_sprintf ( const char * fmt , . . . ) {
va_list lst ;
va_list lst ;
@ -149,4 +155,162 @@ uint64_t GetTimeMs64()
return ret ;
return ret ;
}
}
# endif
# endif
/* Character decoding */
// See http://bjoern.hoehrmann.de/utf-8/decoder/dfa/ for details.
// Decoder states that callers test for:
//   UTF8_ACCEPT - start state; also reached when a complete code point
//                 has just been decoded.
//   UTF8_REJECT - the bytes seen so far cannot be valid UTF-8.
// Every state value is a multiple of 12 (one transition-table row per state,
// twelve character classes per row); the other multiples are intermediate
// "inside a multi-byte sequence" states.
# define UTF8_ACCEPT 0
# define UTF8_REJECT 12
// Combined lookup table for the DFA-based UTF-8 decoder: 256 byte-class
// entries followed by 108 (9 states x 12 classes) transition entries.
static const uint8_t utf8d [ ] = {
// The first part of the table (indices 0..255) maps each byte to a
// character class, to reduce the size of the transition table and to
// create bitmasks (decode() uses the class as a shift count for lead bytes).
0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 9 , 9 , 9 , 9 , 9 , 9 , 9 , 9 , 9 , 9 , 9 , 9 , 9 , 9 , 9 , 9 ,
7 , 7 , 7 , 7 , 7 , 7 , 7 , 7 , 7 , 7 , 7 , 7 , 7 , 7 , 7 , 7 , 7 , 7 , 7 , 7 , 7 , 7 , 7 , 7 , 7 , 7 , 7 , 7 , 7 , 7 , 7 , 7 ,
8 , 8 , 2 , 2 , 2 , 2 , 2 , 2 , 2 , 2 , 2 , 2 , 2 , 2 , 2 , 2 , 2 , 2 , 2 , 2 , 2 , 2 , 2 , 2 , 2 , 2 , 2 , 2 , 2 , 2 , 2 , 2 ,
10 , 3 , 3 , 3 , 3 , 3 , 3 , 3 , 3 , 3 , 3 , 3 , 3 , 4 , 3 , 3 , 11 , 6 , 6 , 6 , 5 , 8 , 8 , 8 , 8 , 8 , 8 , 8 , 8 , 8 , 8 , 8 ,
// The second part (indices 256 onward) is a transition table that maps a
// combination of a state of the automaton and a character class to the
// next state; it is indexed as 256 + state + class.
0 , 12 , 24 , 36 , 60 , 96 , 84 , 12 , 12 , 12 , 48 , 72 , 12 , 12 , 12 , 12 , 12 , 12 , 12 , 12 , 12 , 12 , 12 , 12 ,
12 , 0 , 12 , 12 , 12 , 12 , 12 , 0 , 12 , 0 , 12 , 12 , 12 , 24 , 12 , 12 , 12 , 12 , 12 , 24 , 12 , 24 , 12 , 12 ,
12 , 12 , 12 , 12 , 12 , 12 , 12 , 24 , 12 , 12 , 12 , 12 , 12 , 24 , 12 , 12 , 12 , 12 , 12 , 12 , 12 , 24 , 12 , 12 ,
12 , 12 , 12 , 12 , 12 , 12 , 12 , 36 , 12 , 36 , 12 , 12 , 12 , 36 , 12 , 12 , 12 , 12 , 12 , 36 , 12 , 36 , 12 , 12 ,
12 , 36 , 12 , 12 , 12 , 12 , 12 , 12 , 12 , 12 , 12 , 12 ,
} ;
static inline uint32_t
decode ( uint32_t * state , uint32_t * codep , uint8_t byte ) {
uint32_t type = utf8d [ byte ] ;
* codep = ( * state ! = UTF8_ACCEPT ) ?
( byte & 0x3fu ) | ( * codep < < 6 ) :
( 0xff > > type ) & ( byte ) ;
* state = utf8d [ 256 + * state + type ] ;
return * state ;
}
/* Character encoding */
/**
 * Write the UTF-8 encoding of a 16-bit code point.
 *
 * @param out  Destination buffer; between 1 and 3 bytes are written, so it
 *             must have room for at least 3.
 * @param c    Code point to encode (0..0xFFFF).
 * @return     Number of bytes stored in out: 1, 2 or 3.
 */
static inline int encode(uint8_t *out, uint16_t c)
{
    // Trailing continuation byte shared by the 2- and 3-byte forms.
    const uint8_t low_six = (uint8_t)(0x80 | (c & 0x3F));

    if (c > 0x7FF) {
        // 1110xxxx 10xxxxxx 10xxxxxx
        out[0] = (uint8_t)(0xE0 | (c >> 12));
        out[1] = (uint8_t)(0x80 | ((c >> 6) & 0x3F));
        out[2] = low_six;
        return 3;
    }

    if (c > 0x7F) {
        // 110xxxxx 10xxxxxx
        out[0] = (uint8_t)(0xC0 | (c >> 6));
        out[1] = low_six;
        return 2;
    }

    // Plain 7-bit value: stored unchanged.
    out[0] = (uint8_t)c;
    return 1;
}
/* CP437 */
// Maps each of the 256 byte values of code page 437 (array index) to the
// Unicode code point used for it. Entries 0x20..0x7E and entry 0 map to
// themselves; every other entry differs from its index. The trailing comment
// on every other line gives the index of the first entry of that 16-entry
// group (the commented line plus the line after it).
static uint16_t character_table [ 256 ] = {
0 , 0x263A , 0x263B , 0x2665 , 0x2666 , 0x2663 , 0x2660 , 0x2022 , // 0x00
0x25D8 , 0x25CB , 0x25D9 , 0x2642 , 0x2640 , 0x266A , 0x266B , 0x263C ,
0x25BA , 0x25C4 , 0x2195 , 0x203C , 0xB6 , 0xA7 , 0x25AC , 0x21A8 , // 0x10
0x2191 , 0x2193 , 0x2192 , 0x2190 , 0x221F , 0x2194 , 0x25B2 , 0x25BC ,
0x20 , 0x21 , 0x22 , 0x23 , 0x24 , 0x25 , 0x26 , 0x27 , // 0x20
0x28 , 0x29 , 0x2A , 0x2B , 0x2C , 0x2D , 0x2E , 0x2F ,
0x30 , 0x31 , 0x32 , 0x33 , 0x34 , 0x35 , 0x36 , 0x37 , // 0x30
0x38 , 0x39 , 0x3A , 0x3B , 0x3C , 0x3D , 0x3E , 0x3F ,
0x40 , 0x41 , 0x42 , 0x43 , 0x44 , 0x45 , 0x46 , 0x47 , // 0x40
0x48 , 0x49 , 0x4A , 0x4B , 0x4C , 0x4D , 0x4E , 0x4F ,
0x50 , 0x51 , 0x52 , 0x53 , 0x54 , 0x55 , 0x56 , 0x57 , // 0x50
0x58 , 0x59 , 0x5A , 0x5B , 0x5C , 0x5D , 0x5E , 0x5F ,
0x60 , 0x61 , 0x62 , 0x63 , 0x64 , 0x65 , 0x66 , 0x67 , // 0x60
0x68 , 0x69 , 0x6A , 0x6B , 0x6C , 0x6D , 0x6E , 0x6F ,
0x70 , 0x71 , 0x72 , 0x73 , 0x74 , 0x75 , 0x76 , 0x77 , // 0x70
0x78 , 0x79 , 0x7A , 0x7B , 0x7C , 0x7D , 0x7E , 0x2302 ,
0xC7 , 0xFC , 0xE9 , 0xE2 , 0xE4 , 0xE0 , 0xE5 , 0xE7 , // 0x80
0xEA , 0xEB , 0xE8 , 0xEF , 0xEE , 0xEC , 0xC4 , 0xC5 ,
0xC9 , 0xE6 , 0xC6 , 0xF4 , 0xF6 , 0xF2 , 0xFB , 0xF9 , // 0x90
0xFF , 0xD6 , 0xDC , 0xA2 , 0xA3 , 0xA5 , 0x20A7 , 0x192 ,
0xE1 , 0xED , 0xF3 , 0xFA , 0xF1 , 0xD1 , 0xAA , 0xBA , // 0xA0
0xBF , 0x2310 , 0xAC , 0xBD , 0xBC , 0xA1 , 0xAB , 0xBB ,
0x2591 , 0x2592 , 0x2593 , 0x2502 , 0x2524 , 0x2561 , 0x2562 , 0x2556 , // 0xB0
0x2555 , 0x2563 , 0x2551 , 0x2557 , 0x255D , 0x255C , 0x255B , 0x2510 ,
0x2514 , 0x2534 , 0x252C , 0x251C , 0x2500 , 0x253C , 0x255E , 0x255F , // 0xC0
0x255A , 0x2554 , 0x2569 , 0x2566 , 0x2560 , 0x2550 , 0x256C , 0x2567 ,
0x2568 , 0x2564 , 0x2565 , 0x2559 , 0x2558 , 0x2552 , 0x2553 , 0x256B , // 0xD0
0x256A , 0x2518 , 0x250C , 0x2588 , 0x2584 , 0x258C , 0x2590 , 0x2580 ,
0x3B1 , 0xDF , 0x393 , 0x3C0 , 0x3A3 , 0x3C3 , 0xB5 , 0x3C4 , // 0xE0
0x3A6 , 0x398 , 0x3A9 , 0x3B4 , 0x221E , 0x3C6 , 0x3B5 , 0x2229 ,
0x2261 , 0xB1 , 0x2265 , 0x2264 , 0x2320 , 0x2321 , 0xF7 , 0x2248 , // 0xF0
0xB0 , 0x2219 , 0xB7 , 0x221A , 0x207F , 0xB2 , 0x25A0 , 0xA0
} ;
std : : string DF2UTF ( const std : : string & in )
{
std : : string out ;
out . reserve ( in . size ( ) ) ;
uint8_t buf [ 4 ] ;
for ( size_t i = 0 ; i < in . size ( ) ; i + + )
{
int cnt = encode ( buf , character_table [ ( uint8_t ) in [ i ] ] ) ;
out . append ( & buf [ 0 ] , & buf [ cnt ] ) ;
}
return out ;
}
std : : string UTF2DF ( const std : : string & in )
{
// Unicode to normal lookup table
static std : : map < uint32_t , char > ctable ;
if ( ctable . empty ( ) )
{
for ( uint16_t i = 0 ; i < 256 ; i + + )
if ( character_table [ i ] ! = i )
ctable [ character_table [ i ] ] = char ( i ) ;
}
// Actual conversion loop
size_t size = in . size ( ) ;
std : : string out ( size , char ( 0 ) ) ;
uint32_t codepoint = 0 ;
uint32_t state = UTF8_ACCEPT , prev = UTF8_ACCEPT ;
uint32_t pos = 0 ;
for ( unsigned i = 0 ; i < size ; prev = state , i + + ) {
switch ( decode ( & state , & codepoint , uint8_t ( in [ i ] ) ) ) {
case UTF8_ACCEPT :
if ( codepoint < 256 & & character_table [ codepoint ] = = codepoint ) {
out [ pos + + ] = char ( codepoint ) ;
} else {
char v = ctable [ codepoint ] ;
out [ pos + + ] = v ? v : ' ? ' ;
}
break ;
case UTF8_REJECT :
out [ pos + + ] = ' ? ' ;
if ( prev ! = UTF8_ACCEPT ) - - i ;
state = UTF8_ACCEPT ;
break ;
}
}
if ( pos ! = size )
out . resize ( pos ) ;
return out ;
}