2011-12-31 05:14:08 -07:00
|
|
|
/*
|
|
|
|
https://github.com/peterix/dfhack
|
2012-09-29 20:03:37 -06:00
|
|
|
Copyright (c) 2009-2012 Petr Mrázek (peterix@gmail.com)
|
2011-12-31 05:14:08 -07:00
|
|
|
|
|
|
|
This software is provided 'as-is', without any express or implied
|
|
|
|
warranty. In no event will the authors be held liable for any
|
|
|
|
damages arising from the use of this software.
|
|
|
|
|
|
|
|
Permission is granted to anyone to use this software for any
|
|
|
|
purpose, including commercial applications, and to alter it and
|
|
|
|
redistribute it freely, subject to the following restrictions:
|
|
|
|
|
|
|
|
1. The origin of this software must not be misrepresented; you must
|
|
|
|
not claim that you wrote the original software. If you use this
|
|
|
|
software in a product, an acknowledgment in the product documentation
|
|
|
|
would be appreciated but is not required.
|
|
|
|
|
|
|
|
2. Altered source versions must be plainly marked as such, and
|
|
|
|
must not be misrepresented as being the original software.
|
|
|
|
|
|
|
|
3. This notice may not be removed or altered from any source
|
|
|
|
distribution.
|
|
|
|
*/
|
|
|
|
|
|
|
|
#include "Internal.h"
|
|
|
|
#include "Export.h"
|
|
|
|
#include "MiscUtils.h"
|
2012-04-05 09:55:59 -06:00
|
|
|
#include "Error.h"
|
2011-12-31 05:14:08 -07:00
|
|
|
|
|
|
|
#ifndef LINUX_BUILD
|
|
|
|
#include <Windows.h>
|
|
|
|
#else
|
|
|
|
#include <sys/time.h>
|
|
|
|
#include <ctime>
|
|
|
|
#endif
|
|
|
|
|
2012-01-08 09:02:12 -07:00
|
|
|
#include <ctype.h>
|
2012-01-02 07:46:24 -07:00
|
|
|
#include <stdarg.h>
|
2012-05-19 09:50:36 -06:00
|
|
|
#include <string.h>
|
2012-01-02 07:46:24 -07:00
|
|
|
|
2012-03-17 02:52:22 -06:00
|
|
|
#include <sstream>
|
2012-04-05 08:10:16 -06:00
|
|
|
#include <map>
|
2012-03-17 02:52:22 -06:00
|
|
|
|
2012-04-05 09:55:59 -06:00
|
|
|
// Describes this exception: always yields the fixed class identifier string.
const char *DFHack::Error::NullPointer::what() const throw() {
    static const char message[] = "DFHack::Error::NullPointer";
    return message;
}
|
|
|
|
|
|
|
|
// Describes this exception: always yields the fixed class identifier string.
const char *DFHack::Error::InvalidArgument::what() const throw() {
    static const char message[] = "DFHack::Error::InvalidArgument";
    return message;
}
|
|
|
|
|
2012-01-02 07:46:24 -07:00
|
|
|
std::string stl_sprintf(const char *fmt, ...) {
|
|
|
|
va_list lst;
|
|
|
|
va_start(lst, fmt);
|
|
|
|
std::string rv = stl_vsprintf(fmt, lst);
|
|
|
|
va_end(lst);
|
|
|
|
return rv;
|
|
|
|
}
|
|
|
|
|
|
|
|
// vprintf-style formatting into a std::string.
//
// fmt  - printf format string.
// args - argument list matching fmt; it is only ever copied, so the caller's
//        list is left untouched and the caller remains responsible for va_end.
//
// Returns the formatted text (without a terminating NUL).
std::string stl_vsprintf(const char *fmt, va_list args) {
    std::vector<char> buf(4096);

    for (;;) {
        // vsnprintf consumes the va_list it is given, so every attempt must
        // work on a fresh copy; reusing 'args' on a retry is undefined behavior.
        va_list attempt;
        va_copy(attempt, args);
        int rsz = vsnprintf(&buf[0], buf.size(), fmt, attempt);
        va_end(attempt);

        if (rsz < 0) {
            // Some C runtimes report truncation with a negative value
            // instead of the required length: grow and retry.
            // NOTE(review): a genuine formatting error also lands here and
            // keeps doubling until allocation fails.
            buf.resize(buf.size()*2);
        } else if (size_t(rsz) >= buf.size()) {
            // rsz excludes the terminating NUL, so rsz == buf.size() means
            // the output was truncated as well; make room for rsz + NUL.
            buf.resize(size_t(rsz)+1);
        } else {
            return std::string(&buf[0], size_t(rsz));
        }
    }
}
|
|
|
|
|
2012-01-08 09:02:12 -07:00
|
|
|
// Splits 'str' at every occurrence of 'separator' and stores the pieces in *out
// (any previous contents of *out are discarded).
//
// squash_empty - when true, empty pieces are dropped instead of stored.
// An empty separator never matches, so the whole string is the only piece.
//
// Returns true when more than one piece was produced.
bool split_string(std::vector<std::string> *out,
                  const std::string &str, const std::string &separator, bool squash_empty)
{
    out->clear();

    const bool keep_empty = !squash_empty;
    size_t cursor = 0;

    if (!separator.empty())
    {
        for (size_t hit = str.find(separator, cursor);
             hit != std::string::npos;
             hit = str.find(separator, cursor))
        {
            if (keep_empty || hit > cursor)
                out->push_back(str.substr(cursor, hit - cursor));

            // Continue right after the separator that was just found.
            cursor = hit + separator.size();
        }
    }

    // Whatever follows the last separator is the final piece.
    if (keep_empty || cursor < str.size())
        out->push_back(str.substr(cursor));

    return out->size() > 1;
}
|
|
|
|
|
2012-03-17 02:52:22 -06:00
|
|
|
// Concatenates 'items' in order, inserting 'separator' between neighbours.
// An empty list yields an empty string.
std::string join_strings(const std::string &separator, const std::vector<std::string> &items)
{
    std::stringstream joined;

    std::vector<std::string>::const_iterator it = items.begin();
    if (it != items.end())
    {
        // The first item is written bare; every later one is preceded
        // by the separator.
        joined << *it;
        for (++it; it != items.end(); ++it)
            joined << separator << *it;
    }

    return joined.str();
}
|
|
|
|
|
2012-01-08 09:02:12 -07:00
|
|
|
// Returns a copy of 'str' with every character passed through toupper().
std::string toUpper(const std::string &str)
{
    std::string rv(str.size(),' ');
    for (size_t i = 0; i < str.size(); ++i)
    {
        // toupper() is only defined for EOF and values representable as
        // unsigned char; a plain (possibly signed) char holding a byte
        // >= 0x80 would be passed as a negative int.
        rv[i] = static_cast<char>(toupper(static_cast<unsigned char>(str[i])));
    }
    return rv;
}
|
|
|
|
|
|
|
|
// Returns a copy of 'str' with every character passed through tolower().
std::string toLower(const std::string &str)
{
    std::string rv(str.size(),' ');
    for (size_t i = 0; i < str.size(); ++i)
    {
        // tolower() is only defined for EOF and values representable as
        // unsigned char; a plain (possibly signed) char holding a byte
        // >= 0x80 would be passed as a negative int.
        rv[i] = static_cast<char>(tolower(static_cast<unsigned char>(str[i])));
    }
    return rv;
}
|
|
|
|
|
2012-05-19 09:50:36 -06:00
|
|
|
// Tests whether 'key' equals 'prefix' or lies below it in a '/'-separated
// hierarchy.
//
// A match requires 'key' to start with 'prefix' and, unless the two are equal,
// the boundary to fall on a '/': either 'prefix' is empty or ends in '/', or
// the next character of 'key' is '/'.
//
// tail - optional; once the raw prefix comparison has succeeded it is cleared,
//        and on a match it receives the part of 'key' after the prefix and
//        the boundary '/'. It is left untouched when 'key' does not even
//        start with 'prefix'.
bool prefix_matches(const std::string &prefix, const std::string &key, std::string *tail)
{
    const size_t prefix_len = prefix.size();
    const size_t key_len = key.size();

    if (key_len < prefix_len)
        return false;
    if (memcmp(prefix.data(), key.data(), prefix_len) != 0)
        return false;

    if (tail)
        tail->clear();

    // Exact match: nothing is left over for the tail.
    if (key_len == prefix_len)
        return true;

    size_t tail_start;
    if (prefix_len == 0 || prefix[prefix_len-1] == '/')
        tail_start = prefix_len;        // prefix already ends on a boundary
    else if (key[prefix_len] == '/')
        tail_start = prefix_len + 1;    // skip the separating '/'
    else
        return false;                   // prefix stops in the middle of a name

    if (tail)
        *tail = key.substr(tail_start);
    return true;
}
|
|
|
|
|
2011-12-31 05:14:08 -07:00
|
|
|
#ifdef LINUX_BUILD // Linux
|
|
|
|
// Returns the current wall-clock time, as reported by gettimeofday(),
// in milliseconds since the UNIX epoch.
uint64_t GetTimeMs64()
{
    struct timeval tv;
    gettimeofday(&tv, NULL);

    // Widen the seconds to 64 bits *before* scaling to milliseconds (10^-3):
    // multiplying in time_t's own width overflows where time_t is 32 bits.
    uint64_t ret = uint64_t(tv.tv_sec) * 1000;

    // Add the sub-second part, converted from microseconds (10^-6)
    // to milliseconds (10^-3).
    ret += uint64_t(tv.tv_usec) / 1000;

    return ret;
}
|
|
|
|
|
|
|
|
|
|
|
|
#else // Windows
|
|
|
|
uint64_t GetTimeMs64()
|
|
|
|
{
|
|
|
|
FILETIME ft;
|
|
|
|
LARGE_INTEGER li;
|
|
|
|
|
|
|
|
// Get the amount of 100 nano seconds intervals elapsed since January 1, 1601 (UTC)
|
|
|
|
// and copy it to a LARGE_INTEGER structure.
|
|
|
|
GetSystemTimeAsFileTime(&ft);
|
|
|
|
li.LowPart = ft.dwLowDateTime;
|
|
|
|
li.HighPart = ft.dwHighDateTime;
|
|
|
|
|
|
|
|
uint64_t ret = li.QuadPart;
|
|
|
|
// Convert from file time to UNIX epoch time.
|
|
|
|
ret -= 116444736000000000LL;
|
|
|
|
// From 100 nano seconds (10^-7) to 1 millisecond (10^-3) intervals
|
|
|
|
ret /= 10000;
|
|
|
|
|
|
|
|
return ret;
|
|
|
|
}
|
2012-04-05 08:10:16 -06:00
|
|
|
#endif
|
|
|
|
|
|
|
|
/* Character decoding */
|
|
|
|
|
|
|
|
// See http://bjoern.hoehrmann.de/utf-8/decoder/dfa/ for details.
|
|
|
|
#define UTF8_ACCEPT 0
|
|
|
|
#define UTF8_REJECT 12
|
|
|
|
|
|
|
|
// Combined lookup table of the UTF-8 decoding automaton.
static const uint8_t utf8d[] = {
    // Entries 0..255 map every input byte to a character class. Working on
    // classes instead of raw bytes keeps the transition table below small,
    // and the class number doubles as a shift count for the payload bitmask
    // of a leading byte.
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0x00..0x1F
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0x20..0x3F
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0x40..0x5F
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0x60..0x7F
    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, // 0x80..0x9F
    7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, // 0xA0..0xBF
    8,8,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, // 0xC0..0xDF
    10,3,3,3,3,3,3,3,3,3,3,3,3,4,3,3, 11,6,6,6,5,8,8,8,8,8,8,8,8,8,8,8, // 0xE0..0xFF

    // Entries from 256 on form the transition table: the next state is found
    // at index 256 + state + class. States are multiples of 12, so each row
    // below holds the 12 per-class successors of one state.
    0,12,24,36,60,96,84,12,12,12,48,72,   // state  0 (UTF8_ACCEPT)
    12,12,12,12,12,12,12,12,12,12,12,12,  // state 12 (UTF8_REJECT)
    12, 0,12,12,12,12,12, 0,12, 0,12,12,  // state 24
    12,24,12,12,12,12,12,24,12,24,12,12,  // state 36
    12,12,12,12,12,12,12,24,12,12,12,12,  // state 48
    12,24,12,12,12,12,12,12,12,24,12,12,  // state 60
    12,12,12,12,12,12,12,36,12,36,12,12,  // state 72
    12,36,12,12,12,12,12,36,12,36,12,12,  // state 84
    12,36,12,12,12,12,12,12,12,12,12,12,  // state 96
};
|
|
|
|
|
|
|
|
static inline uint32_t
|
|
|
|
decode(uint32_t* state, uint32_t* codep, uint8_t byte) {
|
|
|
|
uint32_t type = utf8d[byte];
|
|
|
|
|
|
|
|
*codep = (*state != UTF8_ACCEPT) ?
|
|
|
|
(byte & 0x3fu) | (*codep << 6) :
|
|
|
|
(0xff >> type) & (byte);
|
|
|
|
|
|
|
|
*state = utf8d[256 + *state + type];
|
|
|
|
return *state;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Character encoding */
|
|
|
|
|
|
|
|
// Writes the UTF-8 encoding of the 16-bit code point 'c' to 'out'.
//
// out - destination buffer; up to three bytes are written.
//
// Returns the number of bytes written (1, 2 or 3).
static inline int encode(uint8_t *out, uint16_t c) {
    // One byte: 0xxxxxxx
    if (c < 0x80) {
        out[0] = static_cast<uint8_t>(c);
        return 1;
    }

    const uint8_t low_six = static_cast<uint8_t>(0x80 | (c & 0x3F));

    // Two bytes: 110xxxxx 10xxxxxx
    if (c < 0x800) {
        out[0] = static_cast<uint8_t>(0xC0 | (c >> 6));
        out[1] = low_six;
        return 2;
    }

    // Three bytes: 1110xxxx 10xxxxxx 10xxxxxx
    out[0] = static_cast<uint8_t>(0xE0 | (c >> 12));
    out[1] = static_cast<uint8_t>(0x80 | ((c >> 6) & 0x3F));
    out[2] = low_six;
    return 3;
}
|
|
|
|
|
|
|
|
/* CP437 */
|
|
|
|
|
|
|
|
// Maps each of the 256 CP437 byte values (the array index) to its Unicode
// code point. The table is only ever read, hence const.
static const uint16_t character_table[256] = {
    // 0x00..0x1F: graphical symbols
    0,      0x263A, 0x263B, 0x2665, 0x2666, 0x2663, 0x2660, 0x2022,
    0x25D8, 0x25CB, 0x25D9, 0x2642, 0x2640, 0x266A, 0x266B, 0x263C,
    0x25BA, 0x25C4, 0x2195, 0x203C, 0xB6,   0xA7,   0x25AC, 0x21A8,
    0x2191, 0x2193, 0x2192, 0x2190, 0x221F, 0x2194, 0x25B2, 0x25BC,
    // 0x20..0x7E map to themselves; 0x7F is a symbol
    0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27,
    0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
    0x38, 0x39, 0x3A, 0x3B, 0x3C, 0x3D, 0x3E, 0x3F,
    0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
    0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
    0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
    0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x5D, 0x5E, 0x5F,
    0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x2302,
    // 0x80..0xAF
    0xC7, 0xFC, 0xE9, 0xE2, 0xE4, 0xE0, 0xE5, 0xE7,
    0xEA, 0xEB, 0xE8, 0xEF, 0xEE, 0xEC, 0xC4, 0xC5,
    0xC9, 0xE6, 0xC6, 0xF4, 0xF6, 0xF2, 0xFB, 0xF9,
    0xFF, 0xD6, 0xDC, 0xA2, 0xA3, 0xA5, 0x20A7, 0x192,
    0xE1, 0xED, 0xF3, 0xFA, 0xF1, 0xD1, 0xAA, 0xBA,
    0xBF, 0x2310, 0xAC, 0xBD, 0xBC, 0xA1, 0xAB, 0xBB,
    // 0xB0..0xDF
    0x2591, 0x2592, 0x2593, 0x2502, 0x2524, 0x2561, 0x2562, 0x2556,
    0x2555, 0x2563, 0x2551, 0x2557, 0x255D, 0x255C, 0x255B, 0x2510,
    0x2514, 0x2534, 0x252C, 0x251C, 0x2500, 0x253C, 0x255E, 0x255F,
    0x255A, 0x2554, 0x2569, 0x2566, 0x2560, 0x2550, 0x256C, 0x2567,
    0x2568, 0x2564, 0x2565, 0x2559, 0x2558, 0x2552, 0x2553, 0x256B,
    0x256A, 0x2518, 0x250C, 0x2588, 0x2584, 0x258C, 0x2590, 0x2580,
    // 0xE0..0xFF
    0x3B1, 0xDF, 0x393, 0x3C0, 0x3A3, 0x3C3, 0xB5, 0x3C4,
    0x3A6, 0x398, 0x3A9, 0x3B4, 0x221E, 0x3C6, 0x3B5, 0x2229,
    0x2261, 0xB1, 0x2265, 0x2264, 0x2320, 0x2321, 0xF7, 0x2248,
    0xB0, 0x2219, 0xB7, 0x221A, 0x207F, 0xB2, 0x25A0, 0xA0
};
|
|
|
|
|
|
|
|
std::string DF2UTF(const std::string &in)
|
|
|
|
{
|
|
|
|
std::string out;
|
|
|
|
out.reserve(in.size());
|
|
|
|
|
|
|
|
uint8_t buf[4];
|
|
|
|
for (size_t i = 0; i < in.size(); i++)
|
|
|
|
{
|
|
|
|
int cnt = encode(buf, character_table[(uint8_t)in[i]]);
|
|
|
|
out.append(&buf[0], &buf[cnt]);
|
|
|
|
}
|
|
|
|
|
|
|
|
return out;
|
|
|
|
}
|
|
|
|
|
|
|
|
std::string UTF2DF(const std::string &in)
|
|
|
|
{
|
|
|
|
// Unicode to normal lookup table
|
|
|
|
static std::map<uint32_t, char> ctable;
|
|
|
|
|
|
|
|
if (ctable.empty())
|
|
|
|
{
|
|
|
|
for (uint16_t i = 0; i < 256; i++)
|
|
|
|
if (character_table[i] != i)
|
|
|
|
ctable[character_table[i]] = char(i);
|
|
|
|
}
|
|
|
|
|
|
|
|
// Actual conversion loop
|
|
|
|
size_t size = in.size();
|
|
|
|
std::string out(size, char(0));
|
|
|
|
|
|
|
|
uint32_t codepoint = 0;
|
|
|
|
uint32_t state = UTF8_ACCEPT, prev = UTF8_ACCEPT;
|
|
|
|
uint32_t pos = 0;
|
|
|
|
|
|
|
|
for (unsigned i = 0; i < size; prev = state, i++) {
|
|
|
|
switch (decode(&state, &codepoint, uint8_t(in[i]))) {
|
|
|
|
case UTF8_ACCEPT:
|
|
|
|
if (codepoint < 256 && character_table[codepoint] == codepoint) {
|
|
|
|
out[pos++] = char(codepoint);
|
|
|
|
} else {
|
|
|
|
char v = ctable[codepoint];
|
|
|
|
out[pos++] = v ? v : '?';
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
|
|
|
|
case UTF8_REJECT:
|
|
|
|
out[pos++] = '?';
|
|
|
|
if (prev != UTF8_ACCEPT) --i;
|
|
|
|
state = UTF8_ACCEPT;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (pos != size)
|
|
|
|
out.resize(pos);
|
|
|
|
return out;
|
|
|
|
}
|