dfhack/plugins/devel/check-structures-sanity/dispatch.cpp

868 lines
26 KiB
C++

#include "check-structures-sanity.h"
#include "df/large_integer.h"
// Build a checker that writes its reports to `out`.
//
// Both counters start at zero and every optional check flag (enums, sizes,
// unnamed, failfast, maybepointer) starts disabled. Progress output is
// suppressed whenever `out` is not a console.
Checker::Checker(color_ostream & out) :
out(out),
checked_count(0),
error_count(0),
// all-ones is the "no limit" value: fail() never decrements it
maxerrors(~size_t(0)),
maxerrors_reported(false),
enums(false),
sizes(false),
unnamed(false),
failfast(false),
noprogress(!out.is_console()),
maybepointer(false)
{
// ask the core's process object to fill `mapped` with its memory ranges
// (presumably consulted later when validating pointers -- confirm in the
// validation code, which is not part of this file)
Core::getInstance().p->getMemRanges(mapped);
}
// Count one more error and print the common failure prefix to `out`.
//
// The prefix names the reporting source line, the full name of the checked
// type ("?" when the structure has no identity) and the access path of the
// item. The stream is returned with the colour set to yellow so that the
// caller can append the actual message.
//
// Every call also uses up one unit of the error budget, unless the budget is
// already zero or holds the all-ones "unlimited" value.
color_ostream & Checker::fail(int line, const QueueItem & item, const CheckedStructure & cs)
{
    ++error_count;

    // each statement starts again from `out` so that the colour manipulators
    // are always applied to the colour-aware stream itself
    out << COLOR_LIGHTRED << "sanity check failed (line " << line << "): ";

    const auto type_name = cs.identity ? cs.identity->getFullName() : "?";
    out << COLOR_RESET << type_name << " (accessed as " << item.path << "): ";

    out << COLOR_YELLOW;

    const size_t unlimited = ~size_t(0);
    if (maxerrors != 0 && maxerrors != unlimited)
    {
        --maxerrors;
    }

    return out;
}
bool Checker::queue_item(const QueueItem & item, const CheckedStructure & cs)
{
if (data.count(item.ptr))
{
// already checked
auto existing = data.at(item.ptr);
if (cs.identity != existing.second.identity)
{
if (cs.identity->type() == IDTYPE_CLASS && existing.second.identity->type() == IDTYPE_CLASS && get_vtable_name(item, cs, false) && static_cast<virtual_identity *>(cs.identity)->is_instance(static_cast<virtual_ptr>(const_cast<void *>(item.ptr))))
{
return false;
}
FAIL("TODO: handle merging structures: " << data.at(item.ptr).first << " overlaps " << item.path << " (same pointer)");
}
return false;
}
auto ptr_end = PTR_ADD(item.ptr, cs.full_size());
auto prev = data.lower_bound(item.ptr);
if (prev != data.cbegin())
{
prev--;
if (uintptr_t(prev->first) + prev->second.second.full_size() > uintptr_t(item.ptr))
{
// placeholder algorithm
if (auto sid = dynamic_cast<struct_identity *>(prev->second.second.identity))
{
auto offset = uintptr_t(item.ptr) - uintptr_t(prev->first);
for (auto field = sid->getFields(); field->mode != struct_field_info::END; field++)
{
if (field->offset == offset)
{
if (field->mode == struct_field_info::SUBSTRUCT && field->type == cs.identity)
{
return false;
}
UNEXPECTED;
}
}
}
// TODO
FAIL("TODO: handle merging structures: " << prev->second.first << " overlaps " << item.path << " (backward)");
}
}
auto overlap = data.lower_bound(item.ptr);
auto overlap_end = data.lower_bound(ptr_end);
while (overlap != overlap_end)
{
// TODO
FAIL("TODO: handle merging structures: " << overlap->second.first << " overlaps " << item.path << " (forward)");
overlap++;
}
data[item.ptr] = std::make_pair(item.path, cs);
queue.push_back(item);
return true;
}
// Queue every global listed in df::global's field table, except "enabler".
//
// A field without an offset is unexpected and skipped. A global whose
// pointer is null is reported as having an unknown address.
void Checker::queue_globals()
{
    for (auto field = df::global::_identity.getFields(); field->mode != struct_field_info::END; ++field)
    {
        if (field->offset == 0)
        {
            UNEXPECTED;
            continue;
        }

        // offset is the position of the DFHack pointer to this global.
        const void *global_addr = *reinterpret_cast<const void **>(field->offset);

        // built before the null test because FAIL reports against them
        QueueItem item(stl_sprintf("df.global.%s", field->name), global_addr);
        CheckedStructure cs(field);

        if (global_addr == nullptr)
        {
            FAIL("unknown global address");
        }
        else if (strcmp(field->name, "enabler") != 0)
        {
            // "enabler" is left out: don't care about libgraphics as we
            // have the source code
            queue_item(item, cs);
        }
    }
}
// Take the item at the front of the queue and check it.
//
// Returns false when the queue was already empty and true otherwise, also
// when the item no longer has an entry in `data` and is therefore skipped.
bool Checker::process_queue()
{
    if (queue.empty())
    {
        return false;
    }

    auto next = std::move(queue.front());
    queue.pop_front();

    const auto entry = data.find(next.ptr);
    if (entry != data.end())
    {
        dispatch_item(next, entry->second.second);
    }
    // a missing entry happens if the pointer is determined to be part of a
    // larger structure

    return true;
}
void Checker::dispatch_item(const QueueItem & base, const CheckedStructure & cs)
{
if (!is_valid_dereference(base, cs))
{
return;
}
if (!cs.count)
{
dispatch_single_item(base, cs);
return;
}
if (sizes && !cs.inside_structure)
{
if (auto allocated_size = get_allocated_size(base))
{
auto expected_size = cs.identity->byte_size();
if (cs.allocated_count)
expected_size *= cs.allocated_count;
else if (cs.count)
expected_size *= cs.count;
if (cs.identity->type() == IDTYPE_CLASS && get_vtable_name(base, cs, true))
{
if (cs.count)
{
UNEXPECTED;
}
auto virtual_type = virtual_identity::get(static_cast<virtual_ptr>(const_cast<void *>(base.ptr)));
expected_size = virtual_type->byte_size();
}
auto & item = base;
if (allocated_size > expected_size)
{
FAIL("identified structure is too small (expected " << expected_size << " bytes, but there are " << allocated_size << " bytes allocated)");
}
else if (allocated_size < expected_size)
{
FAIL("identified structure is too big (expected " << expected_size << " bytes, but there are " << allocated_size << " bytes allocated)");
}
}
else
{
UNEXPECTED;
}
}
auto ptr = base.ptr;
auto size = cs.identity->byte_size();
for (size_t i = 0; i < cs.count; i++)
{
QueueItem item(base, i, ptr);
dispatch_single_item(item, cs);
ptr = PTR_ADD(ptr, size);
}
}
// Check one object by handing it to the checker for its identity type.
//
// Once the error budget is used up, the limit is reported once, the queue is
// emptied and nothing further is checked.
void Checker::dispatch_single_item(const QueueItem & item, const CheckedStructure & cs)
{
    ++checked_count;

    if (maxerrors == 0)
    {
        if (!maxerrors_reported)
        {
            FAIL("error limit reached. bailing out with " << (queue.size() + 1) << " items remaining in the queue.");
            maxerrors_reported = true;
        }
        queue.clear();
        return;
    }

    switch (cs.identity->type())
    {
        // these identity types should never reach this point
        case IDTYPE_GLOBAL:
        case IDTYPE_FUNCTION:
            UNEXPECTED;
            break;

        // nothing to check for opaque types
        case IDTYPE_OPAQUE:
            break;

        case IDTYPE_PRIMITIVE:      dispatch_primitive(item, cs);      break;
        case IDTYPE_POINTER:        dispatch_pointer(item, cs);        break;
        case IDTYPE_CONTAINER:      dispatch_container(item, cs);      break;
        case IDTYPE_PTR_CONTAINER:  dispatch_ptr_container(item, cs);  break;
        case IDTYPE_BIT_CONTAINER:  dispatch_bit_container(item, cs);  break;
        case IDTYPE_BITFIELD:       dispatch_bitfield(item, cs);       break;
        case IDTYPE_ENUM:           dispatch_enum(item, cs);           break;
        case IDTYPE_STRUCT:         dispatch_struct(item, cs);         break;
        case IDTYPE_CLASS:          dispatch_class(item, cs);          break;
        case IDTYPE_BUFFER:         dispatch_buffer(item, cs);         break;
        case IDTYPE_STL_PTR_VECTOR: dispatch_stl_ptr_vector(item, cs); break;
        case IDTYPE_UNION:          dispatch_untagged_union(item, cs); break;
    }
}
// Check a primitive value: STL strings, bools, integers and floats.
//
// Strings go to check_stl_string. A bool must be 0, 1 or 0xd2. Integers are
// only examined as possible pointers. Floats are accepted as they are. C
// strings and any other primitive identity are unexpected.
void Checker::dispatch_primitive(const QueueItem & item, const CheckedStructure & cs)
{
    auto identity = cs.identity;

    if (identity == df::identity_traits<std::string>::get())
    {
        check_stl_string(item);
        return;
    }

    if (identity == df::identity_traits<char *>::get())
    {
        // TODO check c strings
        UNEXPECTED;
        return;
    }

    if (identity == df::identity_traits<bool>::get())
    {
        const auto raw = *reinterpret_cast<const uint8_t *>(item.ptr);
        const bool acceptable = raw <= 1 || raw == 0xd2;
        if (!acceptable)
        {
            FAIL("invalid value for bool: " << int(raw));
        }
        return;
    }

    if (dynamic_cast<df::integer_identity_base *>(identity))
    {
        check_possible_pointer(item, cs);
        // TODO check ints?
        return;
    }

    if (dynamic_cast<df::float_identity_base *>(identity))
    {
        // TODO check floats?
        return;
    }

    UNEXPECTED;
}
// Follow the pointer stored at `item` and check what it points to.
//
// Null pointers and pointers made up entirely of 0xd2 bytes are ignored.
// When the pointer type has no known target type, the pointed-to memory is
// examined by check_unknown_pointer. Otherwise the target is registered with
// queue_item; small targets and targets reached through an array of pointers
// are checked immediately, large ones are left in the queue.
void Checker::dispatch_pointer(const QueueItem & item, const CheckedStructure & cs)
{
    auto target_ptr = validate_and_dereference<const void *>(item);
    if (!target_ptr)
    {
        return;
    }

#ifdef DFHACK64
    if (uintptr_t(target_ptr) == 0xd2d2d2d2d2d2d2d2)
#else
    if (uintptr_t(target_ptr) == 0xd2d2d2d2)
#endif
    {
        return;
    }

    QueueItem target_item(item.path, target_ptr);
    auto target = static_cast<pointer_identity *>(cs.identity)->getTarget();
    if (!target)
    {
        // check_unknown_pointer inspects the memory at the address it is
        // given, so it must receive the pointee and not the location of the
        // pointer field itself
        check_unknown_pointer(target_item);
        return;
    }

    CheckedStructure target_cs(target);

    // 256 is an arbitrarily chosen size threshold
    if (cs.count || target->byte_size() <= 256)
    {
        // target is small, or we are inside an array of pointers; handle now
        if (queue_item(target_item, target_cs))
        {
            // we insert it into the queue to make sure we're not stuck in a loop
            // get it back out of the queue to prevent the queue growing too big
            queue.pop_back();
            dispatch_item(target_item, target_cs);
        }
    }
    else
    {
        // target is large and not part of an array; handle later
        queue_item(target_item, target_cs);
    }
}
// Check a container value. Only STL vectors are examined; deques are
// accepted without inspection and any other container is unexpected.
void Checker::dispatch_container(const QueueItem & item, const CheckedStructure & cs)
{
    auto container = static_cast<container_identity *>(cs.identity);
    const auto kind = container->getFullName(nullptr);

    if (kind == "vector<void>")
    {
        check_stl_vector(item, container->getItemType(), container->getIndexEnumType());
        return;
    }

    if (kind == "deque<void>")
    {
        // TODO: check deque?
        return;
    }

    UNEXPECTED;
}
// No pointer container kind is handled here, so every call is unexpected.
// The container name is still looked up, but nothing is done with it.
void Checker::dispatch_ptr_container(const QueueItem & item, const CheckedStructure & cs)
{
    auto container = static_cast<container_identity *>(cs.identity);
    auto kind = container->getFullName(nullptr);

    UNEXPECTED;
}
// Accept the two known bit containers without inspecting them; any other
// bit container is unexpected.
void Checker::dispatch_bit_container(const QueueItem & item, const CheckedStructure & cs)
{
    auto container = static_cast<container_identity *>(cs.identity);
    const auto kind = container->getFullName(nullptr);

    // TODO: check DF bit array
    const bool is_df_bit_array = kind == "BitArray<>";
    // TODO: check stl bit vector
    const bool is_stl_bit_vector = !is_df_bit_array && kind == "vector<bool>";

    if (!is_df_bit_array && !is_stl_bit_vector)
    {
        UNEXPECTED;
    }
}
// Check a bitfield value.
//
// The value is first examined as a possible pointer. When enum checking is
// enabled, every bit that is SET in the value must be a defined bit of the
// bitfield type. With the `unnamed` option, a set bit must also belong to a
// named field.
//
// A value of 2, 4 or 8 bytes made up entirely of 0xd2 bytes is treated as
// having no bits set. One-byte values are too small for that test.
void Checker::dispatch_bitfield(const QueueItem & item, const CheckedStructure & cs)
{
    check_possible_pointer(item, cs);

    if (!enums)
    {
        return;
    }

    auto bitfield_type = static_cast<bitfield_identity *>(cs.identity);
    uint64_t bitfield_value = 0;
    switch (bitfield_type->byte_size())
    {
        case 1:
            bitfield_value = validate_and_dereference<uint8_t>(item);
            // don't check for uninitialized; too small to be sure
            break;
        case 2:
            bitfield_value = validate_and_dereference<uint16_t>(item);
            if (bitfield_value == 0xd2d2)
            {
                bitfield_value = 0;
            }
            break;
        case 4:
            bitfield_value = validate_and_dereference<uint32_t>(item);
            if (bitfield_value == 0xd2d2d2d2)
            {
                bitfield_value = 0;
            }
            break;
        case 8:
            bitfield_value = validate_and_dereference<uint64_t>(item);
            if (bitfield_value == 0xd2d2d2d2d2d2d2d2)
            {
                bitfield_value = 0;
            }
            break;
        default:
            UNEXPECTED;
            bitfield_value = 0;
            break;
    }

    auto num_bits = bitfield_type->getNumBits();
    auto bits = bitfield_type->getBits();
    for (int i = 0; i < 64; i++)
    {
        // test the lowest bit of the VALUE (not of the bit count), then
        // shift so that the next iteration sees the following bit
        const bool bit_is_set = (bitfield_value & 1) != 0;
        bitfield_value >>= 1;
        if (!bit_is_set)
        {
            continue;
        }

        if (i >= num_bits || !bits[i].size)
        {
            FAIL("bitfield bit " << i << " is out of range");
        }
        else if (!unnamed)
        {
            // names are only verified on request
        }
        else if (bits[i].size > 0)
        {
            // first bit of a field: the name is stored on this entry
            if (!bits[i].name)
            {
                FAIL("bitfield bit " << i << " is unnamed");
            }
        }
        else if (!bits[i + bits[i].size].name)
        {
            // negative size: this bit continues a wider field, and adding
            // the size leads back to the entry at the start of that field.
            // Restricting this lookup to negative sizes keeps a named
            // positive-size field from indexing forward past its own entry.
            FAIL("bitfield bit " << i << " (part of a field starting at bit " << (i + bits[i].size) << ") is unnamed");
        }
    }
}
// Check an enum value.
//
// The value is first examined as a possible pointer. When enum checking is
// enabled, the value must have a key in the enum type, and with the
// `unnamed` option that key must not be empty. A value of 2, 4 or 8 bytes
// made up entirely of 0xd2 bytes is skipped.
void Checker::dispatch_enum(const QueueItem & item, const CheckedStructure & cs)
{
    check_possible_pointer(item, cs);

    if (!enums)
    {
        return;
    }

    auto enum_type = static_cast<enum_identity *>(cs.identity);
    auto enum_value = get_int_value(item, enum_type->getBaseType());

    bool all_d2 = false;
    switch (enum_type->byte_size())
    {
        case 2:
            all_d2 = uint16_t(enum_value) == 0xd2d2;
            break;
        case 4:
            all_d2 = uint32_t(enum_value) == 0xd2d2d2d2;
            break;
        case 8:
            all_d2 = uint64_t(enum_value) == 0xd2d2d2d2d2d2d2d2;
            break;
        default:
            break;
    }
    if (all_d2)
    {
        return;
    }

    auto enum_name = get_enum_item_key(enum_type, enum_value);
    if (!enum_name)
    {
        FAIL("enum value (" << enum_value << ") is out of range");
    }
    else if (unnamed && !*enum_name)
    {
        FAIL("enum value (" << enum_value << ") is unnamed");
    }
}
// Check every field of a struct, starting with the struct's own fields and
// then walking up through each parent type. Types without a field table are
// skipped.
void Checker::dispatch_struct(const QueueItem & item, const CheckedStructure & cs)
{
    auto level = static_cast<struct_identity *>(cs.identity);
    while (level)
    {
        if (auto fields = level->getFields())
        {
            for (auto field = fields; field->mode != struct_field_info::END; ++field)
            {
                dispatch_field(item, cs, fields, field);
            }
        }

        level = level->getParent();
    }
}
// Check one field of the structure at `item`.
//
// Method entries are ignored. A field for which find_union_tag returns a tag
// field is checked as a tagged union, or as a vector of tagged unions when
// the tag itself is a container. Every other field is checked directly.
void Checker::dispatch_field(const QueueItem & item, const CheckedStructure & cs, const struct_field_info *fields, const struct_field_info *field)
{
    switch (field->mode)
    {
        case struct_field_info::OBJ_METHOD:
        case struct_field_info::CLASS_METHOD:
            return;
        default:
            break;
    }

    QueueItem field_item(item, field->name, PTR_ADD(item.ptr, field->offset));
    CheckedStructure field_cs(field);

    auto tag_field = find_union_tag(fields, field);
    if (!tag_field)
    {
        dispatch_item(field_item, field_cs);
        return;
    }

    QueueItem tag_item(item, tag_field->name, PTR_ADD(item.ptr, tag_field->offset));
    CheckedStructure tag_cs(tag_field);

    if (tag_cs.identity->isContainer())
    {
        dispatch_tagged_union_vector(field_item, tag_item, field_cs, tag_cs);
    }
    else
    {
        dispatch_tagged_union(field_item, tag_item, field_cs, tag_cs);
    }
}
void Checker::dispatch_class(const QueueItem & item, const CheckedStructure & cs)
{
auto vtable_name = get_vtable_name(item, cs);
if (!vtable_name)
{
// bail out now because virtual_identity::get will crash
return;
}
auto base_identity = static_cast<virtual_identity *>(cs.identity);
auto vptr = static_cast<virtual_ptr>(const_cast<void *>(item.ptr));
auto identity = virtual_identity::get(vptr);
if (!identity)
{
FAIL("unidentified subclass of " << base_identity->getFullName() << ": " << vtable_name);
return;
}
if (base_identity != identity && !base_identity->is_subclass(identity))
{
FAIL("expected subclass of " << base_identity->getFullName() << ", but got " << identity->getFullName());
return;
}
if (data.count(item.ptr) && data.at(item.ptr).first == item.path)
{
// TODO: handle cases where this may overlap later data
data.at(item.ptr).second.identity = identity;
}
dispatch_struct(QueueItem(item.path + "<" + identity->getFullName() + ">", item.ptr), CheckedStructure(identity));
}
void Checker::dispatch_buffer(const QueueItem & item, const CheckedStructure & cs)
{
auto identity = static_cast<container_identity *>(cs.identity);
auto item_identity = identity->getItemType();
dispatch_item(item, CheckedStructure(item_identity, identity->byte_size() / item_identity->byte_size(), static_cast<enum_identity *>(identity->getIndexEnumType()), true));
}
// Check an STL vector of pointers: the element type is the container's item
// type wrapped in a pointer.
void Checker::dispatch_stl_ptr_vector(const QueueItem & item, const CheckedStructure & cs)
{
    auto container = static_cast<container_identity *>(cs.identity);
    auto element_type = wrap_in_pointer(container->getItemType());

    check_stl_vector(item, element_type, container->getIndexEnumType());
}
// Check a union whose active member is selected by an enum tag.
//
// The tag value must have a non-empty key in the tag's enum type. The union
// member whose name equals that key is then checked. When no member has that
// name, the union is accepted only if all of its bytes are the same filler
// value (0x00, 0xd2 or 0xff); otherwise the missing member is reported.
void Checker::dispatch_tagged_union(const QueueItem & item, const QueueItem & tag_item, const CheckedStructure & cs, const CheckedStructure & tag_cs)
{
    const bool expected_kinds = tag_cs.identity->type() == IDTYPE_ENUM && cs.identity->type() == IDTYPE_UNION;
    if (!expected_kinds)
    {
        UNEXPECTED;
        return;
    }

    auto tag_identity = static_cast<enum_identity *>(tag_cs.identity);
    auto tag_value = get_int_value(tag_item, tag_identity->getBaseType());
    auto tag_name = get_enum_item_key(tag_identity, tag_value);
    if (!tag_name)
    {
        FAIL("tagged union tag (accessed as " << tag_item.path << ") value (" << tag_value << ") not defined in enum " << tag_cs.identity->getFullName());
        return;
    }
    if (!*tag_name)
    {
        FAIL("tagged union tag (accessed as " << tag_item.path << ") value (" << tag_value << ") is unnamed");
        return;
    }

    auto union_type = static_cast<union_identity *>(cs.identity);
    for (auto field = union_type->getFields(); field->mode != struct_field_info::END; ++field)
    {
        if (strcmp(field->name, *tag_name) == 0)
        {
            if (field->offset != 0)
            {
                UNEXPECTED;
            }

            dispatch_item(QueueItem(item, field->name, item.ptr), field);
            return;
        }
    }

    // no member matches the tag: don't ask for fields if it's all padding
    auto bytes = reinterpret_cast<const uint8_t *>(item.ptr);
    const uint8_t filler = bytes[0];
    if (filler == 0x00 || filler == 0xd2 || filler == 0xff)
    {
        const size_t union_size = union_type->byte_size();
        size_t matching = 0;
        while (matching < union_size && bytes[matching] == filler)
        {
            ++matching;
        }

        if (matching == union_size)
        {
            return;
        }
    }

    FAIL("tagged union missing " << *tag_name << " field to match tag (accessed as " << tag_item.path << ") value (" << tag_value << ")");
}
// Check a vector of unions against a parallel vector of tags.
//
// Only tags stored in an STL vector are supported; anything else is
// unexpected. Both vectors are validated, a length mismatch is reported, and
// the elements are then checked pairwise up to the shorter length.
void Checker::dispatch_tagged_union_vector(const QueueItem & item, const QueueItem & tag_item, const CheckedStructure & cs, const CheckedStructure & tag_cs)
{
    auto union_container_identity = static_cast<container_identity *>(cs.identity);
    CheckedStructure union_item_cs(union_container_identity->getItemType());
    if (union_container_identity->type() != IDTYPE_CONTAINER)
    {
        // assume pointer container
        union_item_cs.identity = wrap_in_pointer(union_item_cs.identity);
    }

    auto tag_container_identity = static_cast<container_identity *>(tag_cs.identity);
    const auto tag_container_base = tag_container_identity->getFullName(nullptr);
    if (tag_container_base != "vector<void>")
    {
        // TODO: "vector<bool>" tags are not handled yet; every other tag
        // container is equally unexpected
        UNEXPECTED;
        return;
    }

    auto vec_union = validate_vector_size(item, union_item_cs);
    CheckedStructure tag_item_cs(tag_container_identity->getItemType());
    auto vec_tag = validate_vector_size(tag_item, tag_item_cs);

    if (!vec_union.first || !vec_tag.first)
    {
        // invalid vectors (already warned)
        return;
    }

    const auto union_count = vec_union.second.count;
    const auto tag_count = vec_tag.second.count;
    if (!union_count && !tag_count)
    {
        // empty vectors
        return;
    }

    if (union_count != tag_count)
    {
        FAIL("tagged union vector is " << union_count << " elements, but tag vector (accessed as " << tag_item.path << ") is " << tag_count << " elements");
    }

    auto union_ptr = vec_union.first;
    auto tag_ptr = vec_tag.first;
    for (size_t index = 0; index < union_count && index < tag_count; ++index)
    {
        dispatch_tagged_union(QueueItem(item, index, union_ptr), QueueItem(tag_item, index, tag_ptr), union_item_cs, tag_item_cs);
        union_ptr = PTR_ADD(union_ptr, union_item_cs.identity->byte_size());
        tag_ptr = PTR_ADD(tag_ptr, tag_item_cs.identity->byte_size());
    }
}
void Checker::dispatch_untagged_union(const QueueItem & item, const CheckedStructure & cs)
{
// special case for large_integer weirdness
if (cs.identity == df::identity_traits<df::large_integer>::get())
{
// it's 16 bytes on 64-bit linux due to a messy header in libgraphics
// but only the first 8 bytes are ever used
dispatch_primitive(item, CheckedStructure(df::identity_traits<int64_t>::get(), 0, nullptr, cs.inside_structure));
return;
}
UNEXPECTED;
}
// Examine memory that was reached through a pointer of unknown type and
// report what it appears to be.
//
// `item.ptr` is the address of the pointed-to memory. The tests are tried in
// order and the first one that succeeds wins:
//   1. the address has a known allocation size,
//   2. (non-Windows builds only) validate_stl_string_pointer accepts it,
//   3. get_vtable_name returns a name for it.
void Checker::check_unknown_pointer(const QueueItem & item)
{
// placeholder structure with no identity; FAIL reports against a variable
// named `cs`
const static CheckedStructure cs(nullptr, 0, nullptr, true);
if (auto allocated_size = get_allocated_size(item))
{
FAIL("pointer to a block of " << allocated_size << " bytes of allocated memory");
// suggest candidate types, but only for blocks of at least
// MIN_SIZE_FOR_SUGGEST bytes that have an entry in known_types_by_size
if (allocated_size >= MIN_SIZE_FOR_SUGGEST && known_types_by_size.count(allocated_size))
{
FAIL("known types of this size: " << join_strings(", ", known_types_by_size.at(allocated_size)));
}
// check recursively if it's the right size for a pointer
// or if it starts with what might be a valid pointer
QueueItem ptr_item(item, "?ptr?", item.ptr);
if (allocated_size == sizeof(void *) || (allocated_size > sizeof(void *) && is_valid_dereference(ptr_item, 1, true)))
{
CheckedStructure ptr_cs(df::identity_traits<void *>::get());
if (queue_item(ptr_item, ptr_cs))
{
// queue_item has recorded the block; take it back off the queue
// because it is followed right away
queue.pop_back();
dispatch_pointer(ptr_item, ptr_cs);
}
}
}
#ifndef WIN32
// this branch is part of the if/else chain only on non-Windows builds
else if (auto str = validate_stl_string_pointer(&item.ptr))
{
FAIL("untyped pointer is actually stl-string with value \"" << *str << "\" (length " << str->length() << ")");
}
#endif
// the item passed here has the address of `item.ptr` as its pointer, i.e.
// the location that holds the target address
else if (auto vtable_name = get_vtable_name(QueueItem(item.path, &item.ptr), cs, true))
{
FAIL("pointer to a vtable: " << vtable_name);
}
else if (sizes)
{
// intentionally silent: the report below is disabled
//FAIL("pointer to memory with no size information");
}
}
// Validate an STL vector and queue its contents as one array.
//
// The vector is always validated. Its contents are not queued when the
// vector holds pointers and its access path ends in ".bad", nor when it is
// invalid or empty. `eid` is not used.
void Checker::check_stl_vector(const QueueItem & item, type_identity *item_identity, type_identity *eid)
{
    auto vec_items = validate_vector_size(item, CheckedStructure(item_identity));

    // skip bad pointer vectors
    const auto & path = item.path;
    const bool ends_in_bad = path.length() > 4 && path.compare(path.length() - 4, 4, ".bad") == 0;
    if (ends_in_bad && item_identity->type() == IDTYPE_POINTER)
    {
        return;
    }

    if (!vec_items.first || !vec_items.second.count)
    {
        return;
    }

    QueueItem items_item(item.path, vec_items.first);
    queue_item(items_item, vec_items.second);
}
// Sanity-check the internals of the std::string stored at `item.ptr`.
//
// The string is read through a hand-written layout struct that differs
// between Windows and other builds. Length and capacity must be
// non-negative, and capacity must not be smaller than length. On non-Windows
// builds with size checking enabled, the allocation holding the string data
// must also have exactly the expected size.
void Checker::check_stl_string(const QueueItem & item)
{
// FAIL reports against a variable named `cs`
const static CheckedStructure cs(df::identity_traits<std::string>::get(), 0, nullptr, true);
#ifdef WIN32
// layout assumed on Windows builds: a 16-byte area that is either an inline
// buffer or a pointer to the characters, followed by length and capacity
struct string_data
{
union
{
uintptr_t start;
char local_data[16];
};
size_t length;
size_t capacity;
};
#else
// layout assumed on other builds: a single pointer. The header struct below
// is read from immediately BEFORE the address it points to (see the
// `string->ptr - 1` accesses). NOTE(review): presumably the reference-counted
// libstdc++ representation -- confirm against the targeted ABI.
struct string_data
{
struct string_data_inner
{
size_t length;
size_t capacity;
int32_t refcount;
} *ptr;
};
#endif
auto string = reinterpret_cast<const string_data *>(item.ptr);
#ifdef WIN32
// a capacity below 16 means the characters live in the inline buffer
bool is_local = string->capacity < 16;
const char *start = is_local ? &string->local_data[0] : reinterpret_cast<const char *>(string->start);
ptrdiff_t length = string->length;
ptrdiff_t capacity = string->capacity;
#else
if (!is_valid_dereference(QueueItem(item, "?ptr?", string->ptr), 1))
{
// nullptr is NOT okay here
FAIL("invalid string pointer " << stl_sprintf("%p", string->ptr));
return;
}
// the header in front of the data must be readable as well; no message is
// printed here (is_valid_dereference presumably reports on its own -- confirm)
if (!is_valid_dereference(QueueItem(item, "?hdr?", string->ptr - 1), sizeof(*string->ptr)))
{
return;
}
const char *start = reinterpret_cast<const char *>(string->ptr);
ptrdiff_t length = (string->ptr - 1)->length;
ptrdiff_t capacity = (string->ptr - 1)->capacity;
#endif
// the values are held as signed so that implausibly large sizes show up as
// negative numbers; `start` is not used below
if (length < 0)
{
FAIL("string length is negative (" << length << ")");
}
else if (capacity < 0)
{
FAIL("string capacity is negative (" << capacity << ")");
}
else if (capacity < length)
{
FAIL("string capacity (" << capacity << ") is less than length (" << length << ")");
}
#ifndef WIN32
// a default-constructed string provides the data pointer used by empty
// strings; strings that have this same pointer are left out of the
// allocation size check
const std::string empty_string;
auto empty_string_data = reinterpret_cast<const string_data *>(&empty_string);
if (sizes && string->ptr != empty_string_data->ptr)
{
size_t allocated_size = get_allocated_size(QueueItem(item, "?hdr?", string->ptr - 1));
// header + `capacity` characters + one extra byte
size_t expected_size = sizeof(*string->ptr) + capacity + 1;
if (!allocated_size)
{
FAIL("pointer does not appear to be a string");
}
else if (allocated_size != expected_size)
{
FAIL("allocated string data size (" << allocated_size << ") does not match expected size (" << expected_size << ")");
}
}
#endif
}
// Treat a non-pointer value as a possible pointer and examine its target.
//
// Only active when both the `sizes` and `maybepointer` options are set and
// the value sits at a pointer-aligned address. The value is followed only
// when it is non-null and points to memory that can be dereferenced.
void Checker::check_possible_pointer(const QueueItem & item, const CheckedStructure & cs)
{
    const bool enabled = sizes && maybepointer;
    if (!enabled || uintptr_t(item.ptr) % sizeof(void *) != 0)
    {
        return;
    }

    auto candidate = validate_and_dereference<const void *>(item, true);
    if (!candidate)
    {
        return;
    }

    QueueItem ptr_item(item, "?maybe_pointer?", candidate);
    if (is_valid_dereference(ptr_item, 1, true))
    {
        check_unknown_pointer(ptr_item);
    }
}