diff --git a/docs/changelog.txt b/docs/changelog.txt index 60d858551..2e5b482f0 100644 --- a/docs/changelog.txt +++ b/docs/changelog.txt @@ -64,6 +64,9 @@ changelog.txt uses a syntax similar to RST, with a few special sequences: ## API +## Internals +- ``dfhack.internal``: added memory analysis functions: ``msizeAddress``, ``getHeapState``, ``heapTakeSnapshot``, ``isAddressInHeap``, ``isAddressActiveInHeap``, ``isAddressUsedAfterFreeInHeap``, ``getAddressSizeInHeap``, and ``getRootAddressOfHeapObject`` + ## Lua - ``overlay.reload()``: has been renamed to ``overlay.rescan()`` so as not to conflict with the global ``reload()`` function. If you are developing an overlay, please take note of the new function name for reloading your overlay during development. - ``gui``: changed frame naming scheme to ``FRAME_X`` rather than ``X_FRAME``, and added aliases for backwards compatibility. (for example ``BOLD_FRAME`` is now called ``FRAME_BOLD``) diff --git a/docs/dev/Lua API.rst b/docs/dev/Lua API.rst index 52ca4d935..1c1d75358 100644 --- a/docs/dev/Lua API.rst +++ b/docs/dev/Lua API.rst @@ -2768,6 +2768,54 @@ and are only documented here for completeness: Returns a numeric identifier of the current thread. +* ``dfhack.internal.msizeAddress(address)`` + + Returns the allocation size of an address. + Does not require a heap snapshot. This function will crash on an invalid pointer. + Windows only. + +* ``dfhack.internal.getHeapState()`` + + Returns the state of the heap. 0 == ok or empty, 1 == heap bad ptr, 2 == heap bad begin, 3 == heap bad node. + Does not require a heap snapshot. This may be unsafe to use directly from Lua if the heap is corrupt. + Windows only. + +* ``dfhack.internal.heapTakeSnapshot()`` + + Clears any existing heap snapshot, and takes an internal heap snapshot for later consumption. + Windows only. + Returns the same values as ``getHeapState()``. + +* ``dfhack.internal.isAddressInHeap(address)`` + + Checks if an address is a member of the heap. 
It may be dangling. + Requires a heap snapshot. + +* ``dfhack.internal.isAddressActiveInHeap(address)`` + + Checks if an address is a member of the heap, and actively in use (i.e. valid). + Requires a heap snapshot. + +* ``dfhack.internal.isAddressUsedAfterFreeInHeap(address)`` + + Checks if an address is a member of the heap, but is not currently allocated (i.e. use after free). + Requires a heap snapshot. + Note that Windows eagerly removes freed pointers from the heap, so this is unlikely to trigger. + +* ``dfhack.internal.getAddressSizeInHeap(address)`` + + Gets the allocated size of a member of the heap. Useful for detecting misalignment, as this does not return block size. + Requires a heap snapshot. Returns -1 if the address is not found. + +* ``dfhack.internal.getRootAddressOfHeapObject(address)`` + + Gets the base heap allocation address of an address that lies internally within a piece of allocated memory. + E.g., if you have a heap allocated struct and call this function on the address of the second member, + it will return the address of the struct. + Returns 0 if the address is not found. + Requires a heap snapshot. + + .. 
_lua-core-context: Core interpreter context
diff --git a/library/LuaApi.cpp b/library/LuaApi.cpp
index 08903c7bd..0a737875a 100644
--- a/library/LuaApi.cpp
+++ b/library/LuaApi.cpp
@@ -2830,12 +2830,186 @@ static int8_t getModstate() { return Core::getInstance().getModstate(); }
 static std::string internal_strerror(int n) { return strerror(n); }
 static std::string internal_md5(std::string s) { return md5_wrap.getHashFromString(s); }
 
+struct heap_pointer_info //one entry of the heap snapshot
+{
+    size_t address = 0; //start address of the heap entry
+    size_t size = 0; //size of the entry as reported by _heapwalk
+    int status = 0; //_useflag of the entry: 0 == _FREEENTRY, 1 == _USEDENTRY
+};
+
+//snapshot filled by heap_take_snapshot(): capacity is reserved once, entries are sorted by address
+static std::vector<heap_pointer_info> heap_data;
+
+//when dfhack upgrades to c++17, this would do well as a std::optional
+static std::pair<bool, heap_pointer_info> heap_find(uintptr_t address) //exact-match lookup, returns {found, entry}
+{
+    auto it = std::lower_bound(heap_data.begin(), heap_data.end(), address,
+    [](const heap_pointer_info& t, uintptr_t address)
+    {
+        return t.address < address;
+    });
+
+    if (it == heap_data.end() || it->address != address)
+        return {false, heap_pointer_info()};
+
+    return {true, *it};
+}
+
+//this function only allocates the first time it is called
+static int heap_take_snapshot() //returns 0 == ok or empty, 1 == bad ptr, 2 == bad begin, 3 == bad node; always 0 off Windows
+{
+    #ifdef _WIN32
+    const size_t max_entries = 256 * 1024 * 1024 / sizeof(heap_pointer_info); //caps the snapshot storage at 256MiB
+
+    //clearing the vector is guaranteed not to deallocate the memory
+    heap_data.clear();
+    heap_data.reserve(max_entries);
+
+    _HEAPINFO hinfo;
+    hinfo._pentry = nullptr;
+    int heap_status = 0;
+
+    while ((heap_status = _heapwalk(&hinfo)) == _HEAPOK && heap_data.size() < max_entries) //a walk cut short by the cap still returns 0 below
+    {
+        heap_pointer_info inf;
+        inf.address = reinterpret_cast<uintptr_t>(hinfo._pentry);
+        inf.size = hinfo._size;
+        inf.status = hinfo._useflag; //0 == _FREEENTRY, 1 == _USEDENTRY
+
+        heap_data.push_back(inf);
+    }
+
+    //sort by address so that heap_find() and get_root_address_of_heap_object() can binary search
+    std::sort(heap_data.begin(), heap_data.end(),
+    [](const heap_pointer_info& t1, const heap_pointer_info& t2)
+    {
+        return t1.address < t2.address;
+    });
+
+    if (heap_status == _HEAPEMPTY || heap_status == _HEAPEND)
+        return 0;
+
+    if (heap_status == _HEAPBADPTR)
+        return 1;
+
+    if (heap_status == _HEAPBADBEGIN)
+        return 2;
+
+    if (heap_status == _HEAPBADNODE)
+        return 3;
+    #endif
+
+    return 0;
+}
+
+static int get_heap_state() //maps _heapchk() onto the same 0..3 codes as heap_take_snapshot(); always 0 off Windows
+{
+    #ifdef _WIN32
+    int heap_status = _heapchk();
+
+    if (heap_status == _HEAPEMPTY || heap_status == _HEAPOK)
+        return 0;
+
+    if (heap_status == _HEAPBADPTR)
+        return 1;
+
+    if (heap_status == _HEAPBADBEGIN)
+        return 2;
+
+    if (heap_status == _HEAPBADNODE)
+        return 3;
+    #endif
+
+    return 0;
+}
+
+static bool is_address_in_heap(uintptr_t ptr) //true if ptr is the start of any snapshot entry, used or free
+{
+    return heap_find(ptr).first;
+}
+
+static bool is_address_active_in_heap(uintptr_t ptr) //true if ptr starts a snapshot entry flagged as used
+{
+    const auto inf = heap_find(ptr);
+
+    if (!inf.first)
+        return false;
+
+    return inf.second.status == 1;
+}
+
+static bool is_address_used_after_free_in_heap(uintptr_t ptr) //true if ptr starts a snapshot entry that is not flagged as used
+{
+    const auto inf = heap_find(ptr);
+
+    if (!inf.first)
+        return false;
+
+    return inf.second.status != 1;
+}
+
+static int get_address_size_in_heap(uintptr_t ptr) //size of the entry starting at ptr, or -1 if there is none
+{
+    const auto inf = heap_find(ptr);
+
+    if (!inf.first)
+        return -1;
+
+    return static_cast<int>(inf.second.size); //explicit narrowing: a size above INT_MAX does not fit the int return type
+}
+
+//eg if I have a struct, does any address lie within the struct?
+static uintptr_t get_root_address_of_heap_object(uintptr_t ptr) //returns the start address of the containing entry, or 0 if none
+{
+    //find the first element strictly greater than our pointer
+    auto it = std::upper_bound(heap_data.begin(), heap_data.end(), ptr, [](uintptr_t ptr, const heap_pointer_info& t1)
+    {
+        return ptr < t1.address;
+    });
+
+    //if we're at the start of the snapshot, no elements are less than our pointer
+    //therefore it is invalid
+    if (it == heap_data.begin())
+        return 0;
+
+    //get the first element less than or equal to ours
+    --it;
+
+    //our pointer is only valid if we lie in the first pointer lower in memory than it
+    if (ptr >= it->address && ptr - it->address < it->size) //offset form cannot wrap around, unlike address + size
+        return it->address;
+
+    return 0;
+}
+
+//msize crashes if you pass an invalid pointer to it, only use it if you *know* the thing you're looking at
+//is in the heap/valid
+static int msize_address(uintptr_t ptr) //returns -1 for a null address and on non-Windows builds
+{
+    #ifdef _WIN32
+    void* vptr = reinterpret_cast<void*>(ptr);
+
+    if (vptr)
+        return static_cast<int>(_msize(vptr)); //explicit narrowing: _msize returns size_t
+    #endif
+
+    return -1;
+}
+
 static const LuaWrapper::FunctionReg dfhack_internal_module[] = {
     WRAP(getImageBase),
     WRAP(getRebaseDelta),
     WRAP(getModstate),
     WRAPN(strerror, internal_strerror),
     WRAPN(md5, internal_md5),
+    WRAPN(heapTakeSnapshot, heap_take_snapshot),
+    WRAPN(getHeapState, get_heap_state),
+    WRAPN(isAddressInHeap, is_address_in_heap),
+    WRAPN(isAddressActiveInHeap, is_address_active_in_heap),
+    WRAPN(isAddressUsedAfterFreeInHeap, is_address_used_after_free_in_heap),
+    WRAPN(getAddressSizeInHeap, get_address_size_in_heap),
+    WRAPN(getRootAddressOfHeapObject, get_root_address_of_heap_object),
+    WRAPN(msizeAddress, msize_address),
     { NULL, NULL }
 };