From 084d28b0aedc93b2b4d7880a9f3bfbd0236102c9 Mon Sep 17 00:00:00 2001 From: 20k Date: Wed, 25 Jan 2023 18:40:27 +0000 Subject: [PATCH 1/6] Reworked heap debugging + tools implementation --- docs/dev/Lua API.rst | 48 +++++++++++++ library/LuaApi.cpp | 165 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 213 insertions(+) diff --git a/docs/dev/Lua API.rst b/docs/dev/Lua API.rst index 9f2386660..b5e66ef8b 100644 --- a/docs/dev/Lua API.rst +++ b/docs/dev/Lua API.rst @@ -2768,6 +2768,54 @@ and are only documented here for completeness: Returns a numeric identifier of the current thread. +* ``dfhack.internal.msizeAddress(address)`` + + Returns the allocation size of an address. + Does not require a heap snapshot. This function will crash on an invalid pointer. + Windows only. + +* ``dfhack.internal.getHeapState()`` + + Returns the state of the heap. 0 == ok or empty, 1 == heap bad ptr, 2 == heap bad begin, 3 == heap bad node. + Does not require a heap snapshot. This may be unsafe to use directly from lua if the heap is corrupt. + Windows only. + +* ``dfhack.internal.heapTakeSnapshot()`` + + Clears any existing heap snapshot, and takes an internal heap snapshot for later consumption. + Windows only. + Returns the same values as getHeapState() + +* ``dfhack.internal.isAddressInHeap(address)`` + + Checks if an address is a member of the heap. It may be dangling. + Requires a heap snapshot. + +* ``dfhack.internal.isAddressActiveInHeap(address)`` + + Checks if an address is a member of the heap, and actively in use (ie valid). + Requires a heap snapshot. + +* ``dfhack.internal.isAddressUsedAfterFreeInHeap(address)`` + + Checks if an address is a member of the heap, but is not currently allocated (ie use after free). + Requires a heap snapshot. + Note that Windows eagerly removes freed pointers from the heap, so this is unlikely to trigger. + +* ``dfhack.internal.getAddressSizeInHeap(address)`` + + Gets the allocated size of a member of the heap. Useful for detecting misaligns, as this does not return block size. + Requires a heap snapshot. + +* ``dfhack.internal.getRootAddressOfHeapObject(address)`` + + Gets the base heap allocation address of a address that lies internally within a piece of allocated memory. + Eg, if you have a heap allocated struct and call this function on the address of the second member, + it will return the address of the struct. + Returns 0 if the address is not found. + Requires a heap snapshot. + + .. _lua-core-context: Core interpreter context diff --git a/library/LuaApi.cpp b/library/LuaApi.cpp index c9bdc3021..babf2f7c0 100644 --- a/library/LuaApi.cpp +++ b/library/LuaApi.cpp @@ -2833,12 +2833,177 @@ static int8_t getModstate() { return Core::getInstance().getModstate(); } static std::string internal_strerror(int n) { return strerror(n); } static std::string internal_md5(std::string s) { return md5_wrap.getHashFromString(s); } +struct heap_pointer_info +{ + size_t size = 0; + int status = 0; +}; + +static std::map snapshot; + +static int heap_take_snapshot() +{ + #ifdef _WIN32 + snapshot.clear(); + + std::vector> entries; + //heap allocating while iterating the heap is suboptimal + entries.reserve(256*1024*1024); + + _HEAPINFO hinfo; + int heapstatus; + int numLoops; + hinfo._pentry = NULL; + numLoops = 0; + while((heapstatus = _heapwalk(&hinfo)) == _HEAPOK && + numLoops < 1024*1024*1024) + { + heap_pointer_info inf; + inf.size = hinfo._size; + inf.status = hinfo._useflag; //0 == _FREEENTRY, 1 == _USEDENTRY + + entries.push_back({hinfo._pentry, inf}); + + numLoops++; + } + + for (auto i : entries) + { + uintptr_t val = 0; + memcpy(&val, &i.first, sizeof(void*)); + snapshot[val] = i.second; + } + + if (heapstatus == _HEAPEMPTY || heapstatus == _HEAPEND) + return 0; + + if (heapstatus == _HEAPBADPTR) + return 1; + + if (heapstatus == _HEAPBADBEGIN) + return 2; + + if (heapstatus == _HEAPBADNODE) + return 3; + #endif + + return 0; +} + +static void* address_to_pointer(uintptr_t ptr) +{ + void* as_ptr = nullptr; + memcpy((void*)&as_ptr, &ptr, sizeof(uintptr_t)); + + return as_ptr; +} + +//this function probably should not allocate. Then again we're shimming through lua which.... probably does +static int get_heap_state() +{ + #ifdef _WIN32 + int heapstatus = _heapchk(); + + if (heapstatus == _HEAPEMPTY || heapstatus == _HEAPOK) + return 0; + + if (heapstatus == _HEAPBADPTR) + return 1; + + if (heapstatus == _HEAPBADBEGIN) + return 2; + + if (heapstatus == _HEAPBADNODE) + return 3; + #endif + + return 0; +} + +static bool is_address_in_heap(uintptr_t ptr) +{ + return snapshot.find(ptr) != snapshot.end(); +} + +static bool is_address_active_in_heap(uintptr_t ptr) +{ + auto it = snapshot.find(ptr); + + if (it == snapshot.end()) + return false; + + return it->second.status == 1; +} + +static bool is_address_used_after_free_in_heap(uintptr_t ptr) +{ + auto it = snapshot.find(ptr); + + if (it == snapshot.end()) + return false; + + return it->second.status != 1; +} + +static int get_address_size_in_heap(uintptr_t ptr) +{ + auto it = snapshot.find(ptr); + + if (it == snapshot.end()) + return -1; + + return it->second.size; +} + +//eg if I have a struct, does any address lie within the struct? +static uintptr_t get_root_address_of_heap_object(uintptr_t ptr) +{ + //find the first element strictly greater than our pointer + auto it = snapshot.upper_bound(ptr); + + //if we're at the start of the snapshot, no elements are less than our pointer + //therefore it is invalid + if (it == snapshot.begin()) + return 0; + + //get the first element less than or equal to ours + it--; + + //our pointer is only valid if we lie in the first pointer lower in memory than it + if (ptr >= it->first && ptr < it->first + it->second.size) + return it->first; + + return 0; +} + +//msize crashes if you pass an invalid pointer to it, only use it if you *know* the thing you're looking at +//is in the heap/valid +static int msize_address(uintptr_t ptr) +{ + void* vptr = address_to_pointer(ptr); + + #ifdef _WIN32 + if (vptr) + return _msize(vptr); + #endif + + return -1; +} + static const LuaWrapper::FunctionReg dfhack_internal_module[] = { WRAP(getImageBase), WRAP(getRebaseDelta), WRAP(getModstate), WRAPN(strerror, internal_strerror), WRAPN(md5, internal_md5), + WRAPN(heapTakeSnapshot, heap_take_snapshot), + WRAPN(getHeapState, get_heap_state), + WRAPN(isAddressInHeap, is_address_in_heap), + WRAPN(isAddressActiveInHeap, is_address_active_in_heap), + WRAPN(isAddressUsedAfterFreeInHeap, is_address_used_after_free_in_heap), + WRAPN(getAddressSizeInHeap, get_address_size_in_heap), + WRAPN(getRootAddressOfHeapObject, get_root_address_of_heap_object), + WRAPN(msizeAddress, msize_address), { NULL, NULL } }; From 5cc6293407f68302113c1737ef8bc07074135e2c Mon Sep 17 00:00:00 2001 From: 20k Date: Fri, 27 Jan 2023 06:04:55 +0000 Subject: [PATCH 2/6] fix unused variable on linux --- library/LuaApi.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/library/LuaApi.cpp b/library/LuaApi.cpp index babf2f7c0..04f771928 100644 --- a/library/LuaApi.cpp +++ b/library/LuaApi.cpp @@ -2980,9 +2980,9 @@ static uintptr_t get_root_address_of_heap_object(uintptr_t ptr) //is in the heap/valid static int msize_address(uintptr_t ptr) { + #ifdef _WIN32 void* vptr = address_to_pointer(ptr); - #ifdef _WIN32 if (vptr) return _msize(vptr); #endif From 5a7debfc777fb8c3bfe2b5fc11dfdec92fd551f3 Mon Sep 17 00:00:00 2001 From: 20k Date: Fri, 27 Jan 2023 06:12:24 +0000 Subject: [PATCH 3/6] cleanup, linux fix --- library/LuaApi.cpp | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) diff --git a/library/LuaApi.cpp b/library/LuaApi.cpp index 04f771928..873b0bcd2 100644 --- a/library/LuaApi.cpp +++ b/library/LuaApi.cpp @@ -2869,8 +2869,7 @@ static int heap_take_snapshot() for (auto i : entries) { - uintptr_t val = 0; - memcpy(&val, &i.first, sizeof(void*)); + uintptr_t val = reinterpret_cast(i.first); snapshot[val] = i.second; } @@ -2890,14 +2889,6 @@ static int heap_take_snapshot() return 0; } -static void* address_to_pointer(uintptr_t ptr) -{ - void* as_ptr = nullptr; - memcpy((void*)&as_ptr, &ptr, sizeof(uintptr_t)); - - return as_ptr; -} - //this function probably should not allocate. Then again we're shimming through lua which.... probably does static int get_heap_state() { @@ -2981,7 +2972,7 @@ static uintptr_t get_root_address_of_heap_object(uintptr_t ptr) static int msize_address(uintptr_t ptr) { #ifdef _WIN32 - void* vptr = address_to_pointer(ptr); + void* vptr = reinterpret_cast(ptr); if (vptr) return _msize(vptr); From 18160da82e7c7e723affef6b949e0da0c97bcd81 Mon Sep 17 00:00:00 2001 From: 20k Date: Mon, 6 Mar 2023 18:10:43 +0000 Subject: [PATCH 4/6] rework to be allocation free, cleanup --- library/LuaApi.cpp | 102 ++++++++++++++++++++++++++------------------- 1 file changed, 60 insertions(+), 42 deletions(-) diff --git a/library/LuaApi.cpp b/library/LuaApi.cpp index 873b0bcd2..5e5fa7ff3 100644 --- a/library/LuaApi.cpp +++ b/library/LuaApi.cpp @@ -2835,76 +2835,91 @@ static std::string internal_md5(std::string s) { return md5_wrap.getHashFromStri struct heap_pointer_info { + size_t address = 0; size_t size = 0; int status = 0; }; -static std::map snapshot; +//fixed sized, sorted +static std::vector heap_data; +//when dfhack upgrades to c++17, this would do well as a std::optional +static std::pair heap_find(uintptr_t address) +{ + auto it = std::lower_bound(heap_data.begin(), heap_data.end(), address, + [](heap_pointer_info t, uintptr_t address) + { + return t.address < address; + }); + + if (it == heap_data.end() || it->address != address) + return {false, heap_pointer_info()}; + + return {true, *it}; +} + +//this function only allocates the first time it is called static int heap_take_snapshot() { #ifdef _WIN32 - snapshot.clear(); + size_t max_entries = 256 * 1024 * 1024 / sizeof(heap_pointer_info); - std::vector> entries; - //heap allocating while iterating the heap is suboptimal - entries.reserve(256*1024*1024); + //clearing the vector is guaranteed not to deallocate the memory + heap_data.clear(); + heap_data.reserve(max_entries); _HEAPINFO hinfo; - int heapstatus; - int numLoops; - hinfo._pentry = NULL; - numLoops = 0; - while((heapstatus = _heapwalk(&hinfo)) == _HEAPOK && - numLoops < 1024*1024*1024) + hinfo._pentry = nullptr; + int heap_status = 0; + + while ((heap_status = _heapwalk(&hinfo)) == _HEAPOK && heap_data.size() < max_entries) { heap_pointer_info inf; + inf.address = reinterpret_cast(hinfo._pentry); inf.size = hinfo._size; inf.status = hinfo._useflag; //0 == _FREEENTRY, 1 == _USEDENTRY - entries.push_back({hinfo._pentry, inf}); - - numLoops++; + heap_data.push_back(inf); } - for (auto i : entries) + //sort by address + std::sort(heap_data.begin(), heap_data.end(), + [](heap_pointer_info t1, heap_pointer_info t2) { - uintptr_t val = reinterpret_cast(i.first); - snapshot[val] = i.second; - } + return t1.address < t2.address; + }); - if (heapstatus == _HEAPEMPTY || heapstatus == _HEAPEND) + if (heap_status == _HEAPEMPTY || heap_status == _HEAPEND) return 0; - if (heapstatus == _HEAPBADPTR) + if (heap_status == _HEAPBADPTR) return 1; - if (heapstatus == _HEAPBADBEGIN) + if (heap_status == _HEAPBADBEGIN) return 2; - if (heapstatus == _HEAPBADNODE) + if (heap_status == _HEAPBADNODE) return 3; #endif return 0; } -//this function probably should not allocate. Then again we're shimming through lua which.... probably does static int get_heap_state() { #ifdef _WIN32 - int heapstatus = _heapchk(); + int heap_status = _heapchk(); - if (heapstatus == _HEAPEMPTY || heapstatus == _HEAPOK) + if (heap_status == _HEAPEMPTY || heap_status == _HEAPOK) return 0; - if (heapstatus == _HEAPBADPTR) + if (heap_status == _HEAPBADPTR) return 1; - if (heapstatus == _HEAPBADBEGIN) + if (heap_status == _HEAPBADBEGIN) return 2; - if (heapstatus == _HEAPBADNODE) + if (heap_status == _HEAPBADNODE) return 3; #endif @@ -2913,56 +2928,59 @@ static int get_heap_state() static bool is_address_in_heap(uintptr_t ptr) { - return snapshot.find(ptr) != snapshot.end(); + return heap_find(ptr).first; } static bool is_address_active_in_heap(uintptr_t ptr) { - auto it = snapshot.find(ptr); + std::pair inf = heap_find(ptr); - if (it == snapshot.end()) + if (!inf.first) return false; - return it->second.status == 1; + return inf.second.status == 1; } static bool is_address_used_after_free_in_heap(uintptr_t ptr) { - auto it = snapshot.find(ptr); + std::pair inf = heap_find(ptr); - if (it == snapshot.end()) + if (!inf.first) return false; - return it->second.status != 1; + return inf.second.status != 1; } static int get_address_size_in_heap(uintptr_t ptr) { - auto it = snapshot.find(ptr); + std::pair inf = heap_find(ptr); - if (it == snapshot.end()) + if (!inf.first) return -1; - return it->second.size; + return inf.second.size; } //eg if I have a struct, does any address lie within the struct? static uintptr_t get_root_address_of_heap_object(uintptr_t ptr) { //find the first element strictly greater than our pointer - auto it = snapshot.upper_bound(ptr); + auto it = std::upper_bound(heap_data.begin(), heap_data.end(), ptr, [](uintptr_t ptr, heap_pointer_info t1) + { + return ptr < t1.address; + }); //if we're at the start of the snapshot, no elements are less than our pointer //therefore it is invalid - if (it == snapshot.begin()) + if (it == heap_data.begin()) return 0; //get the first element less than or equal to ours it--; //our pointer is only valid if we lie in the first pointer lower in memory than it - if (ptr >= it->first && ptr < it->first + it->second.size) - return it->first; + if (ptr >= it->address && ptr < it->address + it->size) + return it->address; return 0; } From c782873f868099fab121038ec6bf8999604d7b5d Mon Sep 17 00:00:00 2001 From: Myk Date: Sat, 20 May 2023 04:58:08 -0700 Subject: [PATCH 5/6] Update changelog.txt --- docs/changelog.txt | 3 +++ 1 file changed, 3 insertions(+) diff --git a/docs/changelog.txt b/docs/changelog.txt index 60d858551..fdd9ca795 100644 --- a/docs/changelog.txt +++ b/docs/changelog.txt @@ -64,6 +64,9 @@ changelog.txt uses a syntax similar to RST, with a few special sequences: ## API +## Internal +- ``dfhack.internal``: added memory analysis functions: ``msizeAddress``, ``getHeapState``, ``heapTakeSnapshot``, ``isAddressInHeap``, ``isAddressActiveInHeap``, ``isAddressUsedAfterFreeInHeap``, ``getAddressSizeInHeap``, and ``getRootAddressOfHeapObject`` + ## Lua - ``overlay.reload()``: has been renamed to ``overlay.rescan()`` so as not to conflict with the global ``reload()`` function. If you are developing an overlay, please take note of the new function name for reloading your overlay during development. - ``gui``: changed frame naming scheme to ``FRAME_X`` rather than ``X_FRAME``, and added aliases for backwards compatibility. (for example ``BOLD_FRAME`` is now called ``FRAME_BOLD``) From 8d73385aaf5babc6a47b8c0ab0b1539d4fe77ed7 Mon Sep 17 00:00:00 2001 From: Myk Date: Sat, 20 May 2023 05:01:07 -0700 Subject: [PATCH 6/6] Update changelog.txt --- docs/changelog.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/changelog.txt b/docs/changelog.txt index fdd9ca795..2e5b482f0 100644 --- a/docs/changelog.txt +++ b/docs/changelog.txt @@ -64,7 +64,7 @@ changelog.txt uses a syntax similar to RST, with a few special sequences: ## API -## Internal +## Internals - ``dfhack.internal``: added memory analysis functions: ``msizeAddress``, ``getHeapState``, ``heapTakeSnapshot``, ``isAddressInHeap``, ``isAddressActiveInHeap``, ``isAddressUsedAfterFreeInHeap``, ``getAddressSizeInHeap``, and ``getRootAddressOfHeapObject`` ## Lua