Port profiler to lua 5.3 and dfhack

Changes include
* table.getn(obj) -> #obj
* Making sure string.rep gets an integer parameter
* Optimized profiling hooks (call profiler cost from factor 40 to 10)
* Specialized parameter name lookup code for c++ __index metamod calls
* Collect source lines in time sampling variant
* Simplified prevent to always filter all children
develop
Pauli 2018-06-06 18:12:55 +03:00
parent 96ba5116f7
commit e3df5cd765
1 changed files with 252 additions and 218 deletions

@ -97,11 +97,15 @@ IN THE SOFTWARE.
--]] --]]
local _ENV = mkmodule('profiler')
-- --
-- All profiler related stuff is stored in the top level table '_profiler' -- All profiler related stuff is stored in the top level table '_profiler'
-- --
_profiler = {} local _profiler = {}
local DEFAULT_FILTERED_FUNC = 1
local DEFAULT_MISSING_FUNC = 3
-- --
@ -127,35 +131,11 @@ function newProfiler(variant, sampledelay)
newprof[k] = v newprof[k] = v
end end
newprof.variant = variant newprof.variant = variant
newprof.sampledelay = sampledelay or 100000 newprof.sampledelay = sampledelay or 10000
return newprof return newprof
end end
--
-- This function starts the profiler. It will do nothing
-- if this (or any other) profiler is already running.
--
function _profiler.start(self)
if _profiler.running then
return
end
-- Start the profiler. This begins by setting up internal profiler state
_profiler.running = self
self.rawstats = {}
self.callstack = {}
if self.variant == "time" then
self.lastclock = os.clock()
debug.sethook( _profiler_hook_wrapper_by_time, "", self.sampledelay )
elseif self.variant == "call" then
debug.sethook( _profiler_hook_wrapper_by_call, "cr" )
else
print("Profiler method must be 'time' or 'call'.")
sys.exit(1)
end
end
-- --
-- This function stops the profiler. It will do nothing -- This function stops the profiler. It will do nothing
-- if a profiler is not running, and nothing if it isn't -- if a profiler is not running, and nothing if it isn't
@ -166,192 +146,212 @@ function _profiler.stop(self)
return return
end end
-- Stop the profiler. -- Stop the profiler.
debug.sethook( nil ) debug.sethook()
_profiler.running = nil _profiler.running = nil
end end
local function get_stats(rawstats, prevented_functions, func, depth)
if prevented_functions[func] then
-- return nil
-- Simple wrapper to handle the hook. You should not
-- be calling this directly. Duplicated to reduce overhead.
--
function _profiler_hook_wrapper_by_call(action)
if _profiler.running == nil then
debug.sethook( nil )
end
_profiler.running:_internal_profile_by_call(action)
end
function _profiler_hook_wrapper_by_time(action)
if _profiler.running == nil then
debug.sethook( nil )
end end
_profiler.running:_internal_profile_by_time(action) local stats = rawstats[func]
if not stats then
depth = depth + 1
ci = debug.getinfo(depth,"fnS")
if ci.what == "C" and ci.name == "__index" then
local value1
_, value1 = debug.getlocal(depth, 1)
value1 = tostring(value1)
value1 = value1:match("^<([^:]*):") or value1
ci.short_src = value1
end
stats = {
func = ci.func,
anon = ci.name == nil,
count = 0,
time = 0,
profile_time = 0,
anon_child_time = 0,
name_child_time = 0,
children = {},
children_time = {},
currentline = {},
func_info = ci,
}
rawstats[func] = stats
end
return stats
end end
-- --
-- This is the main by-function-call function of the profiler and should not -- Simple wrapper to handle the hook. You should not
-- be called except by the hook wrapper -- be calling this directly. Duplicated to reduce overhead.
-- --
function _profiler._internal_profile_by_call(self,action) local function _profiler_hook_wrapper_by_call(action)
-- Since we can obtain the 'function' for the item we've had call us, we local entry = os.clock()
-- can use that... local self = _profiler.running
local caller_info = debug.getinfo( 3 ) local depth, stack = self.depth, self.stack
if caller_info == nil then if action == "call" then
print "No caller_info" local parent = stack[depth]
if parent and parent.should_not_profile > 0 then
parent.should_not_profile = parent.should_not_profile + 1
parent.profile_time = parent.profile_time + (os.clock() - entry)
return return
end end
local ci = debug.getinfo(2,"f")
--SHG_LOG("[_profiler._internal_profile] "..(caller_info.name or "<nil>")) local func = ci.func
local rawstats = self.rawstats
-- Retrieve the most recent activation record... local stats = get_stats(rawstats, self.prevented_functions, func, 2)
local latest_ar = nil depth = depth + 1
if table.getn(self.callstack) > 0 then self.depth = depth
latest_ar = self.callstack[table.getn(self.callstack)] local this = stack[depth]
if not this then
this = {
should_not_profile = stats and 0 or 1,
stats = stats,
clock_start = entry,
profile_time = 0,
}
stack[depth] = this
else
this.should_not_profile = stats and 0 or 1
this.stats = stats
this.clock_start = entry
end end
this.profile_time = (os.clock() - entry)
-- Are we allowed to profile this function? else
local should_not_profile = 0 if depth == 0 then
for k,v in pairs(self.prevented_functions) do return
if k == caller_info.func then
should_not_profile = v
end end
local this = stack[depth]
if this.should_not_profile > 0 then
this.should_not_profile = this.should_not_profile - 1
if this.should_not_profile == 0 then
self.depth = depth - 1
local parent = stack[self.depth]
if parent then
local time = entry - this.start_time - this.profile_time
parent.stats.anon_child_time = parent.stats.anon_child_time + time
parent.profile_time = parent.profile_time + this.profile_time + (os.clock() - entry)
end end
-- Also check the top activation record... else
if latest_ar then this.profile_time = this.profile_time + (os.clock() - entry)
if latest_ar.should_not_profile == 2 then
should_not_profile = 2
end end
return
end end
local time = entry - this.clock_start - this.profile_time
depth = depth - 1
self.depth = depth
local parent = stack[depth]
-- Now then, are we in 'call' or 'return' ? local this_stats,parent_stats = this.stats,parent and parent.stats or nil
-- print("Profile:", caller_info.name, "SNP:", should_not_profile, local func = this_stats.func
-- "Action:", action )
if action == "call" then this_stats.count = this_stats.count + 1
-- Making a call... this_stats.time = this_stats.time + time
local this_ar = {} this_stats.profile_time = this_stats.profile_time + this.profile_time
this_ar.should_not_profile = should_not_profile if not parent then return end
this_ar.parent_ar = latest_ar if this_stats.anon then
this_ar.anon_child = 0 parent_stats.anon_child_time = parent_stats.anon_child_time + time
this_ar.name_child = 0
this_ar.children = {}
this_ar.children_time = {}
this_ar.clock_start = os.clock()
-- Last thing to do on a call is to insert this onto the ar stack...
table.insert( self.callstack, this_ar )
else
local this_ar = latest_ar
if this_ar == nil then
return -- No point in doing anything if no upper activation record
end
-- Right, calculate the time in this function...
this_ar.clock_end = os.clock()
this_ar.this_time = this_ar.clock_end - this_ar.clock_start
-- Now, if we have a parent, update its call info...
if this_ar.parent_ar then
this_ar.parent_ar.children[caller_info.func] =
(this_ar.parent_ar.children[caller_info.func] or 0) + 1
this_ar.parent_ar.children_time[caller_info.func] =
(this_ar.parent_ar.children_time[caller_info.func] or 0 ) +
this_ar.this_time
if caller_info.name == nil then
this_ar.parent_ar.anon_child =
this_ar.parent_ar.anon_child + this_ar.this_time
else else
this_ar.parent_ar.name_child = parent_stats.name_child_time = parent_stats.name_child_time + time
this_ar.parent_ar.name_child + this_ar.this_time
end
end
-- Now if we're meant to record information about ourselves, do so...
if this_ar.should_not_profile == 0 then
local inforec = self:_get_func_rec(caller_info.func,1)
inforec.count = inforec.count + 1
inforec.time = inforec.time + this_ar.this_time
inforec.anon_child_time = inforec.anon_child_time + this_ar.anon_child
inforec.name_child_time = inforec.name_child_time + this_ar.name_child
inforec.func_info = caller_info
for k,v in pairs(this_ar.children) do
inforec.children[k] = (inforec.children[k] or 0) + v
inforec.children_time[k] =
(inforec.children_time[k] or 0) + this_ar.children_time[k]
end end
local ch = parent_stats.children[func]
if not ch then
parent_stats.children[func] = 1
parent_stats.children_time[func] = time
else
parent_stats.children[func] = ch + 1
parent_stats.children_time[func] = parent_stats.children_time[func] + time
end end
parent.profile_time = parent.profile_time + this.profile_time + (os.clock() - entry)
-- Last thing to do on return is to drop the last activation record...
table.remove( self.callstack, table.getn( self.callstack ) )
end end
end end
local function _profiler_hook_wrapper_by_time()
-- local self = _profiler.running
-- This is the main by-time internal function of the profiler and should not
-- be called except by the hook wrapper
--
function _profiler._internal_profile_by_time(self,action)
-- we do this first so we add the minimum amount of extra time to this call
local timetaken = os.clock() - self.lastclock local timetaken = os.clock() - self.lastclock
local rawstats = self.rawstats
local depth = 3 local prevented = self.prevented_functions
local at_top = true local depth = 2
local last_caller local caller = debug.getinfo(depth,'fl')
local caller = debug.getinfo(depth) local child
if not caller then
return
end
local cf = caller.func
if cf then
child = get_stats(rawstats, prevented, cf, depth)
if child then
child.count = child.count + 1
child.time = child.time + timetaken
local line = caller.currentline
child.currentline[line] = (child.currentline[line] or 0) + 1
else
cf = DEFAULT_FILTERED_FUNC
end
else
cf = DEFAULT_MISSING_FUNC
end
depth = 3
local caller = debug.getinfo(depth,'f')
while caller do while caller do
if not caller.func then caller.func = "(tail call)" end if caller.func then
if self.prevented_functions[caller.func] == nil then local this = get_stats(rawstats, prevented, caller.func, depth)
local info = self:_get_func_rec(caller.func, 1, caller) if this then
info.count = info.count + 1 this.time = this.time + timetaken
info.time = info.time + timetaken if not child or child.anon then
if last_caller then this.anon_child_time = this.anon_child_time + timetaken
-- we're not the head, so update the "children" times also
if last_caller.name then
info.name_child_time = info.name_child_time + timetaken
else else
info.anon_child_time = info.anon_child_time + timetaken this.name_child_time = this.name_child_time + timetaken
end end
info.children[last_caller.func] = local ch = this.children[cf]
(info.children[last_caller.func] or 0) + 1 if ch then
info.children_time[last_caller.func] = this.children[cf] = ch + 1
(info.children_time[last_caller.func] or 0) + timetaken this.children_time[cf] = this.children_time[cf] + timetaken
else
this.children[cf] = 1
this.children_time[cf] = timetaken
end
cf = caller.func
else
cf = DEFAULT_FILTERED_FUNC
end end
child = this
else
cf = DEFAULT_MISSING_FUNC
child = nil
end end
depth = depth + 1 depth = depth + 1
last_caller = caller caller = debug.getinfo(depth, 'f')
caller = debug.getinfo(depth)
end end
self.lastclock = os.clock() self.lastclock = os.clock()
end end
-- --
-- This returns a (possibly empty) function record for -- This function starts the profiler. It will do nothing
-- the specified function. It is for internal profiler use. -- if this (or any other) profiler is already running.
-- --
function _profiler._get_func_rec(self,func,force,info) function _profiler.start(self)
-- Find the function ref for 'func' (if force and not present, create one) if _profiler.running then
local ret = self.rawstats[func] return
if ret == nil and force ~= 1 then end
return nil -- Start the profiler. This begins by setting up internal profiler state
_profiler.running = self
assert(_profiler.running)
self.rawstats = {}
self.stack = {}
self.depth = 0
if self.variant == "time" then
self.lastclock = os.clock()
debug.sethook( _profiler_hook_wrapper_by_time, "", self.sampledelay )
elseif self.variant == "call" then
debug.sethook( _profiler_hook_wrapper_by_call, "cr" )
else
error("Profiler method must be 'time' or 'call'.")
end end
if ret == nil then
-- Build a new function statistics table
ret = {}
ret.func = func
ret.count = 0
ret.time = 0
ret.anon_child_time = 0
ret.name_child_time = 0
ret.children = {}
ret.children_time = {}
ret.func_info = info
self.rawstats[func] = ret
end
return ret
end end
-- --
-- This writes a profile report to the output file object. If -- This writes a profile report to the output file object. If
-- sort_by_total_time is nil or false the output is sorted by -- sort_by_total_time is nil or false the output is sorted by
@ -375,7 +375,7 @@ function _profiler.report( self, outfile, sort_by_total_time )
terms.single = "call" terms.single = "call"
terms.pastverb = "called" terms.pastverb = "called"
else else
assert(false) error("Profiler method must be 'time' or 'call'.")
end end
local total_time = 0 local total_time = 0
@ -400,37 +400,46 @@ function _profiler.report( self, outfile, sort_by_total_time )
) )
end end
for i=1,table.getn(ordering) do for i=1,#ordering do
local func = ordering[i] local func = ordering[i]
local record = self.rawstats[func] local record = self.rawstats[func]
local thisfuncname = " " .. self:_pretty_name(func) .. " " local thisfuncname = " " .. self:_pretty_name(func) .. " "
if string.len( thisfuncname ) < 42 then if string.len( thisfuncname ) < 42 then
thisfuncname = thisfuncname =
string.rep( "-", (42 - string.len(thisfuncname))/2 ) .. thisfuncname string.rep( "-", math.floor((42 - string.len(thisfuncname))/2) ) .. thisfuncname
thisfuncname = thisfuncname =
thisfuncname .. string.rep( "-", 42 - string.len(thisfuncname) ) thisfuncname .. string.rep( "-", 42 - string.len(thisfuncname) )
end end
local child_count = 0
for _,v in pairs(record.children) do
child_count = child_count + v
end
total_time = total_time + ( record.time - ( record.anon_child_time + total_time = total_time + ( record.time - ( record.anon_child_time +
record.name_child_time ) ) record.name_child_time ) )
outfile:write( string.rep( "-", 19 ) .. thisfuncname .. outfile:write( string.rep( "-", 19 ) .. thisfuncname ..
string.rep( "-", 19 ) .. "\n" ) string.rep( "-", 19 ) .. "\n" )
outfile:write( terms.capitalized.." count: " .. outfile:write( terms.capitalized.." count: " ..
string.format( "%4d", record.count ) .. "\n" ) string.format( "%5d", record.count ) .. "\n" )
outfile:write( terms.capitalized.." count in children: " ..
string.format( "%5d", child_count ) .. "\n" )
outfile:write( "Time spend total: " .. outfile:write( "Time spend total: " ..
string.format( "%4.3f", record.time ) .. "s\n" ) string.format( "%4.3f", record.time ) .. "s\n" )
outfile:write( "Time spent in children: " .. outfile:write( "Time spent in children: " ..
string.format("%4.3f",record.anon_child_time+record.name_child_time) .. string.format("%4.3f",record.anon_child_time+record.name_child_time) ..
"s\n" ) "s\n" )
outfile:write( "Time spent in profiler: " ..
string.format("%4.3f",record.profile_time) ..
"s\n" )
local timeinself = local timeinself =
record.time - (record.anon_child_time + record.name_child_time) record.time - (record.anon_child_time + record.name_child_time)
outfile:write( "Time spent in self: " .. outfile:write( "Time spent in self: " ..
string.format("%4.3f", timeinself) .. "s\n" ) string.format("%4.3f", timeinself) .. "s\n" )
outfile:write( "Time spent per " .. terms.single .. ": " .. outfile:write( "Time spent per " .. terms.single .. ": " ..
string.format("%4.5f", record.time/record.count) .. string.format("%4.6f", record.time/(record.count+(self.variant == "time" and child_count or 0))) ..
"s/" .. terms.single .. "\n" ) "s/" .. terms.single .. "\n" )
outfile:write( "Time spent in self per "..terms.single..": " .. outfile:write( "Time spent in self per "..terms.single..": " ..
string.format( "%4.5f", timeinself/record.count ) .. "s/" .. string.format( "%4.6f", record.count > 0 and timeinself/record.count or 0.0 ) .. "s/" ..
terms.single.."\n" ) terms.single.."\n" )
-- Report on each child in the form -- Report on each child in the form
@ -444,12 +453,34 @@ function _profiler.report( self, outfile, sort_by_total_time )
outfile:write( "\n" ) -- extra separation line outfile:write( "\n" ) -- extra separation line
added_blank = 1 added_blank = 1
end end
outfile:write( "Child " .. self:_pretty_name(k) .. local pretty_name
string.rep( " ", 41-string.len(self:_pretty_name(k)) ) .. " " .. if k == DEFAULT_FILTERED_FUNC then
pretty_name = "(Filtered function)"
elseif k == DEFAULT_MISSING_FUNC then
pretty_name = "(Function pointer missing)"
else
pretty_name = self:_pretty_name(k)
end
outfile:write( "Child " .. pretty_name ..
string.rep( " ", 41-string.len(pretty_name) ) .. " " ..
terms.pastverb.." " .. string.format("%6d", v) ) terms.pastverb.." " .. string.format("%6d", v) )
outfile:write( " times. Took " .. outfile:write( " times. Took " ..
string.format("%4.2f", record.children_time[k] ) .. "s\n" ) string.format("%4.3f", record.children_time[k] ) .. "s\n" )
end
end
local lines = {}
for line,v in pairs(record.currentline) do
if line >= 0 then
lines[#lines+1] = line
end
end end
table.sort(lines)
for i=1,#lines do
local line = lines[i]
local v = record.currentline[line]
-- @todo How about reading the source code from the file?
outfile:write( ("%6d %s in line %d\n"):format(v, terms.pastverb, line))
end end
outfile:write( "\n" ) -- extra separation line outfile:write( "\n" ) -- extra separation line
@ -476,20 +507,20 @@ function _profiler.lua_report(self,outfile)
local functonum = {} local functonum = {}
for func,record in pairs(self.rawstats) do for func,record in pairs(self.rawstats) do
table.insert(ordering, func) table.insert(ordering, func)
functonum[func] = table.getn(ordering) functonum[func] = #ordering
end end
outfile:write( outfile:write(
"-- Profile generated by profiler.lua Copyright Pepperfish 2002+\n\n" ) "-- Profile generated by profiler.lua Copyright Pepperfish 2002+\n\n" )
outfile:write( "-- Function names\nfuncnames = {}\n" ) outfile:write( "-- Function names\nfuncnames = {}\n" )
for i=1,table.getn(ordering) do for i=1,#ordering do
local thisfunc = ordering[i] local thisfunc = ordering[i]
outfile:write( "funcnames[" .. i .. "] = " .. outfile:write( "funcnames[" .. i .. "] = " ..
string.format("%q", self:_pretty_name(thisfunc)) .. "\n" ) string.format("%q", self:_pretty_name(thisfunc)) .. "\n" )
end end
outfile:write( "\n" ) outfile:write( "\n" )
outfile:write( "-- Function times\nfunctimes = {}\n" ) outfile:write( "-- Function times\nfunctimes = {}\n" )
for i=1,table.getn(ordering) do for i=1,#ordering do
local thisfunc = ordering[i] local thisfunc = ordering[i]
local record = self.rawstats[thisfunc] local record = self.rawstats[thisfunc]
outfile:write( "functimes[" .. i .. "] = { " ) outfile:write( "functimes[" .. i .. "] = { " )
@ -500,7 +531,7 @@ function _profiler.lua_report(self,outfile)
end end
outfile:write( "\n" ) outfile:write( "\n" )
outfile:write( "-- Child links\nchildren = {}\n" ) outfile:write( "-- Child links\nchildren = {}\n" )
for i=1,table.getn(ordering) do for i=1,#ordering do
local thisfunc = ordering[i] local thisfunc = ordering[i]
local record = self.rawstats[thisfunc] local record = self.rawstats[thisfunc]
outfile:write( "children[" .. i .. "] = { " ) outfile:write( "children[" .. i .. "] = { " )
@ -513,11 +544,11 @@ function _profiler.lua_report(self,outfile)
end end
outfile:write( "\n" ) outfile:write( "\n" )
outfile:write( "-- Child call counts\nchildcounts = {}\n" ) outfile:write( "-- Child call counts\nchildcounts = {}\n" )
for i=1,table.getn(ordering) do for i=1,#ordering do
local thisfunc = ordering[i] local thisfunc = ordering[i]
local record = self.rawstats[thisfunc] local record = self.rawstats[thisfunc]
outfile:write( "children[" .. i .. "] = { " ) outfile:write( "childcounts[" .. i .. "] = { " )
for k,v in record.children do for k,v in pairs(record.children) do
if functonum[k] then -- non-recorded functions will be ignored now if functonum[k] then -- non-recorded functions will be ignored now
outfile:write( v .. ", " ) outfile:write( v .. ", " )
end end
@ -526,10 +557,10 @@ function _profiler.lua_report(self,outfile)
end end
outfile:write( "\n" ) outfile:write( "\n" )
outfile:write( "-- Child call time\nchildtimes = {}\n" ) outfile:write( "-- Child call time\nchildtimes = {}\n" )
for i=1,table.getn(ordering) do for i=1,#ordering do
local thisfunc = ordering[i] local thisfunc = ordering[i]
local record = self.rawstats[thisfunc]; local record = self.rawstats[thisfunc];
outfile:write( "children[" .. i .. "] = { " ) outfile:write( "childtimes[" .. i .. "] = { " )
for k,v in pairs(record.children) do for k,v in pairs(record.children) do
if functonum[k] then -- non-recorded functions will be ignored now if functonum[k] then -- non-recorded functions will be ignored now
outfile:write( record.children_time[k] .. ", " ) outfile:write( record.children_time[k] .. ", " )
@ -560,14 +591,17 @@ function _profiler._pretty_name(self,func)
name = " :" name = " :"
end end
if info.namewhat ~= nil then
name = name .. info.namewhat .. ":"
end
if info.name == nil then if info.name == nil then
name = name .. "<"..tostring(func) .. ">" name = name .. "<"..tostring(func) .. ">"
else else
name = name .. info.name name = name .. info.name
end end
if info.source then if info.short_src then
name = name .. "@" .. info.source name = name .. "@" .. info.short_src
else else
if info.what == "C" then if info.what == "C" then
name = name .. "@?" name = name .. "@?"
@ -588,29 +622,29 @@ end
-- --
-- This allows you to specify functions which you do -- This allows you to specify functions which you do
-- not want profiled. Setting level to 1 keeps the -- not want profiled.
-- function from being profiled. Setting level to 2
-- keeps both the function and its children from
-- being profiled.
-- --
-- BUG: 2 will probably act exactly like 1 in "time" mode. -- BUG: 2 will probably act exactly like 1 in "time" mode.
-- If anyone cares, let me (zorba) know and it can be fixed. -- If anyone cares, let me (zorba) know and it can be fixed.
-- --
function _profiler.prevent(self, func, level) function _profiler.prevent(self, func, enable)
self.prevented_functions[func] = (level or 1) if enable then
self.prevented_functions[func] = true
else
self.prevented_functions[func] = nil
end
end end
_profiler.prevented_functions = { _profiler.prevented_functions = {
[_profiler.start] = 2, [_profiler.start] = true,
[_profiler.stop] = 2, [_profiler.stop] = true,
[_profiler._internal_profile_by_time] = 2, [_profiler_hook_wrapper_by_time] = true,
[_profiler._internal_profile_by_call] = 2, [_profiler_hook_wrapper_by_call] = true,
[_profiler_hook_wrapper_by_time] = 2, [_profiler.prevent] = true,
[_profiler_hook_wrapper_by_call] = 2, [_profiler.report] = true,
[_profiler.prevent] = 2, [_profiler.lua_report] = true,
[_profiler._get_func_rec] = 2, [_profiler._pretty_name] = true
[_profiler.report] = 2,
[_profiler.lua_report] = 2,
[_profiler._pretty_name] = 2
} }
return _ENV