#!/usr/bin/perl use strict; use warnings; use XML::LibXML; our @lines_rb; my @lines_cpp; my @include_cpp; my %offsets; sub indent_rb(&) { my ($sub) = @_; my @lines; { local @lines_rb; $sub->(); @lines = map { " " . $_ } @lines_rb; } push @lines_rb, @lines } sub rb_ucase { my ($name) = @_; return $name if ($name eq uc($name)); return join("", map { ucfirst $_ } (split('_', $name))); } my %item_renderer = ( 'global' => \&render_item_global, 'number' => \&render_item_number, 'container' => \&render_item_container, 'compound' => \&render_item_compound, 'pointer' => \&render_item_pointer, 'static-array' => \&render_item_staticarray, 'primitive' => \&render_item_primitive, 'bytes' => \&render_item_bytes, ); my %global_types; sub render_global_enum { my ($name, $type) = @_; my $rbname = rb_ucase($name); push @lines_rb, "class $rbname < MemHack::Enum"; indent_rb { render_enum_fields($type); }; push @lines_rb, "end\n"; } sub render_enum_fields { my ($type) = @_; my $value = -1; push @lines_rb, "ENUM = Hash.new"; push @lines_rb, "NUME = Hash.new"; my %attr_type; my %attr_list; for my $attr ($type->findnodes('child::enum-attr')) { my $rbattr = rb_ucase($attr->getAttribute('name')); my $typeattr = $attr->getAttribute('type-name'); # find how we need to encode the attribute values: string, symbol (for enums), raw (number, bool) if ($typeattr) { if ($global_types{$typeattr}) { $attr_type{$rbattr} = 'symbol'; } else { $attr_type{$rbattr} = 'naked'; } } else { $attr_type{$rbattr} = 'quote'; } my $def = $attr->getAttribute('default-value'); if ($attr->getAttribute('is-list')) { push @lines_rb, "$rbattr = Hash.new { |h, k| h[k] = [] }"; $attr_list{$rbattr} = 1; } elsif ($def) { $def = ":$def" if ($attr_type{$rbattr} eq 'symbol'); $def =~ s/'/\\'/g if ($attr_type{$rbattr} eq 'quote'); $def = "'$def'" if ($attr_type{$rbattr} eq 'quote'); push @lines_rb, "$rbattr = Hash.new($def)"; } else { push @lines_rb, "$rbattr = Hash.new"; } } for my $item ($type->findnodes('child::enum-item')) { $value = $item->getAttribute('value') || ($value+1); my $elemname = $item->getAttribute('name'); # || "unk_$value"; if ($elemname) { my $rbelemname = rb_ucase($elemname); push @lines_rb, "ENUM[$value] = :$rbelemname ; NUME[:$rbelemname] = $value"; for my $iattr ($item->findnodes('child::item-attr')) { my $ian = $iattr->getAttribute('name'); my $iav = $iattr->getAttribute('value'); my $rbattr = rb_ucase($ian); my $op = ($attr_list{$rbattr} ? '<<' : '='); $iav = ":$iav" if ($attr_type{$rbattr} eq 'symbol'); $iav =~ s/'/\\'/g if ($attr_type{$rbattr} eq 'quote'); $iav = "'$iav'" if ($attr_type{$rbattr} eq 'quote'); $lines_rb[$#lines_rb] .= " ; ${rbattr}[:$rbelemname] $op $iav"; } } } } sub render_global_bitfield { my ($name, $type) = @_; push @lines_cpp, "}" if @include_cpp; push @lines_cpp, "void cpp_$name(FILE *fout) {"; push @include_cpp, $name; my $rbname = rb_ucase($name); push @lines_rb, "class $rbname < MemHack::Compound"; indent_rb { render_bitfield_fields($type); }; push @lines_rb, "end\n"; } sub render_bitfield_fields { my ($type) = @_; push @lines_rb, "field(:_whole, 0) {"; indent_rb { render_item_number($type, ''); }; push @lines_rb, "}"; my $shift = 0; for my $field ($type->findnodes('child::ld:field')) { my $count = $field->getAttribute('count') || 1; my $name = $field->getAttribute('name'); my $type = $field->getAttribute('type-name'); my $enum = rb_ucase($type) if ($type and $global_types{$type}); $name = $field->getAttribute('ld:anon-name') if (!$name); print "bitfield $name !number\n" if (!($field->getAttribute('ld:meta') eq 'number')); if ($count == 1) { push @lines_rb, "field(:$name, 0) { bit $shift }" if ($name); } elsif ($enum) { push @lines_rb, "field(:$name, 0) { bits $shift, $count, $enum }" if ($name); } else { push @lines_rb, "field(:$name, 0) { bits $shift, $count }" if ($name); } $shift += $count; } } sub render_global_struct { my ($name, $type) = @_; my $rbname = rb_ucase($name); my $cppns = "df::$name"; push @lines_cpp, "}" if @include_cpp; push @lines_cpp, "void cpp_$name(FILE *fout) {"; push @include_cpp, $name; push @lines_rb, "class $rbname < MemHack::Compound"; indent_rb { my $sz = query_cpp("sizeof($cppns)"); push @lines_rb, "sizeof $sz"; render_struct_fields($type, "$cppns"); }; push @lines_rb, "end\n"; } my %seen_class; sub render_global_class { my ($name, $type) = @_; my $rbname = rb_ucase($name); # ensure pre-definition of ancestors my $parent = $type->getAttribute('inherits-from'); render_global_class($parent, $global_types{$parent}) if ($parent and !$seen_class{$parent}); return if $seen_class{$name}; $seen_class{$name}++; my $rtti_name; if ($type->getAttribute('ld:meta') eq 'class-type') { $rtti_name = $type->getAttribute('original-name') || $type->getAttribute('type-name') || $name; } my $rbparent = ($parent ? rb_ucase($parent) : 'MemHack::Compound'); my $cppns = "df::$name"; push @lines_cpp, "}" if @include_cpp; push @lines_cpp, "void cpp_$name(FILE *fout) {"; push @include_cpp, $name; push @lines_rb, "class $rbname < $rbparent"; indent_rb { my $sz = query_cpp("sizeof($cppns)"); push @lines_rb, "sizeof $sz"; push @lines_rb, "rtti_classname :$rtti_name" if $rtti_name; render_struct_fields($type, "$cppns"); my $vms = $type->findnodes('child::virtual-methods')->[0]; render_class_vmethods($vms) if $vms; }; push @lines_rb, "end\n"; } sub render_struct_fields { my ($type, $cppns) = @_; for my $field ($type->findnodes('child::ld:field')) { my $name = $field->getAttribute('name'); $name = $field->getAttribute('ld:anon-name') if (!$name); if (!$name and $field->getAttribute('ld:anon-compound')) { render_struct_fields($field, $cppns); } next if (!$name); my $offset = get_offset($cppns, $name); push @lines_rb, "field(:$name, $offset) {"; indent_rb { render_item($field, "$cppns"); }; push @lines_rb, "}"; } } sub render_class_vmethods { my ($vms) = @_; my $voff = 0; for my $meth ($vms->findnodes('child::vmethod')) { my $name = $meth->getAttribute('name'); if ($name) { my @argnames; my @argargs; for my $arg ($meth->findnodes('child::ld:field')) { my $nr = $#argnames + 1; my $argname = lcfirst($arg->getAttribute('name') || "arg$nr"); push @argnames, $argname; if ($arg->getAttribute('ld:meta') eq 'global' and $arg->getAttribute('ld:subtype') eq 'enum') { push @argargs, rb_ucase($arg->getAttribute('type-name')) . ".to_i($argname)"; } else { push @argargs, $argname; } } push @lines_rb, "def $name(" . join(', ', @argnames) . ')'; indent_rb { my $args = join('', map { ", $_" } @argargs); my $call = "DFHack.vmethod_call(self, $voff$args)"; my $ret = $meth->findnodes('child::ret-type')->[0]; render_class_vmethod_ret($call, $ret); }; push @lines_rb, 'end'; } # on linux, the destructor uses 2 entries $voff += 4 if $meth->getAttribute('is-destructor') and $^O =~ /linux/i; $voff += 4; } } sub render_class_vmethod_ret { my ($call, $ret) = @_; if (!$ret) { push @lines_rb, "$call ; nil"; return; } my $retmeta = $ret->getAttribute('ld:meta') || ''; if ($retmeta eq 'global') { # enum my $retname = $ret->getAttribute('type-name'); if ($retname and $global_types{$retname} and $global_types{$retname}->getAttribute('ld:meta') eq 'enum-type') { push @lines_rb, rb_ucase($retname) . ".to_sym($call)"; } else { print "vmethod global nonenum $call\n"; push @lines_rb, $call; } } elsif ($retmeta eq 'number') { my $retsubtype = $ret->getAttribute('ld:subtype'); my $retbits = $ret->getAttribute('ld:bits'); push @lines_rb, "val = $call"; if ($retsubtype eq 'bool') { push @lines_rb, "(val & 1) != 0"; } elsif ($ret->getAttribute('ld:unsigned')) { push @lines_rb, "val & ((1 << $retbits) - 1)"; } else { # signed push @lines_rb, "val &= ((1 << $retbits) - 1)"; push @lines_rb, "((val >> ($retbits-1)) & 1) == 0 ? val : val - (1 << $retbits)"; } } elsif ($retmeta eq 'pointer') { push @lines_rb, "ptr = $call"; push @lines_rb, "class << self"; indent_rb { render_item($ret->findnodes('child::ld:item')->[0]); }; push @lines_rb, "end._at(ptr) if ptr != 0"; } else { print "vmethod unkret $call\n"; push @lines_rb, $call; } } sub render_global_objects { my (@objects) = @_; my @global_objects; my $sname = 'global_objects'; my $rbname = rb_ucase($sname); push @lines_cpp, "}" if @include_cpp; push @lines_cpp, "void cpp_$sname(FILE *fout) {"; push @include_cpp, $sname; push @lines_rb, "class $rbname < MemHack::Compound"; indent_rb { for my $obj (@objects) { my $oname = $obj->getAttribute('name'); my $addr = "DFHack.get_global_address('$oname')"; push @lines_rb, "addr = $addr"; push @lines_rb, "if addr != 0"; indent_rb { push @lines_rb, "field(:$oname, addr) {"; my $item = $obj->findnodes('child::ld:item')->[0]; indent_rb { render_item($item, 'df::global'); }; push @lines_rb, "}"; }; push @lines_rb, "end"; push @global_objects, $oname; } }; push @lines_rb, "end"; indent_rb { push @lines_rb, "Global = GlobalObjects.new._at(0)"; for my $obj (@global_objects) { push @lines_rb, "def self.$obj ; Global.$obj ; end"; push @lines_rb, "def self.$obj=(v) ; Global.$obj = v ; end"; } }; } sub render_item { my ($item, $pns) = @_; return if (!$item); my $meta = $item->getAttribute('ld:meta'); my $renderer = $item_renderer{$meta}; if ($renderer) { $renderer->($item, $pns); } else { print "no render item $meta\n"; } } sub render_item_global { my ($item, $pns) = @_; my $typename = $item->getAttribute('type-name'); my $subtype = $item->getAttribute('ld:subtype'); if ($subtype and $subtype eq 'enum') { render_item_number($item, $pns); } else { my $rbname = rb_ucase($typename); push @lines_rb, "global :$rbname"; } } sub render_item_number { my ($item, $pns) = @_; my $subtype = $item->getAttribute('ld:subtype'); my $meta = $item->getAttribute('ld:meta'); my $initvalue = $item->getAttribute('init-value'); my $typename = $item->getAttribute('type-name'); undef $typename if ($meta and $meta eq 'bitfield-type'); $typename = rb_ucase($typename) if $typename; $typename = $pns if (!$typename and $subtype and $subtype eq 'enum'); # compound enum $initvalue = 1 if ($initvalue and $initvalue eq 'true'); $initvalue = ":$initvalue" if ($initvalue and $typename and $initvalue =~ /[a-zA-Z]/); $initvalue ||= 'nil' if $typename; $subtype = $item->getAttribute('base-type') if (!$subtype or $subtype eq 'enum' or $subtype eq 'bitfield'); $subtype = 'int32_t' if (!$subtype); if ($subtype eq 'int64_t') { push @lines_rb, 'number 64, true'; } elsif ($subtype eq 'uint32_t') { push @lines_rb, 'number 32, false'; } elsif ($subtype eq 'int32_t') { push @lines_rb, 'number 32, true'; } elsif ($subtype eq 'uint16_t') { push @lines_rb, 'number 16, false'; } elsif ($subtype eq 'int16_t') { push @lines_rb, 'number 16, true'; } elsif ($subtype eq 'uint8_t') { push @lines_rb, 'number 8, false'; } elsif ($subtype eq 'int8_t') { push @lines_rb, 'number 8, false'; } elsif ($subtype eq 'bool') { push @lines_rb, 'number 8, true'; } elsif ($subtype eq 's-float') { push @lines_rb, 'float'; return; } else { print "no render number $subtype\n"; return; } $lines_rb[$#lines_rb] .= ", $initvalue" if ($initvalue); $lines_rb[$#lines_rb] .= ", $typename" if ($typename); } sub render_item_compound { my ($item, $pns) = @_; my $cppns = $pns . '::' . $item->getAttribute('ld:typedef-name'); my $subtype = $item->getAttribute('ld:subtype'); my @namecomponents = split('::', $cppns); shift @namecomponents; my $classname = join('_', map { rb_ucase($_) } @namecomponents); if (!$subtype || $subtype eq 'bitfield') { push @lines_rb, "compound(:$classname) {"; indent_rb { if (!$subtype) { render_struct_fields($item, $cppns); } else { render_bitfield_fields($item); } }; push @lines_rb, "}" } elsif ($subtype eq 'enum') { push @lines_rb, "class ::DFHack::$classname < MemHack::Enum"; indent_rb { # declare constants render_enum_fields($item); }; push @lines_rb, "end\n"; # actual field render_item_number($item, $classname); } else { print "no render compound $subtype\n"; } } sub render_item_container { my ($item, $pns) = @_; my $subtype = $item->getAttribute('ld:subtype'); my $rbmethod = join('_', split('-', $subtype)); my $tg = $item->findnodes('child::ld:item')->[0]; my $indexenum = $item->getAttribute('index-enum'); if ($tg) { if ($rbmethod eq 'df_linked_list') { push @lines_rb, "$rbmethod {"; } else { my $tglen = get_tglen($tg, $pns); push @lines_rb, "$rbmethod($tglen) {"; } indent_rb { render_item($tg, $pns); }; push @lines_rb, "}"; } elsif ($indexenum) { $indexenum = rb_ucase($indexenum); push @lines_rb, "$rbmethod($indexenum)"; } else { push @lines_rb, "$rbmethod"; } } sub render_item_pointer { my ($item, $pns) = @_; my $tg = $item->findnodes('child::ld:item')->[0]; my $ary = $item->getAttribute('is-array'); if ($ary and $ary eq 'true') { my $tglen = get_tglen($tg, $pns); push @lines_rb, "pointer_ary($tglen) {"; } else { push @lines_rb, "pointer {"; } indent_rb { render_item($tg, $pns); }; push @lines_rb, "}"; } sub render_item_staticarray { my ($item, $pns) = @_; my $count = $item->getAttribute('count'); my $tg = $item->findnodes('child::ld:item')->[0]; my $tglen = get_tglen($tg, $pns); my $indexenum = $item->getAttribute('index-enum'); if ($indexenum) { $indexenum = rb_ucase($indexenum); push @lines_rb, "static_array($count, $tglen, $indexenum) {"; } else { push @lines_rb, "static_array($count, $tglen) {"; } indent_rb { render_item($tg, $pns); }; push @lines_rb, "}"; } sub render_item_primitive { my ($item, $pns) = @_; my $subtype = $item->getAttribute('ld:subtype'); if ($subtype eq 'stl-string') { push @lines_rb, "stl_string"; } else { print "no render primitive $subtype\n"; } } sub render_item_bytes { my ($item, $pns) = @_; my $subtype = $item->getAttribute('ld:subtype'); if ($subtype eq 'padding') { } elsif ($subtype eq 'static-string') { my $size = $item->getAttribute('size'); push @lines_rb, "static_string($size)"; } else { print "no render bytes $subtype\n"; } } sub get_offset { my ($cppns, $fname) = @_; return query_cpp("offsetof($cppns, $fname)"); } sub get_tglen { my ($tg, $cppns) = @_; if (!$tg) { return 'nil'; } my $meta = $tg->getAttribute('ld:meta'); if ($meta eq 'number') { return $tg->getAttribute('ld:bits')/8; } elsif ($meta eq 'pointer') { return 4; } elsif ($meta eq 'container') { my $subtype = $tg->getAttribute('ld:subtype'); if ($subtype eq 'stl-vector') { return query_cpp("sizeof(std::vector)"); } elsif ($subtype eq 'df-linked-list') { return 12; } else { print "cannot tglen container $subtype\n"; } } elsif ($meta eq 'compound') { my $cname = $tg->getAttribute('ld:typedef-name'); return query_cpp("sizeof(${cppns}::$cname)"); } elsif ($meta eq 'static-array') { my $count = $tg->getAttribute('count'); my $ttg = $tg->findnodes('child::ld:item')->[0]; my $ttgl = get_tglen($ttg, $cppns); if ($ttgl =~ /^\d+$/) { return $count * $ttgl; } else { return "$count*$ttgl"; } } elsif ($meta eq 'global') { my $typename = $tg->getAttribute('type-name'); my $subtype = $tg->getAttribute('ld:subtype'); if ($subtype and $subtype eq 'enum') { my $base = $tg->getAttribute('base-type') || 'int32_t'; if ($base eq 'int32_t') { return 4; } elsif ($base eq 'int16_t') { return 2; } elsif ($base eq 'int8_t') { return 1; } else { print "cannot tglen enum $base\n"; } } else { return query_cpp("sizeof(df::$typename)"); } } elsif ($meta eq 'primitive') { my $subtype = $tg->getAttribute('ld:subtype'); if ($subtype eq 'stl-string') { return query_cpp("sizeof(std::string)"); } else { print "cannot tglen primitive $subtype\n"; } } else { print "cannot tglen $meta\n"; } } my %query_cpp_cache; sub query_cpp { my ($query) = @_; my $ans = $offsets{$query}; return $ans if (defined($ans)); my $cached = $query_cpp_cache{$query}; return $cached if (defined($cached)); $query_cpp_cache{$query} = 1; push @lines_cpp, " fprintf(fout, \"%s = %d\\n\", \"$query\", $query);"; return "'$query'"; } my $input = $ARGV[0] || '../../library/include/df/codegen.out.xml'; # run once with output = 'ruby-autogen.cpp' # compile # execute, save output to 'ruby-autogen.offsets' # re-run this script with output = 'ruby-autogen.rb' and offsetfile = 'ruby-autogen.offsets' # delete binary # delete offsets my $output = $ARGV[1] or die "need output file"; my $offsetfile = $ARGV[2]; my $memstruct = $ARGV[3]; if ($offsetfile) { open OF, "<$offsetfile"; while (my $line = ) { chomp($line); my ($key, $val) = split(' = ', $line); $offsets{$key} = $val; } close OF; } my $doc = XML::LibXML->new()->parse_file($input); $global_types{$_->getAttribute('type-name')} = $_ foreach $doc->findnodes('/ld:data-definition/ld:global-type'); my @nonenums; for my $name (sort { $a cmp $b } keys %global_types) { my $type = $global_types{$name}; my $meta = $type->getAttribute('ld:meta'); if ($meta eq 'enum-type') { render_global_enum($name, $type); } else { push @nonenums, $name; } } for my $name (@nonenums) { my $type = $global_types{$name}; my $meta = $type->getAttribute('ld:meta'); if ($meta eq 'struct-type') { render_global_struct($name, $type); } elsif ($meta eq 'class-type') { render_global_class($name, $type); } elsif ($meta eq 'bitfield-type') { render_global_bitfield($name, $type); } else { print "no render global type $meta\n"; } } render_global_objects($doc->findnodes('/ld:data-definition/ld:global-object')); open FH, ">$output"; if ($output =~ /\.cpp$/) { print FH "#include \"DataDefs.h\"\n"; print FH "#include \"df/$_.h\"\n" for @include_cpp; print FH "#include \n"; print FH "#include \n"; print FH "$_\n" for @lines_cpp; print FH "}\n"; print FH "int main(int argc, char **argv) {\n"; print FH " FILE *fout;\n"; print FH " if (argc < 2) return 1;\n"; print FH " fout = fopen(argv[1], \"w\");\n"; print FH " cpp_$_(fout);\n" for @include_cpp; print FH " fclose(fout);\n"; print FH " return 0;\n"; print FH "}\n"; } else { if ($memstruct) { open MH, "<$memstruct"; print FH "$_" while(); close MH; } print FH "module DFHack\n"; print FH "$_\n" for @lines_rb; print FH "end\n"; } close FH;