diff --git a/docs/Lua API.rst b/docs/Lua API.rst index 5c73dbbd0..bc1e3b0ee 100644 --- a/docs/Lua API.rst +++ b/docs/Lua API.rst @@ -2859,6 +2859,170 @@ utils A ``prompt_input`` ``checkfun`` that verifies a number input. +argparse +======== + +The ``argparse`` module provides functions to help scripts process commandline +parameters. + +* ``argparse.processArgs(args, validArgs)`` + + A basic commandline processing function with simple syntax, useful if your + script doesn't need the more advanced features of + ``argparse.processArgsGetopt()``. + + If ``validArgs`` is specified, it should contain a set of valid option names + (without the leading dashes). For example:: + + argparse.processArgs(args, utils.invert{'opt1', 'opt2', 'opt3'}) + + ``processArgs`` returns a map of option names it found in ``args`` to: + + - the token that came after the option + - ``''`` if the next token was another option + - a list of strings if the next token was ``'['`` (see below) + + Options in ``args`` from the commandline can be prefixed with either one dash + (``'-'``) or two dashes (``'--'``). The user can add a backslash before the + dash to allow a string to be identified as an option value instead of another + option. For example: ``yourscript --opt1 \-arg1``. + + If a ``'['`` token is found in ``args``, the subsequent tokens will be + interpreted as elements of a list until the matching closing ``']'`` is found. + Brackets can be nested, but the inner brackets will be added to the list of + tokens as literal ``'['`` and ``']'`` strings. + + Example commandlines:: + + yourscript --optName --opt2 + yourscript --optName value + yourscript --optName [ list of values ] + yourscript --optName [ list of [ nested values ] [ in square brackets ] ] + yourscript --optName \--value + + Note that ``processArgs`` does not support non-option ("positional") + parameters. They are supported by ``processArgsGetopt`` (see below). 
+ +* ``argparse.processArgsGetopt(args, optionActions)`` + + A fully-featured commandline processing function, with behavior based on the + popular ``getopt`` library. You would use this instead of the simpler + ``processArgs`` function if any of the following are true: + + * You want both short (e.g. ``-f``) and aliased long-form (e.g. + ``--filename``) options + * You have commandline components that are not arguments to options (e.g. you + want to run your script like ``yourscript command --verbose arg1 arg2 arg3`` + instead of + ``yourscript command --verbose --opt1 arg1 --opt2 arg2 --opt3 arg3``). + * You want the convenience of combining options into shorter strings (e.g. + ``'-abcarg'`` instead of ``'-a -b -c arg'``) + * You want to be able to parse and validate the option arguments as the + commandline is being processed, as opposed to validating everything after + commandline processing is complete. + + Commandlines processed by ``processArgsGetopt`` can have both "short" and + "long" options, with each short option often having a long-form alias that + behaves exactly the same as the short form. Short options have properties that + make them very easy to type quickly by users who are familiar with your script + options. Long options, on the other hand, are easily understandable by + everyone and are useful in places where clarity is more important than + brevity, e.g. in example commands. Each option can be configured to take an + argument, which will be the string token that follows the option name on the + commandline. + + Short options are a single letter long and are specified on a commandline by + prefixing them with a single dash (e.g. the short option ``a`` would appear + on the commandline as ``-a``). Multiple successive short options that do not + take arguments can be combined into a single option string (e.g. ``'-abc'`` + instead of ``'-a -b -c'``). 
Moreover, the argument for a short option can be + appended directly to the single-letter option without an intervening space + (e.g. ``-d param`` can be written as ``-dparam``). These two convenience + shorthand forms can be combined, allowing groups of short parameters to be + written together, as long as at most the last short option takes an argument + (e.g. combining the previous two examples into ``-abcdparam``). + + Long options focus on clarity. They are usually entire words, or several words + combined with hyphens (``-``) or underscores (``_``). If they take an argument, + the argument can be separated from the option name by a space or an equals + sign (``=``). For example, the following two commandlines are equivalent: + ``yourscript --style pretty`` and ``yourscript --style=pretty``. + + Another reason to use long options is if they represent an esoteric parameter + that you don't expect to be commonly used and that you don't want to "waste" a + single-letter option on. In this case, you can define a long option without a + corresponding short option. + + ``processArgsGetopt`` takes two parameters:: + + args: list of space-separated strings the user wrote on the commandline + optionActions: list of option specifications + + and returns a list of positional parameters -- that is, all strings that are + neither options nor arguments to options. Options and positional parameters + can appear in any order on the commandline, as long as arguments to options + immediately follow the option itself. + + Each option specification in ``optionActions`` has the following format: + ``{shortOptionName, longOptionAlias, hasArg=boolean, handler=fn}`` + + * ``shortOptionName`` is a one-character string (or ``''`` or ``nil`` if the + parameter only has a long form). Numbers cannot be short options, and + negative numbers (e.g. ``'-10'``) will be interpreted as positional + parameters and returned in the positional parameters list. 
+ * ``longOptionAlias`` is an optional longer form of the short option name. If + no short option name is specified, then this element is required. + * ``hasArg`` indicates whether the handler function for the option takes a + parameter. + * ``handler`` is the handler function for the option. If ``hasArg`` is + ``true`` then the next token on the commandline is passed to the handler + function as an argument. + + Example usage:: + + local args = {...} + local open_readonly, filename = false, nil -- set defaults + + local positionals = argparse.processArgsGetopt(args, { + {'r', handler=function() open_readonly = true end}, + {'f', 'filename', hasArg=true, + handler=function(optarg) filename = optarg end} + }) + + In this example, if ``args`` is ``{'first', '-rf', 'myfile.txt', 'second'}`` or, + equivalently, ``{'first', '-r', '--filename', 'myfile.txt', 'second'}`` (note + the double dash in front of the long option alias), then ``open_readonly`` + will be ``true``, ``filename`` will be ``'myfile.txt'`` and ``positionals`` + will be ``{'first', 'second'}``. + +* ``argparse.stringList(arg, arg_name, list_length)`` + + Parses a comma-separated sequence of strings and returns a Lua list. Leading + and trailing spaces are trimmed from the strings. If ``arg_name`` is + specified, it is used to make error messages more useful. If ``list_length`` + is specified and greater than ``0``, then exactly that number of elements must + be found or the function will error. Example:: + + stringList('hello , world,alist', 'words') => {'hello', 'world', 'alist'} + +* ``argparse.numberList(arg, arg_name, list_length)`` + + Parses a comma-separated sequence of numeric strings and returns a list of + the discovered numbers (as numbers, not strings). If ``arg_name`` is + specified, it is used to make error messages more useful. If ``list_length`` + is specified and greater than ``0``, exactly that number of elements must be + found or the function will error. 
Example:: + + numberList('10, -20 , 30.5') => {10, -20, 30.5} + +* ``argparse.coords(arg, arg_name, skip_validation)`` + + Parses a comma-separated coordinate string and returns a coordinate table of + ``{x=x, y=y, z=z}``. If the string ``'here'`` is passed, returns the coordinates of + the active game cursor, or throws an error if the cursor is not active. This + function also verifies that the coordinates are valid for the current map and + throws if they are not (unless ``skip_validation`` is set to true). + dumper ====== diff --git a/library/lua/argparse.lua b/library/lua/argparse.lua index 2d6adae22..ee170c190 100644 --- a/library/lua/argparse.lua +++ b/library/lua/argparse.lua @@ -1,17 +1,11 @@ +-- Docs at https://docs.dfhack.org/en/stable/docs/Lua%20API.html#argparse + local _ENV = mkmodule('argparse') local getopt = require('3rdparty.alt_getopt') local guidm = require('gui.dwarfmode') function processArgs(args, validArgs) - --[[ - standardized argument processing for scripts - -argName value - -argName [list of values] - -argName [list of [nested values] -that can be [whatever] format of matched square brackets] - -arg1 \-arg3 - escape sequences - --]] local result = {} local argName local bracketDepth = 0 @@ -29,7 +23,7 @@ function processArgs(args, validArgs) else argName = nil end - elseif string.sub(arg,1,1) == '\\' then + elseif arg:startswith('\\') then if bracketDepth == 0 then result[argName] = string.sub(arg,2) argName = nil @@ -44,59 +38,46 @@ function processArgs(args, validArgs) table.insert(result[argName], arg) end end - elseif string.sub(arg,1,1) == '-' then - argName = string.sub(arg,2) + elseif arg:startswith('-') then + argName = string.sub(arg, arg:startswith('--') and 3 or 2) if validArgs and not validArgs[argName] then - error('error: invalid arg: ' .. i .. ': ' .. argName) + qerror('error: invalid arg: ' .. i .. ': ' .. argName) end if result[argName] then - error('duplicate arg: ' .. i .. ': ' .. argName) + qerror('duplicate arg: ' .. i .. 
': ' .. argName) end - if i+1 > #args or string.sub(args[i+1],1,1) == '-' then + if i+1 > #args or args[i+1]:startswith('-') then result[argName] = '' argName = nil else result[argName] = {} end else - error('error parsing arg ' .. i .. ': ' .. arg) + qerror('error parsing arg ' .. i .. ': ' .. arg) end end return result end --- processes commandline options according to optionActions and returns all --- argument strings that are not options. Options and non-option strings can --- appear in any order, and single-letter options that do not take arguments --- can be combined into a single option string (e.g. '-abc' is the same as --- '-a -b -c' if options 'a' and 'b' do not take arguments. --- --- Numbers cannot be options and negative numbers (e.g. -10) will be interpreted --- as positional parameters and returned in the nonoptions list. +-- See online docs for full usage info. -- --- optionActions is a vector with elements in the following format: --- {shortOptionName, longOptionAlias, hasArg=boolean, handler=fn} --- shortOptionName and handler are required. If the option takes an argument, --- it will be passed to the handler function. --- longOptionAlias is optional. --- hasArg defaults to false. --- To have an option that has only a long form, pass nil or '' as the --- shortOptionName. 
+-- Quick example: -- --- example usage: +-- local args = {...} +-- local open_readonly, filename = false, nil -- set defaults -- --- local filename = nil --- local open_readonly = false --- local nonoptions = processArgsGetopt(args, { +-- local positionals = argparse.processArgsGetopt(args, { -- {'r', handler=function() open_readonly = true end}, -- {'f', 'filename', hasArg=true, -- handler=function(optarg) filename = optarg end} --- }) +-- }) -- --- when args is {'first', '-f', 'fname', 'second'} or, equivalently, --- {'first', '--filename', 'fname', 'second'} (note the double dash in front of --- the long option alias), then filename will be fname and nonoptions will --- contain {'first', 'second'}. +-- In this example, if args is {'first', '-rf', 'myfile.txt', 'second'} or, +-- equivalently, {'first', '-r', '--filename', 'myfile.txt', 'second'} (note the +-- double dash in front of the long option alias), then: +-- open_readonly == true +-- filename == 'myfile.txt' +-- positionals == {'first', 'second'}. function processArgsGetopt(args, optionActions) local sh_opts, long_opts = '', {} local handlers = {} @@ -148,12 +129,6 @@ local function arg_error(arg_name, fmt, ...) qerror(('%s'..fmt):format(prefix, ...)) end --- Parses a comma-separated sequence of strings and returns a lua list. Spaces --- are trimmed from the strings. If is specified, it is used to make --- error messages more useful. If is specified and greater than 0, --- exactly that number of elements must be found or the function will error. --- Example: --- stringList('hello , world,list', 'words') => {'hello', 'world', 'list'} function stringList(arg, arg_name, list_length) if not list_length then list_length = 0 end local list = arg:split(',') @@ -167,12 +142,6 @@ function stringList(arg, arg_name, list_length) return list end --- Parses a comma-separated sequence of numeric strings and returns a list of --- the discovered numbers (as numbers, not strings). 
If is specified, --- it is used to make error messages more useful. If is specified --- and greater than 0, exactly that number of elements must be found or the --- function will error. Example: --- numberList('10, -20 , 30.5') => {10, -20, 30.5} function numberList(arg, arg_name, list_length) local strings = stringList(arg, arg_name, list_length) for i,str in ipairs(strings) do @@ -194,11 +163,6 @@ local function check_nonnegative_int(val, arg_name) return val end --- Parses a comma-separated coordinate string and returns a coordinate table of --- {x=x, y=y, z=z}. If the string 'here' is passed, returns the coordinates of --- the active game cursor, or throws an error if the cursor is not active. This --- function also verifies that the coordinates are valid for the current map and --- throws if they are not (unless is set to true). function coords(arg, arg_name, skip_validation) if arg == 'here' then local cursor = guidm.getCursorPos() diff --git a/test/library/argparse.lua b/test/library/argparse.lua index 868d05b1c..4e5c48acc 100644 --- a/test/library/argparse.lua +++ b/test/library/argparse.lua @@ -1,6 +1,53 @@ local argparse = require('argparse') local guidm = require('gui.dwarfmode') +function test.processArgs() + local validArgs = {opt1=true, opt2=true} + + expect.table_eq({}, argparse.processArgs({}, validArgs)) + expect.table_eq({opt1=''}, argparse.processArgs({'-opt1'}, validArgs)) + expect.table_eq({opt1=''}, argparse.processArgs({'--opt1'}, validArgs)) + + expect.table_eq({opt1='arg'}, + argparse.processArgs({'-opt1', 'arg'}, validArgs)) + expect.table_eq({opt1='arg'}, + argparse.processArgs({'--opt1', 'arg'}, validArgs)) + + expect.table_eq({opt1='', opt2=''}, + argparse.processArgs({'--opt1', '-opt2'}, validArgs)) + expect.table_eq({opt1='', opt2=''}, + argparse.processArgs({'--opt1', '--opt2'},validArgs)) + + expect.table_eq({opt1='', opt2='arg'}, + argparse.processArgs({'--opt1', '-opt2', 'arg'}, validArgs)) + expect.table_eq({opt1='', opt2='arg'}, 
+ argparse.processArgs({'--opt1', '--opt2', 'arg'},validArgs)) + + expect.table_eq({opt1={}}, + argparse.processArgs({'-opt1', '[', ']'}, validArgs)) + expect.table_eq({opt1={'a'}}, + argparse.processArgs({'--opt1', '[', 'a', ']'}, validArgs)) + expect.table_eq({opt1={'a', '[', 'nested', 'string', ']'}}, + argparse.processArgs({'-opt1', '[', 'a', '[', 'nested', + 'string', ']', ']'}, + validArgs)) + + expect.table_eq({opt1='-value'}, + argparse.processArgs({'-opt1', '\\-value'}, validArgs)) + expect.table_eq({opt1='--value'}, + argparse.processArgs({'-opt1', '\\--value'}, validArgs)) + + expect.table_eq({unvalidated_opt='value'}, + argparse.processArgs({'-unvalidated_opt', 'value'}, nil)) + + expect.error_match('invalid arg', + function() argparse.processArgs({'-opt3'}, validArgs) end) + expect.error_match('duplicate arg', + function() argparse.processArgs({'-opt1', '--opt1'}, validArgs) end) + expect.error_match('error parsing arg', + function() argparse.processArgs({'justastring'}, validArgs) end) +end + function test.processArgsGetopt_happy_path() local quiet, verbose, name