gajim-plural/gajim/command_system/mapping.py

# Copyright (C) 2009-2010  Alexander Cherniuk <ts33kr@gmail.com>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

"""
The module contains routines to parse command arguments and map them to
the command handler's positional and keyword arguments.

Mapping is done in two stages: 1) parse arguments into positional
arguments and options; 2) adapt them to the specific command handler
according to the command properties.
"""

import re
from operator import itemgetter

from gajim.command_system.errors import DefinitionError, CommandError

# Regular expressions used to pick arguments and options out of the raw
# argument string. Might need some tweaking along the way.
#
# ARG_PATTERN: an optional opening quote (' or "), then the body. If a
# quote was opened the body is the shortest run of characters up to the
# same closing quote; otherwise it is a run of non-whitespace
# characters.
#
# OPT_PATTERN: one or two dashes not preceded by a word character, then
# the key (word characters and dashes). Optionally followed by "=" or a
# single whitespace character and a value. A quoted value is the
# shortest run of non-dash characters up to the matching quote; an
# unquoted value is a run of characters that are neither dashes nor
# whitespace. Either way a value can not contain a dash.
ARG_PATTERN = re.compile(r'(\'|")?(?P<body>(?(1).+?|\S+))(?(1)\1)')
OPT_PATTERN = re.compile(r'(?<!\w)--?(?P<key>[\w-]+)(?:(?:=|\s)(\'|")?(?P<value>(?(2)[^-]+?|[^-\s]+))(?(2)\2))?')

# Encoding that option keys were meant to be converted to before being
# expanded into **kwargs.
# NOTE(review): presumably a leftover from Python 2, where keyword
# names had to be byte strings; nothing visible in this module reads
# this constant - confirm whether anything else still depends on it.
KEY_ENCODING = 'UTF-8'

# Defines how complete representation of command usage (generated based
# on command handler argument specification) will be rendered. The
# first placeholder receives the command name(s), the second one the
# rendered argument specification.
USAGE_PATTERN = 'Usage: %s %s'

def parse_arguments(arguments):
    """
    Split a raw argument string into positional arguments and options.

    Returns a pair of lists. The first one holds positional arguments
    as (argument, position) tuples, the second one holds options as
    (key, value, position) tuples. In both cases position is the
    (start, end) span of the match inside the given string.

    Options may be written as --long or -short, with the value given
    either as --option=value or --option value. An option without a
    value (or with an empty one) gets None as its value.

    Arguments and option values containing spaces may be wrapped in
    single or double quotes: 'one two three' or "one two three".

    Both patterns are applied to the whole string independently, so
    the same piece of text can be matched twice. Such conflicts are
    resolved in two passes: first every argument lying completely
    inside an option is dropped, then every option lying completely
    inside one of the remaining arguments is dropped.
    """
    def lies_within(span, regions):
        """
        Tell whether span is entirely covered by one of the regions.
        """
        lower, upper = span
        return any(start <= lower and upper <= end for start, end in regions)

    opts = [
        (found.group('key'), found.group('value') or None, found.span())
        for found in re.finditer(OPT_PATTERN, arguments)
    ]
    args = [
        (found.group('body'), found.span())
        for found in re.finditer(ARG_PATTERN, arguments)
    ]

    # First pass: an argument swallowed by an option is really a piece
    # of that option (its key or its value), so it has to go.
    opt_spans = [span for _key, _value, span in opts]
    args = [item for item in args if not lies_within(item[1], opt_spans)]

    # Second pass: an option swallowed by one of the surviving
    # arguments is just text inside of a quoted argument.
    arg_spans = [span for _body, span in args]
    opts = [item for item in opts if not lies_within(item[2], arg_spans)]

    return args, opts

def adapt_arguments(command, arguments, args, opts):
    """
    Adapt args and opts got from the parser to a specific handler by
    means of arguments specified on command definition. That is
    transform them to *args and **kwargs suitable for passing to a
    command handler.

    command is the command object; the result of its
    extract_specification() together with its raw, empty, source,
    expand, extra and overlap properties drive the adaptation.
    arguments is the original, unparsed argument string. args is a list
    of (argument, position) tuples and opts is a list of (key, value,
    position) tuples, where position is a (start, end) span inside of
    arguments. Neither of the given lists is modified, the function
    works on private copies.

    Dashes (-) in the option names will be converted to underscores. So
    you can map --one-more-option to a one_more_option=None.

    If the initial value of a keyword argument is a boolean (False in
    most cases) - then this option will be treated as a switch, that is
    an option which does not take an argument. If a switch is followed
    by an argument - then this argument will be treated just like a
    normal positional argument.

    Returns a pair: a tuple of positional arguments and a dict of
    keyword arguments.

    Raises CommandError if required arguments are missing, if too many
    arguments are given or if a switch ends up with a value. Raises
    DefinitionError if the command enables extra while its handler also
    declares *args.
    """
    spec_args, spec_kwargs, var_args, var_kwargs = command.extract_specification()
    norm_kwargs = dict(spec_kwargs)

    # Everything below appends to, sorts and rewrites these lists. Work
    # on copies so the lists owned by the caller stay as they were. The
    # items are tuples, so a shallow copy is enough.
    args = list(args)
    opts = list(opts)

    # Quite complex piece of neck-breaking logic to extract raw
    # arguments if there is more than one positional argument specified
    # by the command. In case if it's just one argument which is the
    # collector - this is fairly easy. But when it's more than one
    # argument - the neck-breaking logic of how to retrieve residual
    # arguments as a raw, all in one piece string, kicks in.
    if command.raw:
        if arguments:
            spec_fix = 1 if command.source else 0
            spec_len = len(spec_args) - spec_fix
            arguments_end = len(arguments) - 1

            # If there are any optional arguments given they should be
            # either an unquoted positional argument or part of the raw
            # argument. So we find all optional arguments that can
            # possibly be unquoted argument and append them as is to the
            # args.
            for key, value, (start, end) in opts[:spec_len]:
                if value:
                    # Split the option span in two: the key part and
                    # the value part (the value plus one separator
                    # character in front of it).
                    end -= len(value) + 1
                    args.append((arguments[start:end], (start, end)))
                    args.append((value, (end, end + len(value) + 1)))
                else:
                    args.append((arguments[start:end], (start, end)))

            # We need in-place sort here because after manipulations
            # with options order of arguments might be wrong and we just
            # can't have more complex logic to not let that happen.
            args.sort(key=itemgetter(1))

            if spec_len > 1:
                # The last regular (non-raw) argument marks where the
                # raw part begins.
                try:
                    stopper, (start, end) = args[spec_len - 2]
                except IndexError:
                    raise CommandError(_("Missing arguments"), command)

                # The essential point of the whole play. After
                # boundaries are being determined (supposedly correct)
                # we separate raw part from the rest of arguments, which
                # should be normally processed.
                raw = arguments[end:]
                raw = raw.strip() or None

                if not raw and not command.empty:
                    raise CommandError(_("Missing arguments"), command)

                # Discard residual arguments and all of the options as
                # raw command does not support options and if an option
                # is given it is rather a part of a raw argument.
                args = args[:spec_len - 1]
                opts = []

                args.append((raw, (end, arguments_end)))
            else:
                # Substitute all of the arguments with only one, which
                # contains raw and unprocessed arguments as a string.
                # And discard all the options, as raw command does not
                # support them.
                args = [(arguments, (0, arguments_end))]
                opts = []
        else:
            if command.empty:
                args.append((None, (0, 0)))
            else:
                raise CommandError(_("Missing arguments"), command)

    # The first stage of transforming options we have got to a format
    # that can be used to associate them with declared keyword
    # arguments. Substituting dashes (-) in their names with
    # underscores (_).
    for index, (key, value, position) in enumerate(opts):
        if '-' in key:
            opts[index] = (key.replace('-', '_'), value, position)

    # The second stage of transforming options to an associatable state.
    # Expanding short, one-letter options to verbose ones, if the
    # corresponding option has been given. Every letter is expanded at
    # most once: the first declared keyword argument starting with it
    # that finds a matching option wins, and only the first matching
    # option is expanded.
    if command.expand:
        expanded = []
        for spec_key, spec_value in norm_kwargs.items():
            letter = spec_key[0] if len(spec_key) > 1 else None
            if letter and letter not in expanded:
                for index, (key, value, position) in enumerate(opts):
                    if key == letter:
                        expanded.append(letter)
                        opts[index] = (spec_key, value, position)
                        break

    # Detect switches and set their values accordingly. If any of them
    # carries a value - append it to args, at the position of the
    # switch it came with.
    for index, (key, value, position) in enumerate(opts):
        if isinstance(norm_kwargs.get(key), bool):
            opts[index] = (key, True, position)
            if value:
                args.append((value, position))

    # Sorting arguments and options (just to be sure) in regarding to
    # their positions in the string.
    args.sort(key=itemgetter(1))
    opts.sort(key=itemgetter(2))

    # Stripping down position information supplied with arguments and
    # options as it won't be needed again.
    args = [item[0] for item in args]
    opts = [(item[0], item[1]) for item in opts]

    # If command has extra option enabled - collect all extra arguments
    # and pass them to a last positional argument command defines as a
    # list.
    if command.extra:
        if not var_args:
            # Do not count the collector itself, nor the source
            # argument if the command takes one.
            spec_fix = 1 if not command.source else 2
            spec_len = len(spec_args) - spec_fix
            extra = args[spec_len:]
            args = args[:spec_len]
            args.append(extra)
        else:
            raise DefinitionError("Can not have both, extra and *args")

    # Detect if positional arguments overlap keyword arguments. If so
    # and this is allowed by command options - then map them directly to
    # their options, so they can get proper further processing.
    spec_fix = 1 if command.source else 0
    spec_len = len(spec_args) - spec_fix
    if len(args) > spec_len:
        if command.overlap:
            overlapped = args[spec_len:]
            args = args[:spec_len]
            for arg, (spec_key, spec_value) in zip(overlapped, spec_kwargs):
                opts.append((spec_key, arg))
        else:
            raise CommandError(_("Too many arguments"), command)

    # Detect every switch and ensure it will not receive any arguments.
    # Normally this does not happen unless overlapping is enabled.
    for key, value in opts:
        initial = norm_kwargs.get(key)
        if isinstance(initial, bool):
            if not isinstance(value, bool):
                raise CommandError("%s: Switch can not take an argument" % key, command)

    # Inject the source arguments as a string as a first argument, if
    # command has enabled the corresponding option.
    if command.source:
        args.insert(0, arguments)

    # Return *args and **kwargs in the form suitable for passing to a
    # command handler and being expanded.
    return tuple(args), dict(opts)

def generate_usage(command, complete=True):
    """
    Extract handler's arguments specification and wrap it in a
    human-readable usage information.

    The rendered specification uses the following notation:

      <a, b>        regular positional arguments
      <<name>>      collector of the remaining arguments (*args or the
                    extra argument)
      |name|        raw argument, (|name|) if it may be omitted
      [-(-k)ey=v]   keyword arguments with their initial values;
                    switches are shown without a value. Only the first
                    keyword argument starting with a given letter is
                    shown with the short form, the others are rendered
                    as --key
      [[name]]      collector of the remaining options (**kwargs)

    If complete is true - the specification is rendered through
    USAGE_PATTERN, prefixed with the command name(s). Otherwise only
    the specification itself is returned.
    """
    spec_args, spec_kwargs, var_args, var_kwargs = command.extract_specification()

    # The special arguments are popped off below. Do that on a copy, so
    # rendering usage does not alter the list handed out by the command.
    spec_args = list(spec_args)

    # Remove some special positional arguments from the specification.
    # The name of the extra argument is kept as it is used to render
    # the collector; the name of the source argument is never shown.
    if command.source:
        spec_args.pop(0)
    sp_extra = spec_args.pop() if command.extra else None

    kwargs = []
    letters = []

    for key, value in spec_kwargs:
        letter = key[0]
        key = key.replace('_', '-')

        # Switches take no value, so there is nothing to show for them.
        value = '' if isinstance(value, bool) else '=%s' % value

        if letter not in letters:
            kwargs.append('-(-%s)%s%s' % (letter, key[1:], value))
            letters.append(letter)
        else:
            kwargs.append('--%s%s' % (key, value))

    args = ''

    if command.raw:
        # The last positional argument is the raw one, everything in
        # front of it is a regular argument.
        spec_len = len(spec_args) - 1
        if spec_len:
            args += ('<%s>' % ', '.join(spec_args[:spec_len])) + ' '
        args += ('(|%s|)' if command.empty else '|%s|') % spec_args[-1]
    else:
        if spec_args:
            args += '<%s>' % ', '.join(spec_args)
        if var_args or sp_extra:
            args += (' ' if spec_args else '') + '<<%s>>' % (var_args or sp_extra)

    usage = args

    # Each section is separated from whatever has been rendered so far,
    # not just from the positional part. Otherwise keyword arguments
    # and the **kwargs collector stick together when the command has no
    # positional arguments.
    if kwargs:
        usage += (' ' if usage else '') + '[%s]' % ', '.join(kwargs)
    if var_kwargs:
        usage += (' ' if usage else '') + '[[%s]]' % var_kwargs

    if not complete:
        return usage

    # Native name will be the first one if it is included. Otherwise,
    # names will be in the order they were specified.
    if len(command.names) > 1:
        names = '%s (%s)' % (command.first_name, ', '.join(command.names[1:]))
    else:
        names = command.first_name

    return USAGE_PATTERN % (names, usage)