// =============================================================== //
//                                                                 //
//   File      : adlang1.cxx                                       //
//   Purpose   :                                                   //
//                                                                 //
//   Institute of Microbiology (Technical University Munich)       //
//   http://www.arb-home.de/                                       //
//                                                                 //
// =============================================================== //

#include "gb_aci_impl.h"
#include "gb_key.h"

#include "TreeNode.h"

#include <aw_awar_defs.hxx>

#include <adGene.h>
#include <ad_cb.h>

#include <arb_defs.h>
#include <arb_strbuf.h>
#include <arb_file.h>
#include <arb_strarray.h>
#include <arb_sort.h>

#include <cctype>
#include <cmath>
#include <algorithm>

// hook for 'export_sequence'

static gb_export_sequence_cb get_export_sequence = NULp;

NOT4PERL void GB_set_export_sequence_hook(gb_export_sequence_cb escb) {
    // Installs the callback stored as hook for 'export_sequence'
    // (passing NULp removes an installed hook).
    // Replacing an installed hook by another hook is not allowed.
    gb_assert(!(get_export_sequence && escb)); // avoid unwanted overwrite
    get_export_sequence = escb;
}

using namespace GBL_IMPL;

namespace GBL_IMPL {
    // global ACI/SRT debug switch
    int traceACI    = 0;
    int traceIndent = 0;

    void print_trace(const char *text) {
        FILE *DUMPOUT = stdout;

        gb_assert(traceACI>0);
        if (traceIndent>0) {
            ConstStrArray line;
            GBT_split_string(line, text, "\n", SPLIT_DROPEMPTY);
            for (unsigned L = 0; L<line.size(); ++L) {
                for (int i = 0; i<traceIndent; ++i) {
                    fputc(' ', DUMPOUT);
                }
                fputs(line[L], DUMPOUT);
                fputc('\n', DUMPOUT);
            }
        }
        else {
            fputs(text, DUMPOUT);
        }
        fflush(DUMPOUT);
    }

    // Parses the arguments in 'param' against the list of expected parameters 'ppara'.
    //
    // For each argument the list is searched for the first active parameter whose
    // 'param_name' is a prefix of the argument. The rest of the argument (the value) is
    // converted according to the parameter type and written to the parameter's 'varaddr'.
    //
    // param : arguments passed to the command
    // ppara : linked list (via 'next') of parameter descriptions; entries with param_name==NULp are skipped
    // com   : only used inside the error message for unknown arguments
    //
    // Returns NULp on success.
    // If an argument matches no parameter, an error listing all active parameters is exported and returned.
    GB_ERROR trace_params(const GBL_streams& param, gbl_param *ppara, const char *com) {
        GB_ERROR error = NULp;
        int      i;

        int argc = param.size();
        for (i=0; i<argc; i++) {
            gbl_param  *para;
            const char *argument = param.get(i);

            // Note: 'para' is inspected behind this loop:
            // it remains non-NULp if the loop was left via 'break' (=parameter accepted)
            for (para = ppara; para && !error; para = para->next) {
                if (para->param_name) { // NULp means param is inactive (see PARAM_IF)
                    int len = strlen(para->param_name);

                    if (strncmp(argument, para->param_name, len) == 0) {
                        const char *value = argument+len; // set to start of value

                        if (para->type == GB_BIT) {
                            // GB_BIT is special cause param_name does NOT contain trailing '='

                            if (!value[0]) { // only 'name' -> handle like 'name=1'
                                ;
                            }
                            else if (value[0] == '=') {
                                value++;
                            }
                        }

                        switch (para->type) {
                            case GB_STRING:
                                // stores a pointer into 'argument' (no copy is made)
                                *(const char **)para->varaddr  = value;
                                break;

                            case GB_INT:
                                STATIC_ASSERT(sizeof(int) == sizeof(nat)); // assumed by GBL_PARAM_UINT
                                *(int *)para->varaddr = atoi(value);
                                break;

                            case GB_BIT:
                                // 'param=' is same as 'param' or 'param=1' (historical reason, don't change)
                                *(int *)para->varaddr = (value[0] ? atoi(value) : 1);
                                break;

                            case GB_BYTE:
                                *(char *)para->varaddr = *value; // this may use the terminal zero-byte (e.g. for p1 in 'p0=0,p1=,p2=2' )
                                if (value[0] && value[1]) { // found at least 2 chars
                                    GB_warningf("Only one character expected in value '%s' of param '%s' - rest is ignored", value, para->param_name);
                                }
                                break;

                            default:
                                gb_assert(0);
                                error = GBS_global_string("Parameter '%s': Unknown type %i (internal error)", para->param_name, para->type);
                                break;
                        }
                        break; // accept parameter
                    }
                }
            }

            if (!error && !para) { // no parameter found for argument
                // collect all parameters into an array (allows to list them in reverse list order)
                int pcount = 0;
                for (para = ppara; para; para = para->next) pcount++;

                gbl_param **params;
                ARB_calloc(params, pcount);
                {
                    int k;
                    for (k = 0, para = ppara; para; para = para->next) params[k++] = para;
                }

                GBS_strstruct str(1000);

                // build one help line per active parameter: name, type and help text
                for (pcount--; pcount>=0; pcount--) {
                    para = params[pcount];
                    if (para->param_name) {
                        str.cat("  ");
                        str.cat(para->param_name);
                        switch (para->type) {
                            case GB_STRING: str.cat("STRING"); break;
                            case GB_INT:    str.cat("INT");    break;
                            case GB_FLOAT:  str.cat("FLOAT");  break;
                            case GB_BYTE:   str.cat("CHAR");   break;
                            case GB_BIT:    str.cat("    ");   break;
                            default:        str.cat("????"); gb_assert(0); break;
                        }
                        str.cat("\t\t;");
                        str.cat(para->help_text);
                        str.cat("\n");
                    }
                }

                freenull(params);

                return GB_export_errorf("Unknown Parameter '%s' in command '%s'\n  PARAMETERS:\n%s",
                                        argument, com, str.get_data());
            }
        }

        return error;
    }
};



// -------------------------
//      String functions

static int gbl_stricmp(const char *s1, const char *s2) {
    // case insensitive strcmp
    //
    // Returns  0 if both strings are equal (ignoring case),
    //         -1 if 's1' sorts before 's2',
    //          1 otherwise.
    for (int i = 0; ; ++i) {
        // cast to unsigned char: passing negative values to tolower() is undefined
        char c1 = tolower((unsigned char)s1[i]);
        char c2 = tolower((unsigned char)s2[i]);

        if (c1 != c2) return c1<c2 ? -1 : 1;
        if (!c1) return 0; // reached end of both strings -> equal strings
    }
}
static int gbl_strincmp(const char *s1, const char *s2, int size2) {
    // case insensitive strncmp
    //
    // Compares at most 'size2' characters of 's1' and 's2' (ignoring case).
    // Returns 0 if the compared parts are equal, otherwise -1 or 1.
    for (int i = 0; i<size2; ++i) {
        // cast to unsigned char: passing negative values to tolower() is undefined
        char c1 = tolower((unsigned char)s1[i]);
        char c2 = tolower((unsigned char)s2[i]);

        if (c1 != c2) return c1<c2 ? -1 : 1;
        if (!c1) break; // both strings end here -> equal strings
    }
    return 0;
}
static const char *gbl_stristr(const char *haystack, const char *needle) {
    // case insensitive strstr
    //
    // Returns a pointer to the first occurrence of 'needle' in 'haystack' (ignoring case)
    // or NULp if 'needle' does not occur.
    // Note: an empty 'needle' is reported at the terminating zero-byte of 'haystack'.
    //
    // Candidate positions are located with strchr() using the upper- and lowercase variant
    // of the first needle character; each candidate is verified with gbl_strincmp().
    const char *hp          = haystack;
    char        c1          = toupper((unsigned char)needle[0]); // cast: negative values are undefined for toupper/tolower
    char        c2          = tolower((unsigned char)c1);
    int         needle_size = strlen(needle);

    if (c1 == c2) {
        // first character has no case -> only one character to search for
        hp = strchr(hp, c1);
        while (hp) {
            if (gbl_strincmp(hp, needle, needle_size) == 0) return hp;
            hp = strchr(hp+1, c1);
        }
    }
    else {
        while (hp) {
            const char *h1 = strchr(hp, c1);
            const char *h2 = strchr(hp, c2);

            if (h1 && h2) {
                // both variants occur -> test the candidate located first
                if (h1<h2) {
                    if (gbl_strincmp(h1, needle, needle_size) == 0) return h1;
                    hp = h1+1;
                }
                else {
                    gb_assert(h1>h2);
                    if (gbl_strincmp(h2, needle, needle_size) == 0) return h2;
                    hp = h2+1;
                }
            }
            else {
                // at most one variant occurs in the rest of haystack -> only search for that variant
                if (h1) { hp = h1; }
                else if (h2) { hp = h2; c1 = c2; }
                else { hp = NULp; }

                while (hp) {
                    if (gbl_strincmp(hp, needle, needle_size) == 0) return hp;
                    hp = strchr(hp+1, c1);
                }
            }
        }
    }
    return NULp;
}

inline int approve_pos(int pos, int len) {
    // Converts a position (which may be negative) into a non-negative position
    // for a string of length 'len':
    // - non-negative positions are returned unchanged
    // - negative positions count from the end of the string (-1 = last character)
    // - negative positions pointing to or before the start of the string result in 0
    if (pos >= 0) return pos;
    return pos > -len ? len+pos : 0; // compare against -len (negating 'pos' would overflow for INT_MIN)
}

static GB_ERROR gbl_mid_streams(const GBL_streams& arg_input, GBL_streams& arg_out, int start, int end) {
    // Inserts one stream into 'arg_out' for each stream of 'arg_input':
    // the part of the input stream between the positions 'start' and 'end'.
    // Both positions are converted by approve_pos() first.
    // An empty stream is inserted if the start position lies behind the stream
    // or the end position lies before the start position.
    //
    // used as well to copy all streams (e.g. by 'dd')
    for (int idx = 0; idx<arg_input.size(); ++idx) {
        const char *text   = arg_input.get(idx);
        const int   length = strlen(text);

        const int first = approve_pos(start, length);
        const int last  = approve_pos(end, length);

        const bool nothing_selected = first >= length || last<first;
        if (nothing_selected) {
            arg_out.insert(ARB_strdup(""));
        }
        else {
            gb_assert(first >= 0);
            arg_out.insert(ARB_strpartdup(text+first, text+last));
        }
    }
    return NULp;
}

static GB_ERROR gbl_trace(GBL_command_arguments *args) {
    // Switches the global ACI trace on (1) or off (0).
    // All input streams are copied to the output.
    EXPECT_PARAMS(args, 1, "0|1");

    const int wanted = atoi(args->get_param(0));
    if (wanted<0 || wanted>1) {
        return GBS_global_string("Illegal value %i to trace", wanted);
    }

    if (wanted != traceACI) {
        traceACI = 1; // print_trace expects active trace (also needed to report de-activation)
        print_trace(GBS_global_string("%sctivated ACI trace\n", wanted ? "A" : "De-a"));
        traceACI = wanted;
    }

    return gbl_mid_streams(args->input, args->output, 0, -1); // copy all streams
}

/* ---------------------------------------------------------------------------------------
 * Binary operators work on pairs of values.
 * Three different operational modes are implemented for all binary operators:
 *
 * 1. inputstreams|operator
 *
 *    The number of inputstreams has to be even and the operator will be
 *    applied to pairs of them.
 *
 *    Example : a;b;c;d;e;f | plus
 *    Result  : a+b;c+d;e+f
 *
 * 2. inputstreams|operator(x)
 *
 *    The number of inputstreams has to be at least 1.
 *    The operator is applied to each inputstream.
 *
 *    Example : a;b;c | plus(d)
 *    Result  : a+d;b+d;c+d
 *
 * 3. operator(x, y)
 *
 *    @@@ this description does not match behavior!
 *    @@@ check description in helpfile as well
 *
 *    Inputstreams will be ignored and the operator is applied
 *    to the arguments
 *
 *    Example : a;b | plus(c,d)
 *    Result  : c+d
 */

// Applies the binary operator 'op' according to the number of parameters passed to the command:
//
// 0 parameters: 'op' is applied to pairs of input streams (needs an even, non-zero number of streams)
// 1 parameter : 'op' is applied to each input stream and the parameter
// 2 parameters: for each input stream both parameters are interpreted as subcommands
//               on that stream and 'op' is applied to the two results
//
// 'client_data' is passed through to 'op' as third argument.
// The string returned by 'op' is passed to the output.
//
// Returns NULp on success, otherwise an error message.
template <typename T>
GB_ERROR gbl_apply_binary_operator(GBL_command_arguments *args, char *(*op)(const char *, const char *, T), T client_data) {
    GB_ERROR error = NULp;
    switch (args->param_count()) {
        case 0:
            gb_assert(args->set_params_checked());
            if (args->input.size() == 0) error = "Expect at least two input streams if called with 0 parameters";
            else if (args->input.size()%2) error = "Expect an even number of input streams if called with 0 parameters";
            else {
                int inputpairs = args->input.size()/2;
                for (int i = 0; i<inputpairs; ++i) {
                    PASS_2_OUT(args, op(args->input.get(i*2), args->input.get(i*2+1), client_data));
                }
            }
            break;

        case 1:
            gb_assert(args->set_params_checked());
            if (args->input.size() == 0) error = "Expect at least one input stream if called with 1 parameter";
            else {
                const char *argument = args->get_param(0);
                for (int i = 0; i<args->input.size(); ++i) {
                    PASS_2_OUT(args, op(args->input.get(i), argument, client_data));
                }
            }
            break;

        case 2:
            gb_assert(args->set_params_checked());
            // stop at first error (like all other commands interpreting subcommands do)
            for (int i = 0; i<args->input.size() && !error; ++i) {
                char *result1       = args->get_callEnv().interpret_subcommand(args->input.get(i), args->get_param(0)); // @@@ EVALUATED_PARAM (#768)
                if (!result1) error = GB_await_error();
                else {
                    char *result2       = args->get_callEnv().interpret_subcommand(args->input.get(i), args->get_param(1)); // @@@ EVALUATED_PARAM (#768)
                    if (!result2) error = GB_await_error();
                    else {
                        PASS_2_OUT(args, op(result1, result2, client_data));
                        free(result2);
                    }
                    free(result1);
                }
            }
            break;

        default:
            error = check_optional_parameters(args, 0, NULp, 2, "Expr1[,Expr2]", true, false);
            break;
    }

    return error;
}

// --------------------------------
//      escape/unescape strings

static char *unEscapeString(const char *escapedString) {
    // replaces all \x by x
    //
    // A backslash at the very end of the string (i.e. not followed by any character) is dropped.
    // Returns a modified copy of 'escapedString' (callers in this file release it with free()).
    char *result = nulldup(escapedString);
    if (!result) return result; // NOTE(review): presumably nulldup yields NULp for NULp input - nothing to unescape then

    char *to   = result;
    char *from = result;

    while (1) {
        char c = *from++;
        if (!c) break;

        if (c=='\\') {
            c = *from;
            if (!c) break; // dangling backslash: never step over the terminating zero-byte
            ++from;
        }
        *to++ = c;
    }
    *to = 0;
    return result;
}
static char *escapeString(const char *unescapedString) {
    // Returns a newly allocated copy of 'unescapedString',
    // in which each '\' and each '"' is preceded by a backslash.
    int   len     = strlen(unescapedString);
    char *escaped = ARB_alloc<char>(2*len+1); // sufficient even if every character gets escaped
    char *dest    = escaped;

    for (const char *src = unescapedString; *src; ++src) {
        const bool needs_escape = *src == '\\' || *src == '\"';
        if (needs_escape) *dest++ = '\\';
        *dest++ = *src;
    }
    *dest = 0;
    return escaped;
}

// ---------------------------------
//      the commands themselves:

static GB_ERROR gbl_quote(GBL_command_arguments *args) {
    // encloses each input stream in double quotes
    EXPECT_NO_PARAM(args);

    int stream = 0;
    while (stream<args->input.size()) {
        FORMAT_2_OUT(args, "\"%s\"", args->input.get(stream));
        ++stream;
    }
    return NULp;
}
static GB_ERROR gbl_unquote(GBL_command_arguments *args) {
    // Removes one pair of enclosing double quotes from each input stream.
    // Streams not enclosed in double quotes are passed to the output unchanged.
    EXPECT_NO_PARAM(args);

    for (int i=0; i<args->input.size(); i++) {
        const char *str = args->input.get(i);
        int         len = strlen(str);

        // A quoted string needs at least 2 characters: a stream consisting of a single '"'
        // is not quoted (handling it as quoted would request the invalid range [str+1, str-1]).
        if (len >= 2 && str[0] == '\"' && str[len-1] == '\"') {
            PASS_2_OUT(args, ARB_strpartdup(str+1, str+len-2));
        }
        else {
            IN_2_OUT(args, i);
        }
    }
    return NULp;
}

static GB_ERROR gbl_escape(GBL_command_arguments *args) {
    // passes the escaped version (see escapeString) of each input stream to the output
    EXPECT_NO_PARAM(args);

    int stream = 0;
    while (stream<args->input.size()) {
        PASS_2_OUT(args, escapeString(args->input.get(stream)));
        ++stream;
    }
    return NULp;
}
static GB_ERROR gbl_unescape(GBL_command_arguments *args) {
    // passes the unescaped version (see unEscapeString) of each input stream to the output
    EXPECT_NO_PARAM(args);

    int stream = 0;
    while (stream<args->input.size()) {
        PASS_2_OUT(args, unEscapeString(args->input.get(stream)));
        ++stream;
    }
    return NULp;
}

static GB_ERROR gbl_command(GBL_command_arguments *args) {
    // Interprets the (unescaped) parameter as subcommand on each input stream
    // and passes the results to the output. Stops at the first failing stream.
    EXPECT_PARAMS(args, 1, "\"ACI command\"");

    char *command = unEscapeString(args->get_param(0));

    if (traceACI) {
        print_trace(GBS_global_string("executing command '%s'\n", command));
    }

    GB_ERROR error = NULp;
    for (int stream = 0; !error && stream<args->input.size(); ++stream) {
        char *result = args->get_callEnv().interpret_subcommand(args->input.get(stream), command);
        if (result) {
            PASS_2_OUT(args, result);
        }
        else {
            error = GB_await_error();
        }
    }

    free(command);
    return error;
}

static GB_ERROR gbl_eval(GBL_command_arguments *args) {
    // Works in two steps:
    // 1. the (unescaped) parameter is interpreted on an empty input; the result is the command to execute
    // 2. that command is interpreted on each input stream; results are passed to the output
    EXPECT_PARAMS(args, 1, "\"expression evaluating to ACI command\"");

    char *to_eval = unEscapeString(args->get_param(0));
    TRACE_ACI(GBS_global_string("evaluating '%s'\n", to_eval));

    GB_ERROR  error   = NULp;
    char     *command = args->get_callEnv().interpret_subcommand("", to_eval); // evaluate independent

    if (command) {
        TRACE_ACI(GBS_global_string("executing  '%s'\n", command));

        for (int stream = 0; !error && stream<args->input.size(); ++stream) {
            char *result = args->get_callEnv().interpret_subcommand(args->input.get(stream), command);
            if (result) {
                PASS_2_OUT(args, result);
            }
            else {
                error = GB_await_error();
            }
        }
        free(command);
    }
    else {
        error = GB_await_error();
    }

    free(to_eval);
    return error;
}

class DefinedCommands : virtual Noncopyable {
    GB_HASH *cmds;
public:
    DefinedCommands() { cmds = GBS_create_dynaval_hash(100, GB_MIND_CASE, GBS_dynaval_free); }
    ~DefinedCommands() { GBS_free_hash(cmds); }

    void set(const char *name, char* cmd) { GBS_dynaval_free(GBS_write_hash(cmds, name, (long)cmd)); } // takes ownership of 'cmd'!
    const char *get(const char *name) const { return (const char *)GBS_read_hash(cmds, name); }
};

static DefinedCommands defined_commands;

static GB_ERROR gbl_define(GBL_command_arguments *args) {
    // Stores the (unescaped) 2nd parameter as command under the name given as 1st parameter.
    // Produces no output streams.
    COMMAND_DROPS_INPUT_STREAMS(args);
    EXPECT_PARAMS(args, 2, "name, \"ACI command\"");

    const char *cmd_name = args->get_param(0);
    char       *cmd_text = unEscapeString(args->get_param(1));

    defined_commands.set(cmd_name, cmd_text); // 'cmd_text' is owned by 'defined_commands' from here on
    TRACE_ACI(GBS_global_string("defining command '%s'='%s'\n", cmd_name, cmd_text));

    return NULp;
}

static GB_ERROR gbl_do(GBL_command_arguments *args) {
    // Executes a command previously stored by 'define' on each input stream.
    // Fails if no command was defined under the given name.
    EXPECT_PARAMS(args, 1, "definedCommandName");

    const char *name = args->get_param(0);
    const char *cmd  = defined_commands.get(name);

    if (!cmd) {
        return GBS_global_string("Can't do undefined command '%s' - use define(%s, ...) first", name, name);
    }

    TRACE_ACI(GBS_global_string("executing defined command '%s'='%s' on %i streams\n", name, cmd, args->input.size()));

    GB_ERROR error = NULp;
    for (int stream = 0; !error && stream<args->input.size(); ++stream) {
        char *result = args->get_callEnv().interpret_subcommand(args->input.get(stream), cmd);
        if (result) {
            PASS_2_OUT(args, result);
        }
        else {
            error = GB_await_error();
        }
    }
    return error;
}

static GB_ERROR gbl_streams(GBL_command_arguments *args) {
    // outputs the number of input streams (as single stream)
    EXPECT_NO_PARAM(args);

    const int stream_count = args->input.size();
    FORMAT_2_OUT(args, "%i", stream_count);

    return NULp;
}

static GB_ERROR expect_used_in_genome_db(GBL_command_arguments *args) {
    // returns an error naming the current command, unless GEN_is_genome_db() accepts the database
    GB_ERROR error = NULp;
    if (!GEN_is_genome_db(args->get_gb_main(), -1)) {
        error = GBS_global_string("ACI command '%s' can only be used in genome databases.", args->get_cmdName());
    }
    return error;
}

static GB_ERROR apply_to_origin(GBL_command_arguments *args, bool organism) {
    // Interprets the (unescaped) parameter as subcommand on each input stream,
    // but refers to the origin of the current gene-species instead of the current item:
    // the origin organism if 'organism' is true, otherwise the origin gene.
    EXPECT_PARAMS(args, 1, "\"ACI command\"");
    EXPECT_ITEM_REFERENCED(args);

    GB_ERROR error = expect_used_in_genome_db(args);
    if (error) return error;

    if (!GEN_is_pseudo_gene_species(args->get_item_ref())) {
        return GBS_global_string("'%s' applies to gene-species only", args->get_cmdName());
    }

    GBDATA *gb_origin = organism
        ? GEN_find_origin_organism(args->get_item_ref(), NULp)
        : GEN_find_origin_gene(args->get_item_ref(), NULp);

    if (!gb_origin) return GB_await_error();

    char         *command = unEscapeString(args->get_param(0));
    GBL_call_env  callEnv(gb_origin, args->get_env()); // refer to gb_origin for subcommands
    // Note: if calling env has a FieldTracker, field access from 'command' is not tracked.
    //       That access applies to different item.

    for (int stream = 0; !error && stream<args->input.size(); ++stream) {
        char *result = callEnv.interpret_subcommand(args->input.get(stream), command);
        if (result) {
            PASS_2_OUT(args, result);
        }
        else {
            error = GB_await_error();
        }
    }

    free(command);
    return error;
}

// both commands forward to apply_to_origin (2nd argument selects organism vs. gene)
static GB_ERROR gbl_origin_gene(GBL_command_arguments *args) {
    return apply_to_origin(args, false);
}
static GB_ERROR gbl_origin_organism(GBL_command_arguments *args) {
    return apply_to_origin(args, true);
}


static GB_ERROR applyToItemFoundByKey(GBL_command_arguments *args, const char *itemname, GBDATA *gb_item_data, const char *key) {
    // For each non-empty input stream:
    // - searches below 'gb_item_data' for a field 'key' containing the stream content (ignoring case),
    // - interprets the (unescaped) command parameter on an empty input, referring to the father of that field,
    // - passes the result to the output.
    //
    // 'itemname' is only used in the error message reported if nothing was found.
    // Processing stops at the first error.
    char     *command = unEscapeString(args->get_param(0));
    GB_ERROR  error   = NULp;

    for (int stream = 0; stream<args->input.size() && !error; stream++) {
        const char *in = args->input.get(stream);
        if (!in[0]) continue; // silently ignore empty input streams

        GBDATA *gb_item  = NULp;
        GBDATA *gb_field = GB_find_string(gb_item_data, key, in, GB_IGNORE_CASE, SEARCH_GRANDCHILD);

        if (gb_field) gb_item = GB_get_father(gb_field);
        else          error   = GBS_global_string("No %s with %s '%s' found.", itemname, key, in);

        if (!gb_item) {
            if (!error) error = GB_await_error();
            continue;
        }

        GBL_call_env callEnv(gb_item, args->get_env()); // refer to gb_item for subcommands
        // Note: if calling env has a FieldTracker, field access from 'command' is not tracked.
        //       That access applies to different item.

        char *result = callEnv.interpret_subcommand("", command);
        if (result) {
            PASS_2_OUT(args, result);
        }
        else {
            error = GB_await_error();
        }
    }

    free(command);
    return error;
}
static GB_ERROR gbl_findspec(GBL_command_arguments *args) {
    // applies the command to the species whose field 'name' matches the input stream
    EXPECT_PARAMS(args, 1, "\"ACI command\"");

    GBDATA *gb_species_data = GBT_get_species_data(args->get_gb_main());
    return applyToItemFoundByKey(args, "species", gb_species_data, "name");
}
static GB_ERROR gbl_findacc(GBL_command_arguments *args) {
    // applies the command to the species whose field 'acc' matches the input stream
    EXPECT_PARAMS(args, 1, "\"ACI command\"");

    GBDATA *gb_species_data = GBT_get_species_data(args->get_gb_main());
    return applyToItemFoundByKey(args, "species", gb_species_data, "acc");
}

static GB_ERROR gbl_findgene(GBL_command_arguments *args) {
    // Applies the command to the gene whose field 'name' matches the input stream.
    // The gene is searched in the current item if that is an organism,
    // or in the organism owning the current item if that is a gene.
    EXPECT_PARAMS(args, 1, "\"ACI command\"");
    EXPECT_ITEM_REFERENCED(args);

    GB_ERROR error = expect_used_in_genome_db(args);
    if (error) return error;

    GBDATA *gb_item     = args->get_item_ref();
    GBDATA *gb_organism = NULp;

    if (GEN_is_organism(gb_item)) {
        gb_organism = gb_item;
    }
    else if (strcmp(GB_read_key_pntr(gb_item), "gene") == 0) {
        // if applied to gene -> find "brother" of gene
        gb_organism = GB_get_grandfather(gb_item);
        if (!GEN_is_organism(gb_organism)) {
            return "'findgene' cannot be used here (was applied to 'gene', but could not find gene-owning organism)";
        }
    }
    else {
        return GBS_global_string("'findgene' cannot be applied to '%s' (need an organism)",
                                 GBT_get_name_or_description(gb_item));
    }

    return applyToItemFoundByKey(args, "gene", GEN_find_gene_data(gb_organism), "name");
}

class Tab {
    // table holding one flag for each of the 256 character codes
    bool tab[256];

public:
    Tab(bool take, const char *invert) {
        // characters listed in 'invert' get the flag 'take', all other characters get '!take'
        std::fill(tab, tab+256, !take);
        for (const char *c = invert; *c; ++c) {
            tab[safeCharIndex(*c)] = take;
        }
    }

    bool operator[](int i) const {
        return tab[i];
    }
};

inline GB_ERROR count_by_tab(GBL_command_arguments *args, const Tab& tab) {
    // outputs (for each input stream) the number of characters flagged in 'tab'
    for (int stream = 0; stream<args->input.size(); ++stream) {
        long flagged = 0;

        for (const char *p = args->input.get(stream); *p; ++p) {
            if (tab[safeCharIndex(*p)]) ++flagged;
        }
        FORMAT_2_OUT(args, "%li", flagged);
    }
    return NULp;
}
inline GB_ERROR remove_by_tab(GBL_command_arguments *args, const Tab& tab) {
    // Outputs a copy of each input stream from which all characters flagged in 'tab' were removed.
    GBS_strstruct buf(1000);
    for (int i=0; i<args->input.size(); ++i) {
        buf.erase();
        for (const char *p = args->input.get(i); *p; p++) {
            // use safeCharIndex (like count_by_tab does):
            // casting a negative char to 'unsigned int' results in an index outside the table
            if (!tab[safeCharIndex(*p)]) {
                buf.put(*p);
            }
        }
        PASS_2_OUT(args, buf.get_copy());
    }
    return NULp;
}

static GB_ERROR gbl_count(GBL_command_arguments *args) {
    // counts occurrences of the characters given as parameter
    EXPECT_PARAMS(args, 1, "\"characters to count\"");

    const Tab counted(true, args->get_param(0));
    return count_by_tab(args, counted);
}
static GB_ERROR gbl_len(GBL_command_arguments *args) {
    // counts all characters except those given as (optional) parameter
    EXPECT_OPTIONAL_PARAMS(args, 0, NULp, 1, "\"characters not to count\"");

    const Tab counted(false, args->get_optional_param(0, ""));
    return count_by_tab(args, counted);
}
static GB_ERROR gbl_remove(GBL_command_arguments *args) {
    // removes the characters given as parameter
    EXPECT_PARAMS(args, 1, "\"characters to remove\"");

    const Tab removed(true, args->get_param(0));
    return remove_by_tab(args, removed);
}
static GB_ERROR gbl_keep(GBL_command_arguments *args) {
    // removes all characters except those given as parameter
    EXPECT_PARAMS(args, 1, "\"characters to keep\"");

    const Tab removed(false, args->get_param(0));
    return remove_by_tab(args, removed);
}


// Binary operators used with gbl_apply_binary_operator.
// Each operator returns its result as newly allocated string.
// 'case_sensitive' selects between case-sensitive and case-insensitive comparison.

static char *binop_compare(const char *arg1, const char *arg2, bool case_sensitive) {
    // result: "-1", "0" or "1"
    const int cmp = case_sensitive ? strcmp(arg1, arg2) : gbl_stricmp(arg1, arg2);

    int sign = 0;
    if      (cmp<0) sign = -1;
    else if (cmp>0) sign = 1;

    return GBS_global_string_copy("%i", sign);
}
static char *binop_equals(const char *arg1, const char *arg2, bool case_sensitive) {
    // result: "1" if both arguments are equal, "0" otherwise
    const int cmp = case_sensitive ? strcmp(arg1, arg2) : gbl_stricmp(arg1, arg2);
    return GBS_global_string_copy("%i", cmp ? 0 : 1);
}
static char *binop_contains(const char *arg1, const char *arg2, bool case_sensitive) {
    // result: position (starting with 1) of first occurrence of 'arg2' in 'arg1' or "0" if not found
    if (!arg2[0]) return strdup("0"); // do not report matches of empty string

    const char *found = case_sensitive ? strstr(arg1, arg2) : gbl_stristr(arg1, arg2);
    return GBS_global_string_copy("%ti", found ? (found-arg1)+1 : 0);
}
static char *binop_partof(const char *arg1, const char *arg2, bool case_sensitive) {
    // like binop_contains with swapped arguments
    return binop_contains(arg2, arg1, case_sensitive);
}

// commands applying the binary string operators
// (commands with prefix 'i' pass false for 'case_sensitive')

static GB_ERROR gbl_compare(GBL_command_arguments *args) {
    return gbl_apply_binary_operator(args, binop_compare, true);
}
static GB_ERROR gbl_icompare(GBL_command_arguments *args) {
    return gbl_apply_binary_operator(args, binop_compare, false);
}
static GB_ERROR gbl_equals(GBL_command_arguments *args) {
    return gbl_apply_binary_operator(args, binop_equals, true);
}
static GB_ERROR gbl_iequals(GBL_command_arguments *args) {
    return gbl_apply_binary_operator(args, binop_equals, false);
}
static GB_ERROR gbl_contains(GBL_command_arguments *args) {
    return gbl_apply_binary_operator(args, binop_contains, true);
}
static GB_ERROR gbl_icontains(GBL_command_arguments *args) {
    return gbl_apply_binary_operator(args, binop_contains, false);
}
static GB_ERROR gbl_partof(GBL_command_arguments *args) {
    return gbl_apply_binary_operator(args, binop_partof, true);
}
static GB_ERROR gbl_ipartof(GBL_command_arguments *args) {
    return gbl_apply_binary_operator(args, binop_partof, false);
}

static GB_ERROR gbl_isEmpty(GBL_command_arguments *args) {
    // outputs "1" for each empty input stream and "0" for each non-empty one
    EXPECT_NO_PARAM(args);

    int stream = 0;
    while (stream<args->input.size()) {
        const char *content = args->input.get(stream);
        FORMAT_2_OUT(args, "%i", content[0] == 0);
        ++stream;
    }
    return NULp;
}
static GB_ERROR gbl_inRange(GBL_command_arguments *args) {
    // Outputs "1" for each input stream whose numeric value (parsed by strtod)
    // lies in the closed interval [low, high], "0" otherwise.
    EXPECT_PARAMS(args, 2, "low,high");

    const double low  = strtod(args->get_param(0), NULp);
    const double high = strtod(args->get_param(1), NULp);

    int stream = 0;
    while (stream<args->input.size()) {
        const double val     = strtod(args->input.get(stream), NULp);
        const bool   inRange = low<=val && val<=high;

        FORMAT_2_OUT(args, "%i", inRange);
        ++stream;
    }
    return NULp;
}


static GB_ERROR gbl_translate(GBL_command_arguments *args) {
    // Translates characters of all input streams:
    // each character occurring in parameter 1 ('old') is replaced by the character
    // at the same position in parameter 2 ('new').
    // If the optional parameter 3 ('other') is given, all characters not occurring in 'old'
    // are replaced by that character; otherwise they remain unchanged.
    EXPECT_OPTIONAL_PARAMS(args, 2, "old,new", 1, "other");

    char replace_other = 0;
    {
        const char *other = args->get_optional_param(2, NULp);
        if (other) {
            const bool single_char = other[0] != 0 && other[1] == 0;
            if (!single_char) {
                return "third parameter of translate has to be one character (i.e. \"-\")";
            }
            replace_other = other[0];
        }
    }

    // build translation table :
    unsigned char tab[256];
    {
        const unsigned char *from = (const unsigned char *)args->get_param(0);
        const unsigned char *to   = (const unsigned char *)args->get_param(1);

        if (strlen((const char *)from) != strlen((const char *)to)) {
            return "arguments 1 and 2 of translate should be strings with identical length";
        }

        char used[256];
        for (int i = 0; i<256; ++i) { // IRRELEVANT_LOOP
            tab[i]  = replace_other ? replace_other : i; // replace unused or identity translation
            used[i] = 0;
        }

        for (int pos = 0; from[pos]; ++pos) {
            const unsigned char c = from[pos];
            if (used[c]) return GBS_global_string("character '%c' used twice in argument 1 of translate", c);

            used[c] = 1;
            tab[c]  = to[pos]; // real translation
        }
    }

    GBS_strstruct translated(1000);
    for (int stream = 0; stream<args->input.size(); ++stream) {
        translated.erase();

        const char *p = args->input.get(stream);
        while (*p) {
            translated.put(tab[(unsigned char)*p]);
            ++p;
        }
        PASS_2_OUT(args, translated.get_copy());
    }
    return NULp;
}


static GB_ERROR gbl_echo(GBL_command_arguments *args) {
    // outputs each parameter as one stream (input streams are dropped)
    ACCEPT_ANY_PARAMS(args);
    COMMAND_DROPS_INPUT_STREAMS(args);

    int param = 0;
    while (param<args->param_count()) {
        PARAM_2_OUT(args, param);
        ++param;
    }
    return NULp;
}

static GB_ERROR gbl_dd(GBL_command_arguments *args) {
    // copies all input streams to the output
    EXPECT_NO_PARAM(args);

    const int first_pos = 0;
    const int last_pos  = -1; // -1 refers to the last character
    return gbl_mid_streams(args->input, args->output, first_pos, last_pos);
}

enum Case { UPPER, LOWER, CAPS };

static GB_ERROR convert_case(GBL_command_arguments *args, Case convTo) {
    // Outputs a case-converted copy of each input stream:
    //
    // UPPER: all characters are converted to uppercase
    // LOWER: all characters are converted to lowercase
    // CAPS:  each alphanumeric character following a non-alphanumeric character
    //        (or starting the stream) is converted to uppercase,
    //        all other alphanumeric characters are converted to lowercase
    EXPECT_NO_PARAM(args);

    for (int i=0; i<args->input.size(); i++) {
        char *p              = ARB_strdup(args->input.get(i));
        bool  last_was_alnum = false;

        for (char *pp = p; pp[0]; ++pp) {
            // <cctype> functions expect values representable as unsigned char
            // (passing a negative char is undefined)
            const unsigned char c = (unsigned char)pp[0];

            switch (convTo) {
                case LOWER:  pp[0] = tolower(c); break;
                case UPPER:  pp[0] = toupper(c); break;
                case CAPS: {
                    bool alnum = isalnum(c);
                    if (alnum) pp[0] = (last_was_alnum ? tolower : toupper)(c);
                    last_was_alnum = alnum;
                    break;
                }
                default: gb_assert(0); break;
            }
        }

        PASS_2_OUT(args, p);
    }

    return NULp;
}

// case conversion commands (all forward to convert_case)
static GB_ERROR gbl_caps(GBL_command_arguments *args) {
    return convert_case(args, CAPS);
}
static GB_ERROR gbl_upper(GBL_command_arguments *args) {
    return convert_case(args, UPPER);
}
static GB_ERROR gbl_lower(GBL_command_arguments *args) {
    return convert_case(args, LOWER);
}

static GB_ERROR gbl_head(GBL_command_arguments *args) {
    // outputs the first 'length_of_head' characters of each input stream
    // (empty streams if the length is not positive)
    EXPECT_PARAMS(args, 1, "length_of_head");

    const int length = atoi(args->get_param(0));
    if (length>0) {
        return gbl_mid_streams(args->input, args->output, 0, length-1);
    }
    return gbl_mid_streams(args->input, args->output, 1, 0); // empty all streams
}
static GB_ERROR gbl_tail(GBL_command_arguments *args) {
    // outputs the last 'length_of_tail' characters of each input stream
    // (empty streams if the length is not positive)
    EXPECT_PARAMS(args, 1, "length_of_tail");

    const int length = atoi(args->get_param(0));
    if (length>0) {
        return gbl_mid_streams(args->input, args->output, -length, -1);
    }
    return gbl_mid_streams(args->input, args->output, 1, 0); // empty all streams
}

inline GB_ERROR mid(GBL_command_arguments *args, int start_index) {
    // Outputs the part between the positions 'start' and 'end' of each input stream.
    // 'start_index' is the position number referring to the first character
    // (it gets subtracted from both parameters).
    EXPECT_PARAMS(args, 2, "start,end");

    const int start = atoi(args->get_param(0))-start_index;
    const int end   = atoi(args->get_param(1))-start_index;

    return gbl_mid_streams(args->input, args->output, start, end);
}
static GB_ERROR gbl_mid0(GBL_command_arguments *args) {
    return mid(args, 0);
}
static GB_ERROR gbl_mid(GBL_command_arguments *args) {
    return mid(args, 1);
}

// Pads every input stream with spaces up to the width given by the parameter "tabstop".
// Streams that already are at least that long are passed on unchanged.
// pretab == true  -> spaces are inserted in front of the text
// pretab == false -> spaces are appended behind the text
static GB_ERROR tab(GBL_command_arguments *args, bool pretab) {
    EXPECT_PARAMS(args, 1, "tabstop");

    const int width = atoi(args->get_param(0));

    for (int s = 0; s<args->input.size(); ++s) {
        const char *text    = args->input.get(s);
        const int   textlen = strlen(text);

        if (textlen >= width) {
            IN_2_OUT(args, s);
            continue;
        }

        const int  padding = width-textlen;
        char      *padded  = ARB_alloc<char>(width+1);

        char *text_pos = pretab ? padded+padding : padded;
        char *pad_pos  = pretab ? padded         : padded+textlen;

        memcpy(text_pos, text, textlen);
        memset(pad_pos, ' ', padding);
        padded[width] = 0;

        PASS_2_OUT(args, padded);
    }
    return NULp;
}
// Command entry points using tab(): gbl_tab appends the padding, gbl_pretab prepends it.
static GB_ERROR gbl_tab   (GBL_command_arguments *args) { return tab(args, false); }
static GB_ERROR gbl_pretab(GBL_command_arguments *args) { return tab(args, true); }

// Removes all leading and trailing characters contained in the parameter "chars_to_crop"
// from every input stream and passes the cropped copies to the output.
static GB_ERROR gbl_crop(GBL_command_arguments *args) {
    EXPECT_PARAMS(args, 1, "\"chars_to_crop\"");

    const char *chars_to_crop = args->get_param(0);
    for (int i=0; i<args->input.size(); i++) {
        const char *s = args->input.get(i);
        while (s[0] && strchr(chars_to_crop, s[0])) s++; // crop at beg of line

        // crop at end of line:
        // work with the remaining length (instead of a pointer walking backwards), because
        // a pointer to the position before the start of the buffer must not be formed
        // (happens for empty or completely cropped strings).
        size_t keep = strlen(s);
        while (keep>0 && strchr(chars_to_crop, s[keep-1])) --keep;

        char *p = ARB_alloc<char>(keep+1);
        memcpy(p, s, keep);
        p[keep] = 0;

        PASS_2_OUT(args, p);
    }
    return NULp;
}



// Copies the input streams named by the parameters (stream numbers) to the output,
// in the order in which the parameters were given.
// Each stream number is checked by EXPECT_LEGAL_STREAM_INDEX before it is used.
static GB_ERROR gbl_cut(GBL_command_arguments *args) {
    EXPECT_PARAMS_PASSED(args, "streamnumber[,streamnumber]+");

    const int wanted = args->param_count();
    for (int p = 0; p<wanted; ++p) {
        const int streamnumber = atoi(args->get_param(p));
        EXPECT_LEGAL_STREAM_INDEX(args, streamnumber);
        IN_2_OUT(args, bio2info(streamnumber));
    }
    return NULp;
}
// Passes on all input streams except those named by the parameters (stream numbers).
// If any stream number is invalid, the error is returned and nothing is written to the output.
static GB_ERROR gbl_drop(GBL_command_arguments *args) {
    EXPECT_PARAMS_PASSED(args, "streamnumber[,streamnumber]+");

    const int  streams    = args->input.size();
    bool      *is_dropped = ARB_alloc<bool>(streams);
    std::fill_n(is_dropped, streams, false);

    // mark all streams that shall be dropped
    GB_ERROR error = NULp;
    for (int p = 0; !error && p<args->param_count(); ++p) {
        const int stream = atoi(args->get_param(p));
        error            = check_valid_stream_index(args, stream);
        if (!error) is_dropped[bio2info(stream)] = true;
    }

    // pass on the unmarked ones
    if (!error) {
        for (int s = 0; s<args->input.size(); ++s) {
            if (is_dropped[s]) continue;
            IN_2_OUT(args, s);
        }
    }

    free(is_dropped);
    return error;
}

// Passes on only those input streams that contain at least one character.
static GB_ERROR gbl_dropempty(GBL_command_arguments *args) {
    EXPECT_NO_PARAM(args);

    for (int s = 0; s<args->input.size(); ++s) {
        const bool is_empty = args->input.get(s)[0] == 0;
        if (is_empty) continue;
        IN_2_OUT(args, s);
    }
    return NULp;
}

// Passes on only those input streams whose content converts (via atoi) to a non-zero number.
static GB_ERROR gbl_dropzero(GBL_command_arguments *args) {
    EXPECT_NO_PARAM(args);

    for (int s = 0; s<args->input.size(); ++s) {
        const bool is_zero = atoi(args->input.get(s)) == 0;
        if (is_zero) continue;
        IN_2_OUT(args, s);
    }
    return NULp;
}

// Exchanges two input streams and passes all streams to the output.
// Without parameters the last two streams are exchanged,
// otherwise the two streams named by the parameters "streamnumber,streamnumber".
static GB_ERROR gbl_swap(GBL_command_arguments *args) {
    EXPECT_OPTIONAL_PARAMS(args, 0, NULp, 2, "streamnumber,streamnumber");

    if (args->input.size()<2) return "need at least two input streams";

    // default: exchange the last two streams
    int first  = args->input.size()-1;
    int second = args->input.size()-2;

    if (args->param_count() != 0) {
        gb_assert(args->param_count() == 2);

        first  = atoi(args->get_param(0));
        second = atoi(args->get_param(1));

        EXPECT_LEGAL_STREAM_INDEX(args, first);
        EXPECT_LEGAL_STREAM_INDEX(args, second);

        first  = bio2info(first);
        second = bio2info(second);
    }

    for (int s = 0; s<args->input.size(); ++s) {
        int source = s;
        if      (s == first)  source = second;
        else if (s == second) source = first;

        IN_2_OUT(args, source);
    }

    return NULp;
}

// Moves the input stream named by the parameter "streamnumber" to the end (toback != 0)
// or to the beginning (toback == 0) of the output; the order of all other streams is kept.
static GB_ERROR backfront_stream(GBL_command_arguments *args, int toback) {
    EXPECT_PARAMS(args, 1, "streamnumber");
    if (args->input.size()<1) return "need at least one input stream";

    const int streamnumber = atoi(args->get_param(0));
    EXPECT_LEGAL_STREAM_INDEX(args, streamnumber);
    const int moved = bio2info(streamnumber);

    const bool to_front = !toback;

    if (to_front) {
        IN_2_OUT(args, moved);
    }
    for (int s = 0; s<args->input.size(); ++s) {
        if (s == moved) continue; // handled before/behind this loop
        IN_2_OUT(args, s);
    }
    if (!to_front) {
        IN_2_OUT(args, moved);
    }

    return NULp;
}
// Command entry points using backfront_stream(): move one stream to the back resp. to the front.
static GB_ERROR gbl_toback (GBL_command_arguments *args) { return backfront_stream(args, 1); }
static GB_ERROR gbl_tofront(GBL_command_arguments *args) { return backfront_stream(args, 0); }

// Concatenates all input streams into one output stream.
// The optional parameter "separator" is inserted between consecutive streams.
// Without any input stream nothing is written to the output.
static GB_ERROR gbl_merge(GBL_command_arguments *args) {
    EXPECT_OPTIONAL_PARAMS(args, 0, NULp, 1, "\"separator\"");
    const char *separator = args->get_optional_param(0, NULp);

    const int streams = args->input.size();
    if (!streams) return NULp;

    GBS_strstruct merged(1000);
    for (int s = 0; s<streams; ++s) {
        if (s>0 && separator) merged.cat(separator);
        merged.cat(args->input.get(s));
    }
    PASS_2_OUT(args, merged.release());

    return NULp;
}

// Splits every input stream at each occurrence of a separator (default: newline)
// and writes each part as separate output stream.
//
// The optional 2nd parameter selects the split mode:
//   0: remove separator (default)
//   1: split before separator (separator starts the next part)
//   2: split behind separator (separator ends the current part)
static GB_ERROR gbl_split(GBL_command_arguments *args) {
    EXPECT_OPTIONAL_PARAMS_CUSTOM(args, 0, NULp, 2, "\"separator\"[,mode]", true, false);

    const char *separator  = args->get_optional_param(0, "\n");
    const int   split_mode = atoi(args->get_optional_param(1, "0")); // 0: remove separator, 1: split before separator, 2: split behind separator

    if (!separator[0]) {
        // e.g. happens if trying to specify character ';' or ','
        return "Invalid separator (cannot be empty; please try to quote the parameter)";
    }

    if (split_mode<0 || split_mode>2) return GBS_global_string("Illegal split mode '%i' (valid: 0..2)", split_mode);

    const size_t sepLen      = strlen(separator);
    const size_t skip_before = split_mode == 0 ? sepLen : 0; // part starts behind the (removed) separator
    const size_t skip_search = split_mode == 1 ? sepLen : 0; // do not find the separator starting the part again

    for (int s = 0; s<args->input.size(); ++s) {
        const char *part_start  = args->input.get(s);
        const char *search_from = part_start;

        for (;;) {
            const char *hit = strstr(search_from, separator);
            if (!hit) {
                COPY_2_OUT(args, part_start); // last part
                break;
            }

            if (split_mode == 2) hit += sepLen; // split behind separator

            const size_t  part_len = hit-part_start;
            char         *part     = ARB_strndup(part_start, part_len);
            PASS_2_OUT(args, part);

            part_start  = hit + skip_before;
            search_from = part_start + skip_search;
        }
    }

    return NULp;
}

// Cuts every input stream into consecutive parts of 'width' characters (default: 1)
// and writes each part as separate output stream. The last part of a stream may be shorter.
// Returns an error if 'width' is smaller than 1.
static GB_ERROR gbl_colsplit(GBL_command_arguments *args) {
    EXPECT_OPTIONAL_PARAMS(args, 0, NULp, 1, "width");

    int width = atoi(args->get_optional_param(0, "1"));
    if (width<1) return "Invalid width";

    for (int i = 0; i<args->input.size(); ++i) {
        const char *in  = args->input.get(i);
        int         len = strlen(in);

        while (len>0) {
            // never step further than the remaining text:
            // avoids forming pointers (far) behind the end of the string for big widths
            const int part_len = std::min(width, len);

            char *part = ARB_strpartdup(in, in+part_len-1);
            PASS_2_OUT(args, part);

            in  += part_len;
            len -= part_len;
        }
    }

    return NULp;
}
// ----------------------------------
//      Extended string functions

static char *do_extract_words(const char *source, const char *chars, float minlen, bool sort_output) {
    /* extract all words in a text that:
     * if minlen < 1.0 -> contain more than minlen*len_of_text characters that also exists in chars
     * if minlen > 1.0 -> contain more than minlen characters that also exists in chars
     */

    int count   = 0;
    int iminlen = int(minlen+.5);

    char  *s  = ARB_strdup(source);
    char  *f  = s;
    char **ps = ARB_calloc<char*>((strlen(source)>>1) + 1);

    while (char *p = strtok(f, " \t,;:|")) {
        f = NULp;
        int cnt = 0;
        const int len = strlen(p);
        for (char *h=p; *h; h++) {
            if (strchr(chars, *h)) ++cnt;
        }

        if (minlen == 1.0) {
            if (cnt != len) continue;
        }
        else if (minlen > 1.0) {
            if (cnt < iminlen) continue;
        }
        else {
            if (len < 3 || cnt < minlen*len) continue;
        }
        ps[count] = p;
        count ++;
    }

    if (sort_output) {
        GB_sort((void **)ps, 0, count, GB_string_comparator, NULp);
    }

    GBS_strstruct buf(1000);

    for (int cnt = 0; cnt<count; ++cnt) {
        if (cnt) buf.put(' ');
        buf.cat(ps[cnt]);
    }

    free(ps);
    free(s);

    return buf.release_memfriendly();
}

// Applies do_extract_words() with sorted output to every input stream.
// Parameters: the character set and the minimum count ("minchars").
static GB_ERROR gbl_extract_words(GBL_command_arguments *args) {
    EXPECT_PARAMS(args, 2, "\"chars\", minchars");

    const char  *chars    = args->get_param(0);
    const float  minchars = atof(args->get_param(1));

    for (int s = 0; s<args->input.size(); ++s) {
        char *words = do_extract_words(args->input.get(s), chars, minchars, true);
        gb_assert(words);
        PASS_2_OUT(args, words);
    }
    return NULp;
}

// Applies do_extract_words() with unsorted output to every input stream.
// Parameters: the character set and the minimum frequency of these characters;
// frequencies below 0.0 or above 1.0 are rejected with an error.
static GB_ERROR gbl_extract_sequence(GBL_command_arguments *args) {
    EXPECT_PARAMS(args, 2, "\"chars\",minFrequency");

    const char  *chars   = args->get_param(0);
    const float  minFreq = atof(args->get_param(1));

    const bool out_of_range = minFreq <0.0 || minFreq > 1.0;
    if (out_of_range) {
        return GBS_global_string("Illegal minFrequency=%f (allowed: ]0.0 .. 1.0[)", minFreq);
    }

    for (int s = 0; s<args->input.size(); ++s) {
        char *extracted = do_extract_words(args->input.get(s), chars, minFreq, false);
        gb_assert(extracted);
        PASS_2_OUT(args, extracted);
    }
    return NULp;
}

// Replaces every input stream by its checksum (as calculated by GBS_checksum),
// written as upper-case hexadecimal number.
// Optional parameters:
//   exclude=chars  characters to remove before calculating (default: none)
//   toupper        convert all characters to uppercase before calculating
static GB_ERROR gbl_checksum(GBL_command_arguments *args) {
    GBL_BEGIN_PARAMS;
    GBL_PARAM_STRING(exclude, "exclude=", "", "Remove given characters before calculating");
    GBL_PARAM_BIT   (upper,   "toupper",  0,  "Convert all characters to uppercase before calculating");
    GBL_TRACE_PARAMS(args);
    GBL_END_PARAMS;

    for (int i=0; i<args->input.size(); i++) {
        long id = GBS_checksum(args->input.get(i), upper, exclude);
        FORMAT_2_OUT(args, "%lX", id); // hexadecimal output
    }
    return NULp;
}

// Replaces every input stream by the decimal result of GBS_gcgchecksum().
// Accepts no parameters.
static GB_ERROR gbl_gcgchecksum(GBL_command_arguments *args) {
    EXPECT_NO_PARAM(args);

    for (int s = 0; s<args->input.size(); ++s) {
        const char *content  = args->input.get(s);
        const long  checksum = GBS_gcgchecksum(content);

        FORMAT_2_OUT(args, "%li", checksum);
    }
    return NULp;
}

// ------------
//      SRT

// Applies all expressions passed as parameters, one after the other, to every input stream
// (using GBS_string_eval_in_env) and passes the final result to the output.
// Stops at and returns the first error reported by an evaluation.
static GB_ERROR gbl_srt(GBL_command_arguments *args) {
    EXPECT_PARAMS_PASSED(args, "expr[,expr]+");

    GB_ERROR error = NULp;
    for (int s = 0; s<args->input.size() && !error; ++s) {
        char *current = NULp; // result of the expressions applied so far (NULp = none applied yet)

        for (int e = 0; e<args->param_count() && !error; ++e) {
            const char *subject   = current ? current : args->input.get(s);
            char       *evaluated = GBS_string_eval_in_env(subject, args->get_param(e), args->get_callEnv());

            if (!evaluated) {
                error = GB_await_error();
                free(current);
            }
            else {
                freeset(current, evaluated);
            }
        }

        if (!error) {
            if (current) { PASS_2_OUT(args, current); }
            else         { IN_2_OUT(args, s); }
        }
    }
    return error;
}

// -----------------------------
//      Calculator Functions

// Bundles two implementations of the same binary operator:
// one working on int values and one working on double values.
struct binop_pair {
    typedef int    (*int_operator)   (int, int);
    typedef double (*double_operator)(double, double);

    int_operator    INT;    // implementation for integer arguments
    double_operator DOUBLE; // implementation for floating-point arguments

    binop_pair(int_operator int_op, double_operator double_op)
        : INT(int_op),
          DOUBLE(double_op)
    {}
};

// Converts both arguments to int (atoi), applies 'num_bin_op'
// and returns the result as newly allocated decimal string.
static char *apply_numeric_binop(const char *arg1, const char *arg2, int (*num_bin_op)(int,int)) {
    const int lhs = atoi(arg1);
    const int rhs = atoi(arg2);

    return GBS_global_string_copy("%i", num_bin_op(lhs, rhs));
}

// Converts both arguments to double (strtod), applies 'num_bin_op'
// and returns the result as newly allocated string (formatted with "%g").
static char *apply_double_binop(const char *arg1, const char *arg2, double (*num_bin_op)(double,double)) {
    const double lhs = strtod(arg1, NULp);
    const double rhs = strtod(arg2, NULp);

    return GBS_global_string_copy("%g", num_bin_op(lhs, rhs));
}

// Applies a binary operator to two numeric strings and returns the result as newly allocated string.
//
// The operand type is detected automatically:
// - if BOTH arguments have integral values, the int implementation is used (result formatted with "%i")
// - otherwise the double implementation is used (result formatted with "%g")
static char *apply_auto_numeric_binop(const char *arg1, const char *arg2, binop_pair multiop) {
    // argument type detection (int vs double)
    int    i1 = atoi(arg1);
    int    i2 = atoi(arg2);
    double d1 = strtod(arg1, NULp);
    double d2 = strtod(arg2, NULp);

    // Only use the integer operator if none of the arguments would get truncated.
    // (A single integral argument is not sufficient: e.g. comparing 2.5 with 2
    // would otherwise compare 2 with 2.)
    if (double(i1) == d1 && double(i2) == d2) {
        int result = multiop.INT(i1, i2);
        return GBS_global_string_copy("%i", result);
    }
    else {
        double result = multiop.DOUBLE(d1, d2);
        return GBS_global_string_copy("%g", result);
    }
}



// Basic arithmetic operators (instantiated for int and double).
template <typename T> static T binop_plus (T v1, T v2) { v1 += v2; return v1; }
template <typename T> static T binop_minus(T v1, T v2) { v1 -= v2; return v1; }
template <typename T> static T binop_mult (T v1, T v2) { v1 *= v2; return v1; }

// Division: yields 0 when dividing by zero.
template <typename T> static T binop_div(T v1, T v2) {
    if (!v2) return 0;
    return v1/v2;
}

// Percentage of v1 in relation to v2: yields 0 if v2 is zero.
template <typename T> static T binop_per_cent(T v1, T v2) {
    if (!v2) return 0;
    return (v1*100)/v2;
}

// Remainder of integer division: yields 0 when dividing by zero.
static int binop_rest(int i1, int i2) {
    if (!i2) return 0;
    return i1%i2;
}


// Calculator command entry points.
// Each one hands a string-to-number adapter (apply_numeric_binop for int arithmetic,
// apply_double_binop for floating-point arithmetic) together with the arithmetic operator
// to gbl_apply_binary_operator().
static GB_ERROR gbl_plus     (GBL_command_arguments *args) { return gbl_apply_binary_operator(args, apply_numeric_binop, binop_plus<int>);        }
static GB_ERROR gbl_fplus    (GBL_command_arguments *args) { return gbl_apply_binary_operator(args, apply_double_binop,  binop_plus<double>);     }
static GB_ERROR gbl_minus    (GBL_command_arguments *args) { return gbl_apply_binary_operator(args, apply_numeric_binop, binop_minus<int>);       }
static GB_ERROR gbl_fminus   (GBL_command_arguments *args) { return gbl_apply_binary_operator(args, apply_double_binop,  binop_minus<double>);    }
static GB_ERROR gbl_mult     (GBL_command_arguments *args) { return gbl_apply_binary_operator(args, apply_numeric_binop, binop_mult<int>);        }
static GB_ERROR gbl_fmult    (GBL_command_arguments *args) { return gbl_apply_binary_operator(args, apply_double_binop,  binop_mult<double>);     }
static GB_ERROR gbl_div      (GBL_command_arguments *args) { return gbl_apply_binary_operator(args, apply_numeric_binop, binop_div<int>);         }
static GB_ERROR gbl_fdiv     (GBL_command_arguments *args) { return gbl_apply_binary_operator(args, apply_double_binop,  binop_div<double>);      }
static GB_ERROR gbl_rest     (GBL_command_arguments *args) { return gbl_apply_binary_operator(args, apply_numeric_binop, binop_rest);             }
static GB_ERROR gbl_per_cent (GBL_command_arguments *args) { return gbl_apply_binary_operator(args, apply_numeric_binop, binop_per_cent<int>);    }
static GB_ERROR gbl_fper_cent(GBL_command_arguments *args) { return gbl_apply_binary_operator(args, apply_double_binop,  binop_per_cent<double>); }

// Comparison operators; the boolean result is returned as T (1 = true, 0 = false).
template <typename T> static T binop_isAbove(T i1, T i2) { return T(i2 < i1); }
template <typename T> static T binop_isBelow(T i1, T i2) { return T(i2 > i1); }
template <typename T> static T binop_isEqual(T i1, T i2) { return T(i2 == i1); }

// Comparison command entry points.
// Each one passes the int and the double variant of the comparison (bundled as binop_pair)
// to gbl_apply_binary_operator(); apply_auto_numeric_binop() decides which variant is used.
static GB_ERROR gbl_isAbove(GBL_command_arguments *args) { return gbl_apply_binary_operator(args, apply_auto_numeric_binop, binop_pair(binop_isAbove<int>, binop_isAbove<double>)); }
static GB_ERROR gbl_isBelow(GBL_command_arguments *args) { return gbl_apply_binary_operator(args, apply_auto_numeric_binop, binop_pair(binop_isBelow<int>, binop_isBelow<double>)); }
static GB_ERROR gbl_isEqual(GBL_command_arguments *args) { return gbl_apply_binary_operator(args, apply_auto_numeric_binop, binop_pair(binop_isEqual<int>, binop_isEqual<double>)); }

// Returns the factor needed to shift the decimal point by 'digits' places, i.e. 10^digits.
// Negative 'digits' yield the reciprocal (1.0 / 10^-digits).
//
// The factor is accumulated as double: an int accumulator overflows
// as soon as 10 or more digits are requested.
inline double float_shift_factor(int digits) {
    const bool reciprocal = digits<0;

    // use a wider type for the loop counter, because -INT_MIN is not representable as int
    long long todo = digits;
    if (reciprocal) todo = -todo;

    double factor = 1.0;
    while (todo>0) { // IRRELEVANT_LOOP (gcc 9.x refuses to optimize)
        factor *= 10.0;
        --todo;
    }
    return reciprocal ? 1.0/factor : factor;
}

// Rounds the numeric value of every input stream to the number of digits given as parameter
// and writes the result formatted with "%g".
static GB_ERROR gbl_round(GBL_command_arguments *args) {
    EXPECT_PARAMS(args, 1, "digits");

    const int    digits = atoi(args->get_param(0));
    const double shift  = float_shift_factor(digits);

    for (int s = 0; s<args->input.size(); ++s) {
        const double value   = strtod(args->input.get(s), NULp);
        const double rounded = round(value*shift)/shift;

        FORMAT_2_OUT(args, "%g", rounded);
    }
    return NULp;
}



// boolean operators

// Logical negation: writes "1" for every input stream converting (via atoi) to zero, "0" otherwise.
static GB_ERROR gbl_not(GBL_command_arguments *args) {
    EXPECT_NO_PARAM(args);

    for (int s = 0; s<args->input.size(); ++s) {
        const int value = atoi(args->input.get(s));
        FORMAT_2_OUT(args, "%i", !value);
    }
    return NULp;
}

// Logical conjunction over all input streams:
// writes a single "1" if every stream converts (via atoi) to non-zero, otherwise a single "0".
static GB_ERROR gbl_and(GBL_command_arguments *args) {
    EXPECT_NO_PARAM(args);

    bool all_true = true;
    for (int s = 0; s<args->input.size(); ++s) {
        if (!atoi(args->input.get(s))) {
            all_true = false;
            break; // result is known
        }
    }
    FORMAT_2_OUT(args, "%i", all_true);
    return NULp;
}
// Logical disjunction over all input streams:
// writes a single "1" if any stream converts (via atoi) to non-zero, otherwise a single "0".
static GB_ERROR gbl_or(GBL_command_arguments *args) {
    EXPECT_NO_PARAM(args);

    bool any_true = false;
    for (int s = 0; s<args->input.size(); ++s) {
        if (atoi(args->input.get(s))) {
            any_true = true;
            break; // result is known
        }
    }
    FORMAT_2_OUT(args, "%i", any_true);
    return NULp;
}

// Interprets the content of every input stream as index of a parameter,
// evaluates that parameter as subcommand (on an empty input)
// and writes the result to the output.
// Stops at and returns the first error (invalid index or failed evaluation).
static GB_ERROR gbl_select(GBL_command_arguments *args) {
    ACCEPT_ANY_PARAMS(args);

    GB_ERROR error = NULp;
    for (int s = 0; !error && s<args->input.size(); ++s) {
        const int selected = atoi(args->input.get(s));

        error = check_valid_param_index(args, selected);
        if (error) break;

        char *result = args->get_callEnv().interpret_subcommand("", args->get_param(selected)); // @@@ EVALUATED_PARAM (#768)
        if (result) {
            PASS_2_OUT(args, result);
        }
        else {
            error = GB_await_error();
        }
    }
    return error;
}

// Reads the database fields named by the parameters from the referenced item
// and writes their concatenated contents as one output stream.
// Fields that cannot be read contribute nothing.
static GB_ERROR gbl_readdb(GBL_command_arguments *args) {
    EXPECT_PARAMS_PASSED(args, "fieldname[,fieldname]+");
    EXPECT_ITEM_REFERENCED(args);
    COMMAND_DROPS_INPUT_STREAMS(args);

    GBS_strstruct contents(1024);

    const int fields = args->param_count();
    for (int f = 0; f<fields; ++f) {
        char *content = GBT_read_as_string(args->get_item_ref(),
                                           args->track_field_access(args->get_param(f)));
        if (!content) continue;

        contents.cat(content);
        free(content);
    }

    PASS_2_OUT(args, contents.release());
    return NULp;
}


// Kind of database item as detected by identify_gb_item()
enum GBT_ITEM_TYPE {
    GBT_ITEM_UNKNOWN, // neither species nor gene
    GBT_ITEM_SPECIES, // entry 'species' below 'species_data'
    GBT_ITEM_GENE     // entry 'gene' below 'gene_data' of a species
};

static GBT_ITEM_TYPE identify_gb_item(GBDATA *gb_item) {
    /* Detects the item type from the keys of the item and its ancestors.
     *
     * returns: GBT_ITEM_UNKNOWN    -> unknown database_item
     *          GBT_ITEM_SPECIES    -> /species_data/species
     *          GBT_ITEM_GENE       -> /species_data/species/gene_data/gene */

    if (!gb_item) return GBT_ITEM_UNKNOWN;

    GBDATA *gb_father = GB_get_father(gb_item);
    if (!gb_father) return GBT_ITEM_UNKNOWN;

    const char *item_key = GB_KEY(gb_item);

    if (strcmp(item_key, "species") == 0) {
        const bool in_species_data = strcmp(GB_KEY(gb_father), "species_data") == 0;
        return in_species_data ? GBT_ITEM_SPECIES : GBT_ITEM_UNKNOWN;
    }

    if (strcmp(item_key, "gene") == 0) {
        // a gene has to be located inside the 'gene_data' of a species
        if (strcmp(GB_KEY(gb_father), "gene_data") == 0 &&
            identify_gb_item(GB_get_father(gb_father)) == GBT_ITEM_SPECIES)
        {
            return GBT_ITEM_GENE;
        }
    }

    return GBT_ITEM_UNKNOWN;
}

// --------------------------------------------------------------------------------
// taxonomy caching

#if defined(DEBUG)
// uncomment the following define to trace creation and invalidation of cached taxonomies:
// #define DUMP_TAXONOMY_CACHING
#endif


#define GROUP_COUNT_CHARS 6                         // characters in taxonomy-key reserved for group-counter (hex number)
#define BITS_PER_HEXCHAR  4                         // each hex digit of the group-counter encodes 4 bits
#define MAX_GROUPS        (1 << (GROUP_COUNT_CHARS*BITS_PER_HEXCHAR)) // resulting number of groups

// Taxonomy of one tree, cached to avoid re-reading the tree for each taxonomy lookup.
// Instances are created by get_cached_taxonomy() and destroyed by free_cached_taxonomy().
struct cached_taxonomy {
    char    *tree_name;         // tree for which taxonomy is cached here
    int      groups;            // number of named groups in tree (at time of caching)
    GB_HASH *taxonomy; /* keys: "!species", ">XXXXgroup" and "<root>".
                        * Species and groups contain their first parent (i.e. '>XXXXgroup' or '<root>').
                        * Species not in hash are not members of tree.
                        * The 'XXXX' in groupname is simply a counter to avoid multiple groups with same name.
                        * The group-db-entries are stored in hash as pointers ('>>%p') and
                        * point to their own group entry ('>XXXXgroup')
                        *
                        * Note: the number of 'X's in 'XXXX' above is defined by GROUP_COUNT_CHARS!
                        * Note: for keeled groups build_taxonomy_rek() inserts KEELED_INDICATOR between
                        *       the counter and the group name.
                        */
};

// Releases a cached taxonomy including its hash and its tree name.
static void free_cached_taxonomy(cached_taxonomy *ct) {
    GBS_free_hash(ct->taxonomy);
    free(ct->tree_name);

    free(ct);
}

// Recursively fills 'tax_hash' with the taxonomy of the subtree below 'node'.
//
// - each leaf linked to a species is stored as "!<name>" with 'parent_group' as value
// - each node with group info is stored under a unique key (">" + hex counter + optional
//   keel indicator + group name) with 'parent_group' as value, and additionally under
//   ">>" + address of its database entry with the unique key as value
//
// 'parent_group' is the key of the innermost group containing 'node' (or "<root>").
// 'group_counter' counts the groups stored so far (used to make the group keys unique).
// All values stored in the hash are heap copies.
static void build_taxonomy_rek(TreeNode *node, GB_HASH *tax_hash, const char *parent_group, int *group_counter) {
    if (node->is_leaf()) {
        GBDATA *gb_species = node->gb_node;
        if (!gb_species) return; // zombie

        const char *name = GBT_get_name(gb_species);
        if (name) GBS_write_hash(tax_hash, GBS_global_string("!%s", name), (long)ARB_strdup(parent_group));
        return;
    }

    if (!node->has_group_info()) { // unnamed inner node -> sons belong to the same parent group
        build_taxonomy_rek(node->get_leftson(),  tax_hash, parent_group, group_counter);
        build_taxonomy_rek(node->get_rightson(), tax_hash, parent_group, group_counter);
        return;
    }

    // node with name
    (*group_counter)++;
    gb_assert((*group_counter)<MAX_GROUPS); // overflow - increase GROUP_COUNT_CHARS

    TreeNode *keelTarget  = node->keelTarget();
    char keelIndicator[2] = { char(keelTarget ? KEELED_INDICATOR : 0), 0 };

    char *own_key = GBS_global_string_copy(">%0*x%s%s",
                                           GROUP_COUNT_CHARS, *group_counter,
                                           keelIndicator,
                                           node->name);
    GBS_write_hash(tax_hash, own_key, (long)ARB_strdup(parent_group));

    const char *pointer_key = GBS_global_string(">>%p", node->gb_node);
    GBS_write_hash(tax_hash, pointer_key, (long)ARB_strdup(own_key));

    // normal group: pass down own key to both sons
    const char *left_parent  = own_key;
    const char *right_parent = own_key;

    if (keelTarget) { // keeled group (projected to son): pass down own key only to keelTarget
        if (keelTarget->is_leftson()) right_parent = parent_group;
        else                          left_parent  = parent_group;
    }

    build_taxonomy_rek(node->get_leftson(),  tax_hash, left_parent,  group_counter);
    build_taxonomy_rek(node->get_rightson(), tax_hash, right_parent, group_counter);

    free(own_key);
}

// Maps tree names to their cached_taxonomy (stored as long); created lazily by get_cached_taxonomy().
static GB_HASH *cached_taxonomies = NULp;

static bool is_cached_taxonomy(const char */*key*/, long val, void *cl_ct) {
    cached_taxonomy *ct1 = (cached_taxonomy *)val;
    cached_taxonomy *ct2 = (cached_taxonomy *)cl_ct;

    return ct1 == ct2;
}

static const char *tree_of_cached_taxonomy(cached_taxonomy *ct) {
    /* Returns the key (= tree name) under which 'ct' is stored in 'cached_taxonomies'
     * or NULp if 'ct' is not stored there.
     *
     * The hash is searched for the taxonomy itself; searching for the tree name
     * does not work, because the tree possibly already was deleted.
     */
    const char *hash_key = GBS_hash_next_element_that(cached_taxonomies, NULp, is_cached_taxonomy, ct);

#ifdef DUMP_TAXONOMY_CACHING
    if (hash_key) printf("tree_of_cached_taxonomy: tree='%s' ct->tree_name='%s'\n", hash_key, ct->tree_name);
#endif // DUMP_TAXONOMY_CACHING

    return hash_key;
}

static void flush_taxonomy_cb(GBDATA *gbd, cached_taxonomy *ct) {
    /* This callback is bound to all tree db members below "/tree_data/tree_xxx" which
     * may have an effect on the displayed taxonomy.
     * It invalidates the cached taxonomy for that tree (when changed or deleted).
     *
     * gbd: the database entry that triggered the callback
     * ct:  the cached taxonomy to flush (may already have been flushed by an earlier call)
     */

    GB_ERROR    error = NULp;
    const char *found = tree_of_cached_taxonomy(ct); // NULp if 'ct' is no longer stored in the cache

    if (found) {
#ifdef DUMP_TAXONOMY_CACHING
        fprintf(stderr, "Deleting cached taxonomy ct=%p (tree='%s')\n", ct, found);
#endif // DUMP_TAXONOMY_CACHING
        GBS_write_hash(cached_taxonomies, found, 0); // delete cached taxonomy from hash
        free_cached_taxonomy(ct);
    }
#ifdef DUMP_TAXONOMY_CACHING
    else {
        fprintf(stderr, "No tree found for cached_taxonomies ct=%p (already deleted?)\n", ct);
    }
#endif // DUMP_TAXONOMY_CACHING

    // callbacks are only removed explicitly if 'gbd' is not currently being deleted
    if (!GB_inside_callback(gbd, GB_CB_DELETE)) {
        GB_remove_all_callbacks_to(gbd, GB_CB_CHANGED_OR_DELETED, CASTSIG(GB_CB, flush_taxonomy_cb));
    }

    // from here on 'found' is only used as flag ("a taxonomy has been flushed")
    if (found && !error) {
        GBDATA *gb_main = GB_get_gb_main_during_cb();
        if (gb_main) {
            GBDATA *gb_tree_refresh = GB_search(gb_main, AWAR_TREE_REFRESH, GB_INT);
            if (!gb_tree_refresh) {
                error = GBS_global_string("%s (while trying to force refresh)", GB_await_error());
            }
            else {
                GB_touch(gb_tree_refresh); // Note : force tree update
            }
        }
    }

    // errors cannot be returned from a callback -> report them on stderr
    if (error) {
        fprintf(stderr, "Error in flush_taxonomy_cb: %s\n", error);
    }
}

static void flush_taxonomy_if_new_group_cb(GBDATA *gb_tree, cached_taxonomy *ct) {
    // Detects the creation of new groups:
    // counts the named groups of 'gb_tree' and calls flush_taxonomy_cb() manually
    // if their number differs from the number stored when the taxonomy was cached.
    // Does nothing if 'ct' is no longer stored in the cache.
#ifdef DUMP_TAXONOMY_CACHING
    fputs("flush_taxonomy_if_new_group_cb() has been called\n", stderr);
#endif // DUMP_TAXONOMY_CACHING

    if (!tree_of_cached_taxonomy(ct)) {
#ifdef DUMP_TAXONOMY_CACHING
        fprintf(stderr, "cached taxonomy no longer valid.\n");
#endif // DUMP_TAXONOMY_CACHING
        return;
    }

    int counted = 0;
    for (GBDATA *gb_node = GB_entry(gb_tree, "node"); gb_node; gb_node = GB_nextEntry(gb_node)) {
        if (GB_entry(gb_node, "group_name")) ++counted; // count named groups only
    }

#ifdef DUMP_TAXONOMY_CACHING
    fprintf(stderr, "cached_groups=%i  counted_groups=%i\n", ct->groups, counted);
#endif // DUMP_TAXONOMY_CACHING

    if (counted == ct->groups) return; // no new group

#ifdef DUMP_TAXONOMY_CACHING
    fprintf(stderr, "Number of groups changed -> invoking flush_taxonomy_cb() manually\n");
#endif // DUMP_TAXONOMY_CACHING
    flush_taxonomy_cb(gb_tree, ct);
}

// Returns the cached taxonomy for the tree 'tree_name'.
//
// If no taxonomy is cached for that tree yet, the tree is loaded and linked, its taxonomy
// is stored in 'cached_taxonomies', and database callbacks are installed which flush the
// cached taxonomy when the tree or one of its group names changes.
//
// On failure NULp is returned and '*error' is set; on success '*error' is NULp.
static cached_taxonomy *get_cached_taxonomy(GBDATA *gb_main, const char *tree_name, GB_ERROR *error) {
    long cached;
    *error = NULp;
    if (!cached_taxonomies) {
        cached_taxonomies = GBS_create_hash(20, GB_IGNORE_CASE); // created on first use
    }
    cached = GBS_read_hash(cached_taxonomies, tree_name);
    if (!cached) {
        TreeNode *tree    = GBT_read_tree(gb_main, tree_name, new SimpleRoot);
        if (!tree) *error = GB_await_error();
        else     *error   = GBT_link_tree(tree, gb_main, false, NULp, NULp);

        if (!*error) {
            GBDATA *gb_tree = GBT_find_tree(gb_main, tree_name);
            if (!gb_tree) {
                *error = GBS_global_string("Can't find tree '%s'", tree_name);
            }
            else {
                cached_taxonomy *ct            = ARB_alloc<cached_taxonomy>(1);
                long             nodes         = GBT_count_leafs(tree);
                int              group_counter = 0;

                ct->tree_name = ARB_strdup(tree_name);
                ct->taxonomy  = GBS_create_dynaval_hash(int(nodes), GB_IGNORE_CASE, GBS_dynaval_free);
                ct->groups    = 0; // counted below

                build_taxonomy_rek(tree, ct->taxonomy, "<root>", &group_counter);
                cached = (long)ct;
                GBS_write_hash(cached_taxonomies, tree_name, (long)ct);

                // detect creation of new groups (new sons of the tree container)
                GB_remove_all_callbacks_to(gb_tree, GB_CB_SON_CREATED, CASTSIG(GB_CB, flush_taxonomy_if_new_group_cb));
                GB_add_callback(gb_tree, GB_CB_SON_CREATED, makeDatabaseCallback(flush_taxonomy_if_new_group_cb, ct));

                {
                    GBDATA *gb_tree_entry = GB_entry(gb_tree, "tree");
                    GBDATA *gb_group_node;

                    // flush when the 'tree' entry gets changed or deleted
                    if (gb_tree_entry) {
                        GB_remove_all_callbacks_to(gb_tree_entry, GB_CB_CHANGED_OR_DELETED, CASTSIG(GB_CB, flush_taxonomy_cb));
                        GB_add_callback(gb_tree_entry, GB_CB_CHANGED_OR_DELETED, makeDatabaseCallback(flush_taxonomy_cb, ct));
                    }

                    // add callbacks for all node/group_name subentries
                    for (gb_group_node = GB_entry(gb_tree, "node");
                         gb_group_node;
                         gb_group_node = GB_nextEntry(gb_group_node))
                    {
                        GBDATA *gb_group_name = GB_entry(gb_group_node, "group_name");
                        if (gb_group_name) { // group with id = 0 has no name
                            GB_remove_all_callbacks_to(gb_group_name, GB_CB_CHANGED_OR_DELETED, CASTSIG(GB_CB, flush_taxonomy_cb));
                            GB_add_callback(gb_group_name, GB_CB_CHANGED_OR_DELETED, makeDatabaseCallback(flush_taxonomy_cb, ct));
                            ct->groups++; // compared against a fresh count in flush_taxonomy_if_new_group_cb()
                        }
                    }
                }
#ifdef DUMP_TAXONOMY_CACHING
                fprintf(stderr, "Created taxonomy hash for '%s' (ct=%p)\n", tree_name, ct);
#endif // DUMP_TAXONOMY_CACHING
            }
        }

        destroy(tree); // the tree itself is no longer needed after the hash has been built
    }

    if (!*error) {
        cached_taxonomy *ct = (cached_taxonomy*)cached;
        gb_assert(ct);
        return ct;
    }

    return NULp;
}

// Builds the taxonomy string ("parent/.../group") for the group stored under 'group_key'
// in 'tax_hash', containing at most 'depth' group levels (innermost levels are kept).
//
// Returns a heap copy, or NULp (with '*error' set) if 'group_key' or
// one of the visited parents is not stored in the hash.
static char *get_taxonomy_string(GB_HASH *tax_hash, const char *group_key, int depth, GB_ERROR *error) {
    gb_assert(depth>0);
    gb_assert(!(group_key[0] == '>' && group_key[1] == '>')); // internal group-pointers not allowed here!

    const long found = GBS_read_hash(tax_hash, group_key);
    if (!found) {
        *error = GBS_global_string("Not in tax_hash: '%s'", group_key);
        return NULp;
    }

    const char *parent_group_key = (const char *)found;
    const char *own_name         = group_key+(GROUP_COUNT_CHARS+1); // skip '>' and group-counter

    const bool root_reached = strcmp(parent_group_key, "<root>") == 0;
    if (root_reached || !(depth>1)) {
        return ARB_strdup(own_name); // return own group name
    }

    char *parent_name = get_taxonomy_string(tax_hash, parent_group_key, depth-1, error);
    if (!parent_name) {
        *error = GBS_global_string("In get_taxonomy_string(%s): %s", group_key, *error);
        return NULp;
    }

    char *result = GBS_global_string_copy("%s/%s", parent_name, own_name);
    free(parent_name);
    return result;
}

static const char *get_taxonomy(GBDATA *gb_species_or_group, const char *tree_name, bool is_current_tree, int depth, GB_ERROR *error) {
    // Returns the taxonomy string (up to 'depth' levels) of a species or group in tree 'tree_name'.
    //
    // The container type is detected by its entries:
    // - only 'name'       -> species
    // - only 'group_name' -> group (only supported if 'is_current_tree' is true)
    //
    // Result is NULp if '*error' gets set.
    // Otherwise it points to a static buffer, a GBS_global_string buffer or a string literal
    // (i.e. the caller must not free it).

    GBDATA          *gb_main = GB_get_root(gb_species_or_group);
    cached_taxonomy *tax     = get_cached_taxonomy(gb_main, tree_name, error);

    if (!tax) return NULp;

    GBDATA *gb_name       = GB_entry(gb_species_or_group, "name");
    GBDATA *gb_group_name = GB_entry(gb_species_or_group, "group_name");

    if (gb_name && gb_group_name) {
        *error = "Container has 'name' and 'group_name' entry - can't detect container type";
        return NULp;
    }
    if (!gb_name && !gb_group_name) {
        *error = "Container has neither 'name' nor 'group_name' entry - can't detect container type";
        return NULp;
    }

    const char *result = NULp;

    if (gb_name) { // it's a species
        char *species_name = GB_read_string(gb_name);
        if (!species_name) {
            *error = GBS_global_string("Species without 'name' entry!");
            return NULp;
        }

        GB_HASH *tax_hash = tax->taxonomy;
        long     hashed   = GBS_read_hash(tax_hash, GBS_global_string("!%s", species_name)); // species keys are prefixed with '!'

        if (!hashed) {
            result = GBS_global_string("Species '%s' not in '%s'", species_name, tree_name);
        }
        else {
            const char *parent_group = (const char *)hashed;

            if (strcmp(parent_group, "<root>") != 0) {
                static char *parent = NULp; // keeps the result alive until the next call

                freeset(parent, get_taxonomy_string(tax_hash, parent_group, depth, error));
                result = parent;
            }
            else {
                result = ""; // not member of any group
            }
        }
        free(species_name);
    }
    else { // it's a group
        char *group_name = GB_read_string(gb_group_name);
        if (!group_name) {
            *error = "Group without 'group_name' entry";
            return NULp;
        }

        if (!is_current_tree) {
            *error = "It's not possible to specify the tree name in taxonomy() for groups";
        }
        else {
            GB_HASH *tax_hash = tax->taxonomy;
            long     hashed   = GBS_read_hash(tax_hash, GBS_global_string(">>%p", gb_species_or_group)); // lookup by group-pointer

            if (!hashed) {
                result = GBS_global_string("Group '%s' not in '%s'", group_name, tree_name);
            }
            else {
                static char *full_group = NULp; // keeps the result alive until the next call
                const char  *group_id   = (const char *)hashed;

                freeset(full_group, get_taxonomy_string(tax_hash, group_id, depth, error));
                result = full_group;
            }
        }
        free(group_name);
    }

    return result;
}

static GB_ERROR gbl_taxonomy(GBL_command_arguments *args) {
    // ACI command 'taxonomy':
    // - taxonomy(depth)           uses the default tree (as reported by args->get_treename())
    // - taxonomy(tree_name,depth) uses the specified tree
    //
    // Writes the taxonomy of the referenced item to the output.
    // If no default tree is available, the text "No default tree" is written instead.

    GB_ERROR error = check_optional_parameters(args, 1, "count", 1, "tree_name", false, true);
    if (error) return error;

    EXPECT_ITEM_REFERENCED(args);
    COMMAND_DROPS_INPUT_STREAMS(args);

    char *tree_name       = NULp;
    bool  is_current_tree = false;
    int   depth           = -1;
    char *result          = NULp;

    if (args->param_count() != 1) { // 'tree_name', 'depth'
        tree_name = ARB_strdup(args->get_param(0));
        depth     = atoi(args->get_param(1));
    }
    else if (args->get_treename()) { // only 'depth' -> use default tree
        tree_name       = ARB_strdup(args->get_treename());
        depth           = atoi(args->get_param(0));
        is_current_tree = true;
    }
    else {
        result = ARB_strdup("No default tree");
    }

    if (!result) {
        if (depth<1) {
            error = GBS_global_string("Illegal depth '%i' (allowed 1..n)", depth);
        }
        else {
            const char *taxonomy_string = get_taxonomy(args->get_item_ref(), tree_name, is_current_tree, depth, &error);
            if (taxonomy_string) result = ARB_strdup(taxonomy_string);
        }
    }

    gb_assert(contradicted(result, error)); // exactly one of both is expected
    if (result) {
        PASS_2_OUT(args, result);
    }
    free(tree_name);

    return error;
}

static GB_ERROR gbl_sequence(GBL_command_arguments *args) {
    // ACI command 'sequence' (takes no parameters):
    // - species: writes the sequence data of the default alignment
    //            (or an empty string, if the species has no data in that alignment)
    // - gene:    writes the gene sequence
    // - other:   reports an error

    EXPECT_ITEM_REFERENCED(args);
    COMMAND_DROPS_INPUT_STREAMS(args);

    GB_ERROR error = check_no_parameter(args);
    if (!error) {
        switch (identify_gb_item(args->get_item_ref())) {
            case GBT_ITEM_UNKNOWN: {
                error = "'sequence' used for unknown item";
                break;
            }
            case GBT_ITEM_SPECIES: {
                char *use = GBT_get_default_alignment(args->get_gb_main());

                if (!use) {
                    error = GB_await_error();
                }
                else {
                    GBDATA *gb_seq = GBT_find_sequence(args->get_item_ref(), use);

                    if (gb_seq) {
                        // do not pass a failed read (NULp) to the output; report the error instead
                        // (same handling as in the gene case below)
                        char *seq = GB_read_string(gb_seq);

                        if (!seq) {
                            error = GB_await_error();
                        }
                        else {
                            PASS_2_OUT(args, seq);
                        }
                    }
                    else {
                        COPY_2_OUT(args, ""); // if current alignment does not exist -> return empty string
                    }

                    free(use);
                }
                break;
            }
            case GBT_ITEM_GENE: {
                char *seq = GBT_read_gene_sequence(args->get_item_ref(), true, 0);

                if (!seq) {
                    error = GB_await_error();
                }
                else {
                    PASS_2_OUT(args, seq);
                }

                break;
            }
        }
    }
    return error;
}

static GB_ERROR gbl_export_sequence(GBL_command_arguments *args) {
    // ACI command 'export_sequence' (takes no parameters):
    // writes the sequence provided by the hook installed via GB_set_export_sequence_hook().
    // Only works for species and only if a hook has been installed.

    EXPECT_ITEM_REFERENCED(args);
    COMMAND_DROPS_INPUT_STREAMS(args);

    GB_ERROR error = check_no_parameter(args);
    if (error) return error;

    switch (identify_gb_item(args->get_item_ref())) {
        case GBT_ITEM_UNKNOWN: {
            error = "'export_sequence' used for unknown item";
            break;
        }
        case GBT_ITEM_GENE: {
            error = "'export_sequence' cannot be used for gene";
            break;
        }
        case GBT_ITEM_SPECIES: {
            if (get_export_sequence) {
                size_t      seq_len;
                const char *exported = get_export_sequence(args->get_item_ref(), &seq_len, &error);

                gb_assert(error || exported);

                if (exported) {
                    PASS_2_OUT(args, ARB_strduplen(exported, seq_len)); // output gets its own heap copy
                }
            }
            else {
                error = "No export-sequence-hook defined (can't use 'export_sequence' here)";
            }
            break;
        }
    }

    return error;
}

static GB_ERROR gbl_ali_name(GBL_command_arguments *args) {
    // ACI command 'ali_name' (takes no parameters):
    // writes the name of the default alignment.

    COMMAND_DROPS_INPUT_STREAMS(args);

    GB_ERROR error = check_no_parameter(args);
    if (error) return error;

    char *default_ali = GBT_get_default_alignment(args->get_gb_main());
    if (default_ali) {
        PASS_2_OUT(args, default_ali); // the name is handed to the output (not freed here)
    }
    else {
        error = GB_await_error();
    }
    return error;
}

static GB_ERROR gbl_sequence_type(GBL_command_arguments *args) {
    // ACI command 'sequence_type' (takes no parameters):
    // writes the type string of the default alignment.

    COMMAND_DROPS_INPUT_STREAMS(args);

    GB_ERROR error = check_no_parameter(args);
    if (error) return error;

    GBDATA *gb_main     = args->get_gb_main();
    char   *default_ali = GBT_get_default_alignment(gb_main);

    if (default_ali) {
        PASS_2_OUT(args, GBT_get_alignment_type_string(gb_main, default_ali));
        free(default_ali);
    }
    else {
        error = GB_await_error();
    }

    return error;
}

static GB_ERROR format(GBL_command_arguments *args, bool simple_format) {
    // Shared implementation of the ACI commands "format" and "format_sequence".
    //
    // simple_format: true = "format", false="format_sequence"
    //
    // Each input stream is formatted into one output stream:
    // - "format"          word-wraps the text at characters listed in 'nl' and always breaks
    //                     at characters listed in 'forcenl'; lines are indented by 'firsttab'/'tab'.
    // - "format_sequence" breaks the data every 'width' characters, inserts a space every 'gap'
    //                     characters and optionally adds position numbers left or right of each line.
    //
    // Returns NULp on success, otherwise an error message.

    GB_ERROR error = NULp;
    int      ic;

    GBL_BEGIN_PARAMS;
    GBL_PARAM_INT(firsttab, "firsttab=", 10, "Indent first line");
    GBL_PARAM_INT(tab,      "tab=",      10, "Indent not first line");
    GBL_PARAM_INT(width,    "width=",    50, "Sequence width (bases only)");

    // "format_sequence"-only
    GBL_PARAM_BIT (numleft,  PARAM_IF(!simple_format, "numleft"),  0,  "Numbers left of sequence");
    GBL_PARAM_INT (numright, PARAM_IF(!simple_format, "numright="), 0, "Numbers right of sequence (specifies width; -1 -> auto-width)");
    GBL_PARAM_UINT(gap,      PARAM_IF(!simple_format, "gap="),     10, "Insert ' ' every n sequence characters");

    // "format"-only
    GBL_PARAM_STRING(nl,      PARAM_IF(simple_format, "nl="),      " ",  "Break line at characters 'str' if wrapping needed");
    GBL_PARAM_STRING(forcenl, PARAM_IF(simple_format, "forcenl="), "\n", "Always break line at characters 'str'");

    GBL_TRACE_PARAMS(args);
    GBL_END_PARAMS;

    if (tab      < 0) tab = 0;
    if (firsttab < 0) firsttab = 0;

    if (width == 0)               return "Illegal zero width";
    if (numleft && numright != 0) return "You may only specify 'numleft' OR 'numright',  not both.";

    if (gap<1) gap = UINT_MAX;

    for (ic = 0; ic<args->input.size(); ++ic) {
        const char *src           = args->input.get(ic);
        size_t      data_size     = strlen(src);
        size_t      needed_size;
        size_t      line_size;
        int         numright_used = numright;

        if (numright_used<0) {
            numright_used = calc_digits(data_size);
        }

        // calculate an upper bound for the size of the result buffer
        if (simple_format) {
            // Every input character may terminate a line (hard wrap with width==1 or
            // consecutive forced newlines) and each line break emits '\n' plus 'tab' spaces.
            // Hence a single input character produces at most tab+2 output bytes.
            line_size   = 0; // only used for 'numright' alignment by "format_sequence"
            needed_size = data_size*(size_t(tab)+2) + firsttab + 1 + 10;
        }
        else {
            size_t gapsPerLine = (width-1)/gap;
            size_t lines       = data_size/width+1;

            line_size = tab + width + gapsPerLine + 1;

            if (numright_used) {
                // add space for numright
                line_size += numright_used+1; // plus space
            }

            // Position numbers (numleft/numright) may need more columns than reserved by
            // 'tab' or 'numright' -> reserve room for one full-width number plus separator per line.
            // (not added to 'line_size', because that is used to align 'numright' below)
            size_t number_slack = calc_digits(data_size)+1;

            needed_size = lines*(line_size+number_slack) + firsttab + 1 + 10;
        }

        char *result = ARB_alloc<char>(needed_size);
        if (!result) {
            error = GBS_global_string("Out of memory (tried to alloc %zu bytes)", needed_size);
        }
        else {
            char   *dst       = result;
            size_t  rest_data = data_size;

            if (simple_format) {
                /* format string w/o gaps or numleft
                 * does word-wrapping at chars in nl
                 */

                // build wrap table (0 = no wrap char, 1 = soft wrap char, 2 = forced newline)
                unsigned char isWrapChar[256];
                memset(isWrapChar, 0, sizeof(isWrapChar));
                for (int i = 0; nl[i]; ++i) isWrapChar[(unsigned char)nl[i]] = 1;
                for (int i = 0; forcenl[i]; ++i) isWrapChar[(unsigned char)forcenl[i]] = 2;

                if (firsttab>0) {
                    memset(dst, ' ', firsttab);
                    dst += firsttab;
                }

                // emit full lines as long as more than 'width' characters are left
                while (width>0 && rest_data>unsigned(width)) {
                    int take; // number of characters copied into the current line
                    int move; // number of characters consumed from input (may include one dropped wrap char)
                    int took;

                    // search backwards for the last wrap character within reach
                    for (take = width; take > 0; --take) {
                        if (isWrapChar[(unsigned char)src[take]]) break;
                    }
                    if (take <= 0) { // no wrap character found -> hard wrap at width
                        take  = move = width;
                    }
                    else { // soft wrap at last found wrap character
                        move = take+1;
                    }

                    for (took = 0; took<take; took++) {
                        char c = src[took];
                        if (isWrapChar[(unsigned char)c] == 2) { // forced newline
                            take = took;
                            move = take+1;
                            break;
                        }
                        dst[took] = c;
                    }

                    dst       += take;
                    src       += move;
                    rest_data -= move;

                    if (rest_data>0) {
                        *dst++ = '\n';
                        if (tab>0) {
                            memset(dst, ' ', tab);
                            dst += tab;
                        }
                    }
                }

                // copy the remaining data (only forced newlines cause line breaks here)
                if (rest_data>0) {
                    size_t j, k;
                    for (j = 0, k = 0; j<rest_data; ++j) {
                        char c = src[j];

                        if (isWrapChar[(unsigned char)c] == 2) {
                            dst[k++] = '\n';
                            if (tab>0) {
                                memset(dst+k, ' ', tab);
                                k += tab;
                            }
                        }
                        else {
                            dst[k++] = c;
                        }
                    }
                    src       += j;
                    dst       += k;
                    rest_data  = 0;
                }
            }
            else {
                // "format_sequence" with gaps and numleft
                char       *format        = NULp;
                const char *src_start     = src;
                const char *dst_linestart = dst;

                if (numleft) {
                    /* Warning: Be very careful, when you change format strings here!
                     * currently all format strings result in '%u' or '%-##u' (where # are digits)
                     */
                    if (firsttab>0) {
                        char *firstFormat = GBS_global_string_copy("%%-%iu ", firsttab-1);
                        dst += sprintf(dst, firstFormat, (unsigned)1);
                        free(firstFormat);
                    }
                    else {
                        dst += sprintf(dst, "%u ", (unsigned)1);
                    }
                    format = tab>0 ? GBS_global_string_copy("%%-%iu ", tab-1) : ARB_strdup("%u ");
                }
                else if (firsttab>0) {
                    memset(dst, ' ', firsttab);
                    dst += firsttab;
                }

                while (rest_data>0) {
                    size_t take = (width>0 && rest_data>unsigned(width)) ? width : rest_data;

                    rest_data -= take;

                    // copy blocks of 'gap' characters, separated by a space
                    while (take>gap) {
                        memcpy(dst, src, gap);
                        dst  += gap;
                        src  += gap;
                        *dst++ = ' ';
                        take -= gap;
                    }

                    memcpy(dst, src, take);
                    dst += take;
                    src += take;

                    if (numright_used) {
                        if (rest_data) *dst++ = ' ';
                        else {
                            // fill in missing spaces for proper alignment of numright
                            size_t currSize = dst-dst_linestart;
                            size_t wantSize = line_size-numright_used-1;
                            if (currSize<wantSize) {
                                size_t spaces  = wantSize-currSize;
                                memset(dst, ' ', spaces);
                                dst           += spaces;
                            }
                        }
                        unsigned int num  = (src-src_start);
                        dst              += sprintf(dst, "%*u", numright_used, num);
                    }

                    if (rest_data>0) {
                        *dst++ = '\n';
                        dst_linestart = dst;
                        if (numleft) {
                            unsigned int num  = (src-src_start)+1; // this goes to the '%u' (see comment above)
                            dst              += sprintf(dst, format, num);
                        }
                        else if (tab>0) {
                            memset(dst, ' ', tab);
                            dst += tab;
                        }
                    }
                }

                free(format);
            }

            *dst++ = 0;         // close str

#if defined(DEBUG)
            { // check for array overflow
                size_t used_size = dst-result;
                gb_assert(used_size <= needed_size);
                ARB_realloc(result, used_size);
            }
#endif // DEBUG
        }

        if (!error) PASS_2_OUT(args, result);
        else free(result);
    }
    return error;
}

// ACI command 'format': plain text formatting (word-wrapping)
static GB_ERROR gbl_format(GBL_command_arguments *args) {
    const bool simple_format = true;
    return format(args, simple_format);
}

// ACI command 'format_sequence': sequence formatting (gaps and position numbers)
static GB_ERROR gbl_format_sequence(GBL_command_arguments *args) {
    const bool simple_format = false;
    return format(args, simple_format);
}


static char *gbl_read_seq_sai_or_species(GBDATA *gb_main, const char *species, const char *sai, const char *ali, size_t *seqLen) {
    /* Reads the data stored for alignment 'ali' in either 'species' or 'sai'
     * (exactly one of both has to be non-NULp).
     * If 'ali' is NULp, the default alignment is used.
     *
     * The first child entry of type GB_BITS or GB_STRING inside the alignment container is read
     * (bits are converted into a string of '-' and '+').
     * If 'seqLen' is given, it is set to the length reported by the database.
     *
     * Returns a heap copy of the data or
     * NULp in case of error (which is exported then).
     */

    char     *seq   = NULp;
    GB_ERROR  error = NULp;

    const bool want_species = species != NULp;
    const bool want_sai     = sai     != NULp;

    if (want_species == want_sai) { // both or none specified
        error = "Either parameters 'species' or 'SAI' must be specified";
    }
    else {
        const char *what    = want_species ? "species" : "SAI";
        const char *name    = want_species ? species : sai;
        GBDATA     *gb_item = want_species ? GBT_find_species(gb_main, species) : GBT_find_SAI(gb_main, sai);

        if (gb_item) {
            char *default_ali = NULp; // only set if the default alignment had to be looked up

            if (!ali) {
                default_ali = GBT_get_default_alignment(gb_main);
                ali         = default_ali;
                if (!ali) error = GB_await_error();
            }

            if (ali) {
                // search first entry with usable type
                GBDATA *gb_data   = NULp;
                long    data_type = 0;
                GBDATA *gb_ali    = GB_entry(gb_item, ali);

                if (gb_ali) {
                    for (GBDATA *gb_child = GB_child(gb_ali); gb_child; gb_child = GB_nextChild(gb_child)) {
                        long type = GB_read_type(gb_child);
                        if (type == GB_BITS || type == GB_STRING) {
                            gb_data   = gb_child;
                            data_type = type;
                            break;
                        }
                    }
                }

                if (gb_data) {
                    if (data_type == GB_BITS) {
                        seq = GB_read_bits(gb_data, '-', '+');
                        if (seqLen) *seqLen = GB_read_bits_count(gb_data);
                    }
                    else {
                        seq = GB_read_string(gb_data);
                        if (seqLen) *seqLen = GB_read_string_count(gb_data);
                    }
                }

                if (!seq) error = GBS_global_string("%s '%s' has no (usable) data in alignment '%s'", what, name, ali);
            }
            free(default_ali);
        }
        else {
            error = GBS_global_string("Can't find %s '%s'", what, name);
        }
    }

    if (error) {
        gb_assert(!seq);
        GB_export_error(error);
    }

    return seq;
}

// Parameters shared by all filter-like ACI commands (see GBL_COMMON_FILTER_PARAMS).
// Exactly one of 'sai', 'species', 'first' and 'pairwise' has to be specified (checked by apply_filters).
struct common_filter_params {
    const char *align;    // alignment to use (NULp -> default alignment)
    const char *sai;      // name of SAI used as filter (or NULp)
    const char *species;  // name of species used as filter (or NULp)
    int         first;    // != 0 -> use 1st input stream as filter for all other streams
    int         pairwise; // != 0 -> use 1st stream as filter for 2nd, 3rd for 4th, ...
};

// Declares a local variable 'common_param' (of type common_filter_params) and
// declares the ACI parameters 'align=', 'SAI=', 'species=', 'first=' and 'pairwise=' for it.
// Has to be used between GBL_BEGIN_PARAMS and GBL_END_PARAMS (as done by gbl_diff, gbl_filter and gbl_change_gc).
#define GBL_COMMON_FILTER_PARAMS                                                                                        \
    common_filter_params common_param;                                                                                  \
    GBL_STRUCT_PARAM_STRING(common_param, align,    "align=",    NULp, "alignment to use (defaults to default alignment)"); \
    GBL_STRUCT_PARAM_STRING(common_param, sai,      "SAI=",      NULp, "Use default sequence of given SAI as a filter"); \
    GBL_STRUCT_PARAM_STRING(common_param, species,  "species=",  NULp, "Use default sequence of given species as a filter"); \
    GBL_STRUCT_PARAM_BIT   (common_param, first,    "first=",    0,    "Use 1st stream as filter for other streams");   \
    GBL_STRUCT_PARAM_BIT   (common_param, pairwise, "pairwise=", 0,    "Use 1st stream as filter for 2nd, 3rd for 4th, ...")

typedef char* (*filter_fun)(const char *seq, const char *filter, size_t flen, void *param);
/* Callback type used by apply_filters() to process one input stream.
 *
 * Note:
 * - filter_fun has to return a heap copy of the filter-result.
 * - 'seq' is the input stream to process, 'filter' the filter string applied to it.
 * - if 'flen' != 0, it contains the length of 'filter'
 *   (if 'flen' == 0, the length is not known by the caller).
 * - 'param' may be any client data
 */

static GB_ERROR apply_filters(GBL_command_arguments *args, common_filter_params *common, filter_fun filter_one, void *param) {
    // Applies 'filter_one' to the input streams and writes one output stream per filtered input stream.
    //
    // The filter string is selected according to 'common':
    // - pairwise:    streams are processed in pairs (1st filters 2nd, 3rd filters 4th, ...)
    // - first:       the 1st stream filters all following streams
    // - SAI/species: data read from the database filters all streams
    //
    // 'param' is passed through to 'filter_one'.

    const int stream_count = args->input.size();
    if (stream_count == 0) return "No input stream";

    int methodCount = !!common->sai + !!common->species + !!common->pairwise + !!common->first;
    if (methodCount != 1) return "Need exactly one of the parameters 'SAI', 'species', 'pairwise' or 'first'";

    if (common->pairwise) {
        if (stream_count % 2) return "Using 'pairwise' requires an even number of input streams";

        for (int s = 1; s<stream_count; s += 2) {
            // filter length is unknown here (-> 0)
            PASS_2_OUT(args, filter_one(args->input.get(s), args->input.get(s-1), 0, param));
        }
        return NULp;
    }

    GB_ERROR  error      = NULp;
    int       next       = 0;    // index of first stream to filter
    char     *filter     = NULp;
    size_t    filter_len = 0;

    if (!common->first) {
        filter = gbl_read_seq_sai_or_species(args->get_gb_main(), common->species, common->sai, common->align, &filter_len);
        if (!filter) error = GB_await_error();
    }
    else if (stream_count<2) {
        error = "Using 'first' needs at least 2 input streams";
    }
    else {
        const char *first_stream = args->input.get(next++); // consumed as filter
        gb_assert(first_stream);

        filter_len = strlen(first_stream);
        filter     = ARB_strduplen(first_stream, filter_len);
    }

    gb_assert(filter || error);
    if (filter) {
        while (next<stream_count) {
            PASS_2_OUT(args, filter_one(args->input.get(next), filter, filter_len, param));
            ++next;
        }
    }
    free(filter);

    return error;
}

// -------------------------
//      calculate diff

// Client data passed to calc_diff() (filled by gbl_diff)
struct diff_params {
    char equalC; // replaces positions equal to the filter (0 -> keep original character)
    char diffC;  // replaces positions differing from the filter (0 -> keep original character)
};
static char *calc_diff(const char *seq, const char *filter, size_t /*flen*/, void *paramP) {
    // filters 'seq' through 'filter'
    // - replace all equal     positions by 'equal_char' (if != 0)
    // - replace all differing positions by 'diff_char'  (if != 0)

    diff_params *param      = (diff_params*)paramP;
    char         equal_char = param->equalC;
    char         diff_char  = param->diffC;

    char *result = ARB_strdup(seq);
    int   p;

    for (p = 0; result[p] && filter[p]; ++p) {
        if (result[p] == filter[p]) {
            if (equal_char) result[p] = equal_char;
        }
        else {
            if (diff_char) result[p] = diff_char;
        }
    }

    // if 'seq' is longer than 'filter' and diff_char is given
    // -> fill rest of 'result' with 'diff_char'
    if (diff_char) {
        for (; result[p]; ++p) {
            result[p] = diff_char;
        }
    }

    return result;
}
static GB_ERROR gbl_diff(GBL_command_arguments *args) {
    // ACI command 'diff':
    // marks equal and/or differing positions between the input streams and a filter string
    // (for filter selection see apply_filters; for replacement rules see calc_diff).

    GBL_BEGIN_PARAMS;
    GBL_COMMON_FILTER_PARAMS;

    diff_params param;
    GBL_STRUCT_PARAM_CHAR(param, equalC,   "equal=",    '.', "symbol for equal characters");
    GBL_STRUCT_PARAM_CHAR(param, diffC,    "differ=",   0,   "symbol for diff characters (default: use char from input stream)");

    GBL_TRACE_PARAMS(args);
    GBL_END_PARAMS;

    GB_ERROR error = apply_filters(args, &common_param, calc_diff, &param);
    return error;
}

// -------------------------
//      standard filter

// Operation performed by filter_seq():
// FP_FILTER = only copy matching columns, FP_MODIFY = copy all columns, randomly changing matching ones
enum filter_function { FP_FILTER, FP_MODIFY };

struct filter_params { // used by gbl_filter and gbl_change_gc
    filter_function function;

    // characters of the filter string selecting the affected columns
    // (filter_seq uses 'include' if set, otherwise 'exclude')
    const char *include;
    const char *exclude;

    // FP_MODIFY only:
    int         change_pc; // percentage of (alphabetic) characters to change
    const char *change_to; // set of replacement characters
};

static char *filter_seq(const char *seq, const char *filter, size_t flen, void *paramP) {
    // Filters or modifies 'seq' column-wise, controlled by the characters of 'filter'.
    //
    // 'paramP' has to point to a filter_params:
    // - a column "applies" if its filter character is member of 'include'
    //   (or is NOT member of 'exclude', if 'include' is not set).
    //   Columns behind the end of 'filter' never apply for 'include' and always apply for 'exclude'.
    // - FP_FILTER: only applying columns are copied to the result
    // - FP_MODIFY: all columns are copied; in applying columns alphabetic characters are replaced
    //              (with a probability of 'change_pc' percent) by a random character from 'change_to'.
    //              An empty 'change_to' leaves all characters unchanged.
    //
    // 'flen' is the length of 'filter' (0 -> length gets determined here).
    // Returns a heap copy of the result.

    filter_params *param = (filter_params*)paramP;

    size_t slen     = strlen(seq);
    if (!flen) flen = strlen(filter);
    size_t mlen     = slen<flen ? slen : flen;

    GBS_strstruct out(mlen+1); // +1 to avoid invalid, zero-length buffer

    const char *charset;
    int         include;

    if (param->include) {
        charset = param->include;
        include = 1;
    }
    else {
        gb_assert(param->exclude);
        charset = param->exclude;
        include = 0;
    }

    size_t pos  = 0;
    size_t rest = slen;
    size_t ctl  = 0; // number of replacement characters (FP_MODIFY only)
    if (param->function == FP_MODIFY) ctl  = strlen(param->change_to);

    // the filter is scanned in alternating runs of characters inside and outside of 'charset'
    int inset = 1; // 1 -> check chars in charset, 0 -> check chars NOT in charset
    while (rest) {
        size_t count;
        if (pos >= flen) {      // behind filter
            // trigger last loop
            count = rest;
            inset = 0; // if 'include' -> 'applies' will get false, otherwise true
                       // (meaning is: behind filter nothing can match 'include' or 'exclude')
        }
        else {
            count = (inset ? strspn : strcspn)(filter+pos, charset); // count how many chars are 'inset'
        }
        if (count) {
            int applies = !!include == !!inset; // true -> 'filter' matches 'include' or doesn't match 'exclude'
            if (count>rest) count = rest;

            switch (param->function) {
                case FP_FILTER:
                    if (applies) out.ncat(seq+pos, count);
                    break;

                case FP_MODIFY:
                    if (applies) { // then modify
                        for (size_t i = 0; i<count; i++) {
                            char c = seq[pos+i];

                            // - 'ctl' is tested first: with an empty replacement set there is nothing to choose from
                            //   (indexing 'change_to' would otherwise write its terminating zero into the result)
                            // - isalpha() requires a value representable as unsigned char
                            if (ctl && isalpha((unsigned char)c) && GB_random(100)<param->change_pc) {
                                c = param->change_to[GB_random(ctl)];
                            }
                            out.put(c);
                        }
                    }
                    else { // otherwise simply copy
                        out.ncat(seq+pos, count);
                    }
                    break;
            }

            pos  += count;
            rest -= count;
        }
        inset = 1-inset; // toggle
    }
    return out.release();
}

static GB_ERROR gbl_filter(GBL_command_arguments *args) {
    // ACI command 'filter':
    // removes columns from the input streams, depending on the characters of a filter string
    // (for filter selection see apply_filters; for column selection see filter_seq).

    GBL_BEGIN_PARAMS;
    GBL_COMMON_FILTER_PARAMS;

    filter_params param;
    GBL_STRUCT_PARAM_STRING(param, exclude, "exclude=", NULp, "Exclude colums");
    GBL_STRUCT_PARAM_STRING(param, include, "include=", NULp, "Include colums");
    param.function = FP_FILTER;

    GBL_TRACE_PARAMS(args);
    GBL_END_PARAMS;

    const bool has_include = param.include != NULp;
    const bool has_exclude = param.exclude != NULp;

    if (has_include == has_exclude) { // both or none specified
        return "Need exactly one parameter of: 'include', 'exclude'";
    }
    return apply_filters(args, &common_param, filter_seq, &param);
}

static GB_ERROR gbl_change_gc(GBL_command_arguments *args) {
    // ACI command 'change':
    // randomly replaces characters in columns selected by a filter string
    // (for filter selection see apply_filters; for column selection and replacement see filter_seq).

    GBL_BEGIN_PARAMS;
    GBL_COMMON_FILTER_PARAMS;

    filter_params param;
    GBL_STRUCT_PARAM_STRING(param, exclude,   "exclude=", NULp, "Exclude colums");
    GBL_STRUCT_PARAM_STRING(param, include,   "include=", NULp, "Include colums");
    GBL_STRUCT_PARAM_INT   (param, change_pc, "change=",  0,    "percentage of changed columns (default: silently change nothing)");
    GBL_STRUCT_PARAM_STRING(param, change_to, "to=",      "GC", "change to one of this");
    param.function = FP_MODIFY;

    GBL_TRACE_PARAMS(args);
    GBL_END_PARAMS;

    const bool has_include = param.include != NULp;
    const bool has_exclude = param.exclude != NULp;

    if (has_include == has_exclude) { // both or none specified
        return "Need exactly one parameter of: 'include', 'exclude'";
    }
    return apply_filters(args, &common_param, filter_seq, &param);
}

static GB_ERROR gbl_exec(GBL_command_arguments *args) {
    // ACI command 'exec(command[,arguments]+)':
    // - writes all input streams (one per line) into a temporary file,
    // - runs 'command' via the shell, passing each argument as one single-quoted word
    //   and the temporary file as standard input,
    // - writes everything the command prints to stdout into one output stream.
    //
    // The temporary file is removed afterwards.

    EXPECT_PARAMS_PASSED(args, "command[,arguments]+");

    // write inputstreams to temp file:
    GB_ERROR  error     = NULp;
    char     *inputname = NULp; // stays NULp if no name was provided for the temp file
    {
        char *filename = GB_unique_filename("arb_exec_input", "tmp");
        FILE *out      = GB_fopen_tempfile(filename, "wt", &inputname);

        if (!out) error = GB_await_error();
        else {
            for (int i = 0; i<args->input.size(); i++) {
                fprintf(out, "%s\n", args->input.get(i));
            }
            fclose(out);
        }
        free(filename);
    }

    if (!error) {
        // build shell command to execute
        char *sys;
        {
            GBS_strstruct str(1000);

            str.cat(args->get_param(0));
            for (int i = 1; i<args->param_count(); i++) {
                // Pass each argument as ONE single-quoted shell word.
                // A single quote inside the argument would terminate the quoting (and allow
                // to inject arbitrary shell code), so it is written as '\'' (i.e. close quote,
                // escaped quote, reopen quote).
                str.cat(" \'");
                for (const char *p = args->get_param(i); *p; ++p) {
                    if (*p == '\'') str.cat("'\\''");
                    else            str.put(*p);
                }
                str.put('\'');
            }
            str.cat(" <");
            str.cat(inputname);

            sys = str.release();
        }

        char *result = NULp;
        {
            FILE *in = popen(sys, "r");
            if (in) {
                GBS_strstruct str(4096);

                int i;
                while ((i=getc(in)) != EOF) { str.put(i); }
                result = str.release();
                pclose(in);
            }
            else {
                error = GBS_global_string("Cannot execute shell command '%s'", sys);
            }
        }

        if (!error) {
            gb_assert(result);
            PASS_2_OUT(args, result);
        }

        free(sys);
    }

    if (inputname) { // nothing to clean up, if no temp file name is known
        gb_assert(GB_is_privatefile(inputname, false));
        GB_unlink_or_warn(inputname, &error);
        free(inputname);
    }

    return error;
}


// Table of all standard ACI commands: maps the command name (as used in ACI expressions)
// to the function implementing it.
//
// Notes:
// - some names are aliases, i.e. refer to the same function ("left" == "head", "right" == "tail").
// - the table is terminated by a { NULp, NULp } sentinel
//   (which is excluded from the element count used in ACI_get_standard_commands).
static GBL_command_definition gbl_command_table[] = {
    { "ali_name",        gbl_ali_name },
    { "caps",            gbl_caps },
    { "change",          gbl_change_gc },
    { "checksum",        gbl_checksum },
    { "command",         gbl_command },
    { "compare",         gbl_compare },
    { "colsplit",        gbl_colsplit },
    { "icompare",        gbl_icompare },
    { "contains",        gbl_contains },
    { "icontains",       gbl_icontains },
    { "count",           gbl_count },
    { "crop",            gbl_crop },
    { "cut",             gbl_cut },
    { "dd",              gbl_dd },
    { "define",          gbl_define },
    { "diff",            gbl_diff },
    { "div",             gbl_div },
    { "fdiv",            gbl_fdiv },
    { "do",              gbl_do },
    { "drop",            gbl_drop },
    { "dropempty",       gbl_dropempty },
    { "dropzero",        gbl_dropzero },
    { "echo",            gbl_echo },
    { "equals",          gbl_equals },
    { "iequals",         gbl_iequals },
    { "escape",          gbl_escape },
    { "unescape",        gbl_unescape },
    { "eval",            gbl_eval },
    { "exec",            gbl_exec },
    { "export_sequence", gbl_export_sequence },
    { "extract_sequence", gbl_extract_sequence },
    { "extract_words",   gbl_extract_words },
    { "filter",          gbl_filter },
    { "findspec",        gbl_findspec },
    { "findacc",         gbl_findacc },
    { "findgene",        gbl_findgene },
    { "format",          gbl_format },
    { "format_sequence", gbl_format_sequence },
    { "gcgchecksum",     gbl_gcgchecksum },
    { "head",            gbl_head },
    { "inRange",         gbl_inRange },
    { "isAbove",         gbl_isAbove },
    { "isBelow",         gbl_isBelow },
    { "isEqual",         gbl_isEqual },
    { "isEmpty",         gbl_isEmpty },
    { "keep",            gbl_keep },
    { "left",            gbl_head },
    { "len",             gbl_len },
    { "lower",           gbl_lower },
    { "merge",           gbl_merge },
    { "mid",             gbl_mid },
    { "mid0",            gbl_mid0 },
    { "minus",           gbl_minus },
    { "fminus",          gbl_fminus },
    { "mult",            gbl_mult },
    { "fmult",           gbl_fmult },
    { "and",             gbl_and },
    { "or",              gbl_or },
    { "not",             gbl_not },
    { "origin_gene",     gbl_origin_gene },
    { "origin_organism", gbl_origin_organism },
    { "partof",          gbl_partof },
    { "ipartof",         gbl_ipartof },
    { "per_cent",        gbl_per_cent },
    { "fper_cent",       gbl_fper_cent },
    { "plus",            gbl_plus },
    { "fplus",           gbl_fplus },
    { "pretab",          gbl_pretab },
    { "quote",           gbl_quote },
    { "unquote",         gbl_unquote },
    { "readdb",          gbl_readdb },
    { "remove",          gbl_remove },
    { "rest",            gbl_rest },
    { "right",           gbl_tail },
    { "round",           gbl_round },
    { "select",          gbl_select },
    { "sequence",        gbl_sequence },
    { "sequence_type",   gbl_sequence_type },
    { "split",           gbl_split },
    { "srt",             gbl_srt },
    { "streams",         gbl_streams },
    { "swap",            gbl_swap },
    { "tab",             gbl_tab },
    { "tail",            gbl_tail },
    { "taxonomy",        gbl_taxonomy },
    { "toback",          gbl_toback },
    { "tofront",         gbl_tofront },
    { "trace",           gbl_trace },
    { "translate",       gbl_translate },
    { "upper",           gbl_upper },

    { NULp, NULp }
};

const GBL_command_lookup_table& ACI_get_standard_commands() {
    // Provides the lookup table for all standard ACI commands.
    // The table is created on first call (the terminating sentinel of gbl_command_table is not counted).
    static GBL_command_lookup_table standard_commands(gbl_command_table, ARRAY_ELEMS(gbl_command_table)-1);
    return standard_commands;
}

