// ============================================================= //
//                                                               //
//   File      : group_search.cxx                                //
//   Purpose   : provides group search functionality             //
//                                                               //
//   Coded by Ralf Westram (coder@reallysoft.de) in April 2017   //
//   http://www.arb-home.de/                                     //
//                                                               //
// ============================================================= //

#include "group_search.h"

#include <arb_strarray.h>
#include <arb_progress.h>
#include <arb_sort.h>
#include <arb_strbuf.h>
#include <arb_defs.h>

#include <gb_aci_impl.h>

#include <ad_cb.h>
#include <TreeNode.h>

#include <map>
#include <stack>
#include <arb_misc.h>
#include <arb_msg_nospam.h>

using namespace std;

class GroupSearchTree;

class GroupSearchRoot FINAL_TYPE : public TreeRoot {
    // Tree root used while searching groups.
    // Acts as node factory: all nodes of trees loaded into this root are GroupSearchTree instances
    // (see makeNode/destroyNode below).
public:
    GroupSearchRoot() :
        TreeRoot(false)
    {}
    // calls predelete() before the base class destructor runs
    ~GroupSearchRoot() FINAL_OVERRIDE { predelete(); }

    DEFINE_TREE_ROOT_ACCESSORS(GroupSearchRoot, GroupSearchTree);

    // TreeRoot interface
    // (both functions are defined behind class GroupSearchTree, because they need its complete type)
    inline TreeNode *makeNode() const OVERRIDE;
    inline void destroyNode(TreeNode *node) const OVERRIDE;
};

class GroupSearchTree FINAL_TYPE : public TreeNode {
    // Tree node providing lazily evaluated, cached per-clade statistics
    // (leaf count, marked count, zombie count and average ingroup distance).
    // All caches are 'mutable', i.e. they get filled on demand from const accessors.

    mutable Lazy<int,-1>      size;    // number of leafs (=zombies+species); -1 -> need update
    mutable Lazy<int,-1>      marked;  // number of marked species; -1 -> need update
    mutable Lazy<int,-1>      zombies; // number of zombies
    mutable LazyFloat<double> aid;     // average ingroup distance

    enum UpdateWhat {
        UPDATE_SIZE,   // quick (update 'size' only)
        UPDATE_MARKED, // slow  (update all)
    };

    // recalculates the cached counters of this node (recursing into both sons for inner nodes)
    void update_info(UpdateWhat what) const;
    // stores the average ingroup distance of the clade (consisting of 'group_size' leafs) into 'aid'
    void calc_average_ingroup_distance(int group_size) const;
    // sums up the branchlengths of the subtree, each weighted by the number of leaf-pairs it separates
    double weighted_branchlength_sum(int group_size) const;

    // species container used to look up leafs by name (has to be set before marked/zombie counts are requested)
    static GBDATA *gb_species_data;

public:
    GroupSearchTree(GroupSearchRoot *root) :
        TreeNode(root)
    {}

    DEFINE_TREE_RELATIVES_ACCESSORS(GroupSearchTree);

    static void set_species_data(GBDATA *gb_species_data_) { gb_species_data = gb_species_data_; }

    // TreeNode interface
    unsigned get_leaf_count() const FINAL_OVERRIDE {
        // evaluates 'size' on first use (quick update; does not touch the database)
        if (size.needs_eval()) update_info(UPDATE_SIZE);
        return size;
    }
    void compute_tree() OVERRIDE {
        gs_assert(0); // should be unused
    }

    unsigned get_marked_count() const {
        // evaluates 'marked' on first use (slow update; looks up each leaf in the species data)
        if (marked.needs_eval()) update_info(UPDATE_MARKED);
        return marked;
    }
    unsigned get_zombie_count() const {
        // zombies = leafs for which no species was found; evaluated together with 'marked'
        if (zombies.needs_eval()) update_info(UPDATE_MARKED);
        return zombies;
    }

    double get_average_ingroup_distance() const {
        // evaluates 'aid' on first use; the clade size is the leaf count of this node
        if (aid.needs_eval()) calc_average_ingroup_distance(get_leaf_count());
        return aid;
    }
};

// species container used by GroupSearchTree::update_info (see set_species_data)
GBDATA *GroupSearchTree::gb_species_data = NULp;

inline TreeNode *GroupSearchRoot::makeNode() const {
    // the factory interface is const, but the node constructor expects a mutable root
    GroupSearchRoot *mutable_self = const_cast<GroupSearchRoot*>(this);
    return new GroupSearchTree(mutable_self);
}
inline void GroupSearchRoot::destroyNode(TreeNode *node) const {
    // nodes are always created by makeNode(), i.e. they are GroupSearchTree instances
    delete DOWNCAST(GroupSearchTree*,node);
}

void GroupSearchTree::update_info(UpdateWhat what) const {
    // Fills the lazy counters of this node.
    // UPDATE_SIZE   -> only 'size' is calculated
    // UPDATE_MARKED -> 'marked', 'zombies' and 'size' are calculated

    if (!is_leaf()) {
        const GroupSearchTree *lson = get_leftson();
        const GroupSearchTree *rson = get_rightson();

        if (what == UPDATE_MARKED) {
            marked  = lson->get_marked_count() + rson->get_marked_count(); // triggers lazy-update (UPDATE_MARKED)
            zombies = lson->get_zombie_count() + rson->get_zombie_count();
        }
        // size is updated in both modes:
        size = lson->get_leaf_count() + rson->get_leaf_count(); // triggers lazy-update (UPDATE_SIZE)
        return;
    }

    // leaf:
    size = 1;
    if (what != UPDATE_MARKED) return;

    gs_assert(gb_species_data);

    GBDATA *gb_species = GBT_find_species_rel_species_data(gb_species_data, name);
    if (!gb_species) { // leaf w/o species -> zombie
        marked  = 0;
        zombies = 1;
    }
    else {
        marked  = GB_read_flag(gb_species);
        zombies = 0;
    }
}

typedef SmartPtr<GroupSearchRoot> GroupSearchRootPtr;

class SearchedTree {
    string         name;
    RefPtr<GBDATA> gb_tree;
    long           inner_nodes; // number of inner nodes in binary tree (i.e. ROOTED)
                                // (Note: corrupted trees in existing DBs sometimes contain zero nodes
                                //        (caused by older bugs?))

    GroupSearchRootPtr troot; // (optional) loaded tree
    string             load_error;

    void load_tree() {
        gs_assert(!tree_is_loaded() && !failed_to_load());
        troot              = new GroupSearchRoot;
        TreeNode *rootNode = GBT_read_tree(GB_get_root(gb_tree), get_name(), &*troot);
        gs_assert(implicated(rootNode, !rootNode->is_normal_group())); // otherwise parent caching will get confused
        if (!rootNode) {
            load_error = GB_await_error();
        }
        else {
            gs_assert(rootNode == troot->get_root_node());
        }
    }

public:
    SearchedTree(const char *name_, GBDATA *gb_main) :
        name(name_),
        gb_tree(GBT_find_tree(gb_main, name_)),
        inner_nodes(-1)
    {
        gs_assert(gb_tree);
        GBDATA *gb_nnodes     = GB_entry(gb_tree, "nnodes");
        if (gb_nnodes) inner_nodes = GB_read_int(gb_nnodes); // see GBT_size_of_tree
    }

    GBDATA *get_tree_data() { return gb_tree; }
    const char *get_name() const { return name.c_str(); }

    int get_leaf_count() const { return inner_nodes+1; }
    int get_edge_iteration_count() const { return ARB_edge::iteration_count(get_leaf_count()); }

    bool tree_is_loaded() const { return troot.isSet(); }
    bool failed_to_load() const { return !load_error.empty(); }
    const char *get_load_error() const {
        gs_assert(failed_to_load());
        return load_error.c_str();
    }
    GroupSearchRoot *get_tree_root() {
        if (!tree_is_loaded()) load_tree();
        return failed_to_load() ? NULp : &*troot;
    }
    void flush_loaded_tree() { troot.setNull(); }
};

// list of trees examined by one search run (filled by collect_searched_trees)
typedef vector<SearchedTree>            SearchedTreeContainer;
typedef SearchedTreeContainer::iterator SearchedTreeIter;

const char *FoundGroup::get_name() const {
    // returns the content of the "group_name" entry (or NULp if that entry is not available)
    GBDATA *gb_name = GB_search(gb_group, "group_name", GB_STRING);
    if (!gb_name) return NULp;
    return GB_read_char_pntr(gb_name);
}
int FoundGroup::get_name_length() const {
    // returns the length of the group name.
    // If the "group_name" entry is not available, 0 is returned
    // (consistent with get_name(), which reports NULp in that case
    // instead of passing a NULp entry to the DB read function).
    GB_transaction ta(gb_group);
    GBDATA *gb_name = GB_search(gb_group, "group_name", GB_STRING);
    return gb_name ? GB_read_string_count(gb_name) : 0;
}

GBDATA *FoundGroup::get_tree_data() const {
    // the father entry of the group is used as the data of the tree containing it
    GBDATA *gb_father = GB_get_father(gb_group);
    return gb_father;
}

const char *FoundGroup::get_tree_name() const {
    // the tree name is the key of the tree entry (NULp if there is no tree entry)
    GBDATA *gb_tree = get_tree_data();
    if (!gb_tree) return NULp;
    return GB_read_key_pntr(gb_tree);
}

int FoundGroup::get_tree_order() const {
    GBDATA *gb_tree = GB_get_father(gb_group);
    int     order   = -1;
    if (gb_tree) {
        GBDATA *gb_order = GB_entry(gb_tree, "order");
        if (gb_order) {
            order = GB_read_int(gb_order);
        }
    }
    return order;
}

GB_ERROR FoundGroup::delete_from_DB() {
    // Removes the group from the database:
    // - deletes "group_name" and "grouped"
    // - deletes the whole node, if no entry beside "id" remains
    GB_ERROR       error = NULp;
    GB_transaction ta(gb_group);

    GBDATA *gb_gname = GB_entry(gb_group, "group_name");
    gs_assert(gb_gname); // groups shall always have a name
    if (gb_gname) {
        error = GB_delete(gb_gname);
    }

    if (!error) {
        GBDATA *gb_grouped = GB_entry(gb_group, "grouped");
        if (gb_grouped) {
            error = GB_delete(gb_grouped);
        }
    }

    if (!error) {
        const GBQUARK id_quark     = GB_find_existing_quark(gb_group, "id");
        bool          only_id_left = true;

        for (GBDATA *gb_entry = GB_child(gb_group); gb_entry; gb_entry = GB_nextChild(gb_entry)) {
            if (GB_get_quark(gb_entry) != id_quark) {
                only_id_left = false; // node still contains other data -> keep it
                break;
            }
        }

        if (only_id_left) { // no child beside "id" left -> delete node
            error = GB_delete(gb_group.pointer_ref());
        }
    }

    return error;
}

ARB_ERROR FoundGroup::rename_by_ACI(const char *acisrt, const QueriedGroups& results, int hit_idx) {
    // Renames the group to the result of applying 'acisrt' (calculated by GS_calc_resulting_groupname).
    // An empty result leaves the group name untouched.
    ARB_ERROR      error;
    GB_transaction ta(gb_group);

    GBDATA *gb_gname = GB_entry(gb_group, "group_name");
    if (gb_gname) {
        char *previous = GB_read_string(gb_gname);
        char *renamed  = GS_calc_resulting_groupname(gb_group, results, hit_idx, previous, acisrt, error);

        if (!error && renamed[0]) { // if ACI produces empty result -> skip rename
            error = GBT_write_group_name(gb_gname, renamed, true);
        }

        free(renamed);
        free(previous);
    }
    else {
        gs_assert(0); // groups shall always have a name
        error = "FATAL: unnamed group detected";
    }

    return error;
}

inline bool group_is_folded(GBDATA *gb_group) {
    // a group counts as folded, if it has a "grouped" entry containing a non-zero byte
    // (a NULp group is never folded)
    if (!gb_group) return false;

    GBDATA *gb_grouped = GB_entry(gb_group, "grouped");
    if (!gb_grouped) return false;

    return GB_read_byte(gb_grouped) != 0;
}
inline ARB_ERROR group_set_folded(GBDATA *gb_group, bool folded) {
    // Writes the folded-state into the "grouped" entry of 'gb_group'.
    // The entry is only created when needed (i.e. when folding a group that has none).
    gs_assert(gb_group);

    ARB_ERROR  error;
    GBDATA    *gb_flag = GB_entry(gb_group, "grouped");

    if (folded && !gb_flag) {
        gb_flag = GB_create(gb_group, "grouped", GB_BYTE);
        if (!gb_flag) error = GB_await_error();
    }

    if (!gb_flag) {
        gs_assert(!folded);
    }
    else {
        gs_assert(!error);
        error = GB_write_byte(gb_flag, folded);
    }
    return error;
}

bool FoundGroup::overlap_is_folded() const {
    return group_is_folded(get_overlap_group());
}
bool FoundGroup::is_folded() const {
    // folded-state of the group itself
    const bool folded = group_is_folded(gb_group);
    return folded;
}

ARB_ERROR FoundGroup::set_folded(bool folded) {
    // stores the folded-state of the group itself
    ARB_ERROR error = group_set_folded(gb_group, folded);
    return error;
}
ARB_ERROR FoundGroup::set_overlap_folded(bool folded) {
    // stores the folded-state of the overlapping group (which has to exist)
    ARB_ERROR error = group_set_folded(get_overlap_group(), folded);
    return error;
}

ARB_ERROR FoundGroup::change_folding(GroupFoldingMode mode) {
    // Collapses, expands or toggles the group.
    // If details are known and an overlapping group exists, that group is set to the same state.
    GB_transaction ta(gb_group);

    ARB_ERROR error;

    const bool folded_before         = is_folded();
    const bool overlap_known         = knows_details(); // may be false when called by fold_found_groups(); acceptable
    const bool overlap_folded_before = overlap_known && overlap_is_folded();

    bool target_state = folded_before;
    switch (mode) {
        case GFM_TOGGLE:
            target_state = !(folded_before || overlap_folded_before);
            break;
        case GFM_COLLAPSE:
            target_state = true;
            break;
        case GFM_EXPAND:
            target_state = false;
            break;
        default:
            error = "invalid collapse mode";
            gs_assert(0);
            break;
    }

    if (!error) {
        if (target_state != folded_before) {
            error = set_folded(target_state);
        }
    }
    if (!error && target_state != overlap_folded_before && overlap_known && gb_overlap_group) {
        error = set_overlap_folded(target_state);
    }

    return error;
}

void ColumnWidths::track(int wName, int wReason, int nesting, int size, int marked, int clusID, double aid, bool keeled) {
    // Accumulates the maxima needed to format result columns.
    if (keeled) seen_keeled = true;

    // track max. width:
    if (wName   > name)   name   = wName;
    if (wReason > reason) reason = wReason;

    // track max. value:
    const int aid_int = int(aid);

    if (nesting > max_nesting)    max_nesting    = nesting;
    if (size    > max_size)       max_size       = size;
    if (marked  > max_marked)     max_marked     = marked;
    max_marked_pc = std::max(max_marked_pc, percent(marked, size));
    if (clusID  > max_cluster_id) max_cluster_id = clusID;
    if (aid_int > max_aid)        max_aid        = aid_int;
}
void FoundGroup::track_max_widths(ColumnWidths& widths) const {
    // reports the values of this group to 'widths' (requires that group details are known)
    gs_assert(knows_details());

    const int name_width   = get_name_length();
    const int reason_width = get_hit_reason().length();

    widths.track(name_width, reason_width, nesting, size, marked, clusterID, aid, keeled);
}

// ---------------------
//      ParentCache

class ParentCache : virtual Noncopyable {
    typedef map<GBDATA*,GBDATA*> Cache;
    Cache cache;

public:
    void defineParentOf(GBDATA *gb_child_group, GBDATA *gb_parent_group) {
        // gb_parent_group may be NULp
        gs_assert(gb_child_group);
        cache[gb_child_group] = gb_parent_group;
    }
    GBDATA *lookupParent(GBDATA *gb_child_group) const {
        Cache::const_iterator  found  = cache.find(gb_child_group);
        return found == cache.end() ? NULp : found->second;
    }

    void fix_deleted_groups(const GBDATAset& deleted_groups) {
        ParentCache translate; // translation table: oldDelParent -> newExistingParent (or NULp at top-level)
        for (GBDATAset::const_iterator del = deleted_groups.begin(); del != deleted_groups.end(); ++del) {
            GBDATA *gb_remaining_father = lookupParent(*del);
            if (gb_remaining_father) { // otherwise 'del' point to sth unkown (see comment in GroupSearchCommon)
                while (gb_remaining_father) {
                    if (deleted_groups.find(gb_remaining_father) == deleted_groups.end()) {
                        break; // not deleted -> use as replacement
                    }
                    gb_remaining_father = lookupParent(gb_remaining_father);
                }
                translate.defineParentOf(*del, gb_remaining_father);
            }
        }

        // erase deleted nodes from cache
        for (GBDATAset::const_iterator del = deleted_groups.begin(); del != deleted_groups.end(); ++del) {
            cache.erase(*del);
        }

        // translate remaining entries
        for (Cache::iterator c = cache.begin(); c != cache.end(); ++c) {
            GBDATA *gb_child  = c->first;
            GBDATA *gb_parent = c->second;
            if (deleted_groups.find(gb_parent) != deleted_groups.end()) {
                defineParentOf(gb_child, translate.lookupParent(gb_parent));
            }
        }
    }
};

// ---------------------------
//      GroupSearchCommon

// DB path of the entry which gets touched to request a refresh of all group-search-results
// (see GroupSearchCommon::gb_trigger)
#define TRIGGER_UPDATE_GROUP_RESULTS "/tmp/trigger/group_result_update"

class GroupSearchCommon : virtual Noncopyable {
    // controls and maintains validity of existing group-search-results

    typedef set<GroupSearch*> GroupSearchSet;

    GroupSearchSet searches; // all existing searches (normally only one)

    bool    cbs_installed;
    GBDATA *gb_trigger; // TRIGGER_UPDATE_GROUP_RESULTS (triggers ONCE for multiple DB changes)

    // The following two sets may also contain "node" entries from
    // completely different parts of the DB -> do not make assumptions!
    GBDATAset deleted_groups;  // entries are "deleted", i.e. access is invalid! Only comparing pointers is defined!
    GBDATAset modified_groups;

    ParentCache pcache;

    void add_callbacks(GBDATA *gb_main);
    void remove_callbacks(GBDATA *gb_main);

    void trigger_group_search_update() { GB_touch(gb_trigger); }

public:
    GroupSearchCommon() :
        cbs_installed(false),
        gb_trigger(NULp)
    {}
    ~GroupSearchCommon() {
        gs_assert(!cbs_installed);
    }

    ParentCache& get_parent_cache() { return pcache; }

    void notify_deleted(GBDATA *gb_node)  { deleted_groups.insert(gb_node);  trigger_group_search_update(); }
    void notify_modified(GBDATA *gb_node) { modified_groups.insert(gb_node); trigger_group_search_update(); }

    bool has_been_deleted(GBDATA *gb_node) { return deleted_groups.find(gb_node) != deleted_groups.end(); }
    bool has_been_modified(GBDATA *gb_node) { return modified_groups.find(gb_node) != modified_groups.end(); }

    void add(GroupSearch *gs) {
        if (empty()) {
            GBDATA *gb_main = gs->get_gb_main();
            add_callbacks(gb_main);
        }
        searches.insert(gs);
    }
    void remove(GroupSearch *gs) {
        searches.erase(gs);
        if (empty()) {
            GBDATA *gb_main = gs->get_gb_main();
            remove_callbacks(gb_main);
        }
    }
    bool empty() const { return searches.empty(); }

    void clear_notifications() {
        deleted_groups.clear();
        modified_groups.clear();
    }
    bool has_notifications() {
        return !(deleted_groups.empty() && modified_groups.empty());
    }

    void refresh_all_results() {
        if (has_notifications()) {
            pcache.fix_deleted_groups(deleted_groups);
            for (GroupSearchSet::iterator gs = searches.begin(); gs != searches.end(); ++gs) {
                GroupSearch *gr_search = *gs;
                gr_search->refresh_results_after_DBchanges();
            }
            clear_notifications();
        }
    }
};

static void tree_node_deleted_cb(GBDATA *gb_node, GroupSearchCommon *common, GB_CB_TYPE cbtype) {
    // A node counts as deleted group, if
    // - the node itself got deleted or
    // - the node changed and has no "group_name" entry (anymore).
    // Otherwise it is reported as modified.
    const bool group_gone =
        cbtype == GB_CB_DELETE ||
        !GB_entry(gb_node, "group_name"); // if group_name disappeared

    if (group_gone) common->notify_deleted(gb_node);
    else            common->notify_modified(gb_node);
}
static void group_name_changed_cb(GBDATA *gb_group_name, GroupSearchCommon *common) {
    // reports the node owning the changed "group_name" entry as modified
    GBDATA *gb_node = GB_get_father(gb_group_name);
    if (!gb_node) return;

    common->notify_modified(gb_node);
}
static void result_update_cb(GBDATA*, GroupSearchCommon *common) {
    // is called once after DB changes that might affect validity of group-search-results
    // (bound to the trigger entry; the entry itself is not needed here)
    common->refresh_all_results();
}

void GroupSearchCommon::add_callbacks(GBDATA *gb_main) {
    // Installs the callbacks tracking group changes plus the callback bound to the update trigger.
    // Failure is reported via GBT_message (in that case 'cbs_installed' remains false).
    gs_assert(!cbs_installed);

    GB_transaction ta(gb_main);
    gb_trigger = GB_search(gb_main, TRIGGER_UPDATE_GROUP_RESULTS, GB_INT);

    GB_ERROR error = GB_add_hierarchy_callback(gb_main, "node", GB_CB_CHANGED_OR_DELETED, makeDatabaseCallback(tree_node_deleted_cb, this));
    if (!error) {
        error = GB_add_hierarchy_callback(gb_main, "node/group_name", GB_CB_CHANGED, makeDatabaseCallback(group_name_changed_cb, this));
    }
    if (!error) {
        error = GB_add_callback(gb_trigger, GB_CB_CHANGED, makeDatabaseCallback(result_update_cb, this));
    }

    if (!error) {
        cbs_installed = true;
    }
    else {
        GBT_message(gb_main, GBS_global_string("Failed to bind callback (Reason: %s)", error));
    }
}

void GroupSearchCommon::remove_callbacks(GBDATA *gb_main) {
    // Removes the callbacks installed by add_callbacks() (does nothing if none are installed).
    // Failure is reported via GBT_message (in that case 'cbs_installed' remains true).
    if (!cbs_installed) return;

    GB_transaction ta(gb_main);

    GB_ERROR error = GB_remove_hierarchy_callback(gb_main, "node", GB_CB_CHANGED_OR_DELETED, makeDatabaseCallback(tree_node_deleted_cb, this));
    if (!error) {
        error = GB_remove_hierarchy_callback(gb_main, "node/group_name", GB_CB_CHANGED, makeDatabaseCallback(group_name_changed_cb, this));
    }
    // the trigger callback is removed regardless of previous errors:
    GB_remove_callback(gb_trigger, GB_CB_CHANGED, makeDatabaseCallback(result_update_cb, this));

    if (!error) {
        cbs_installed = false;
    }
    else {
        GBT_message(gb_main, GBS_global_string("Failed to remove callback (Reason: %s)", error));
    }
}

// ---------------------
//      GroupSearch

// shared by all GroupSearch instances; exists only while at least one instance exists
GroupSearchCommon *GroupSearch::common = NULp;

GroupSearch::GroupSearch(GBDATA *gb_main_, const GroupSearchCallback& redisplay_results_cb) :
    gb_main(gb_main_),
    redisplay_cb(redisplay_results_cb),
    sortedByOrder(false)
{
    // the first instance creates the shared state; every instance registers there
    if (!common) {
        common = new GroupSearchCommon;
    }
    common->add(this);
}

GroupSearch::~GroupSearch() {
    // unregister; the last instance destroys the shared state
    common->remove(this);
    if (!common->empty()) return;

    delete common;
    common = NULp;
}

static void collect_searched_trees(GBDATA *gb_main, const TreeNameSet& trees_to_search, SearchedTreeContainer& searched_tree) {
    // Appends one SearchedTree to 'searched_tree' for each tree of the database
    // that is listed in 'trees_to_search' (an empty set selects all trees).
    ConstStrArray all_trees;
    GBT_get_tree_names(all_trees, gb_main, false);

    const bool take_all = trees_to_search.empty();
    for (int t = 0; all_trees[t]; ++t) {
        const char *tree_name = all_trees[t];

        bool wanted = take_all;
        if (!wanted) {
            wanted = trees_to_search.find(tree_name) != trees_to_search.end();
        }
        if (wanted) {
            searched_tree.push_back(SearchedTree(tree_name, gb_main));
        }
    }
}

class Candidate : public FoundGroup {
    // candidate for a search result
    // - able to retrieve values (have tree to examine)
    RefPtr<GroupSearchTree> node;

public:
    Candidate(const FoundGroup& group_, GroupSearchTree *node_) :
        FoundGroup(group_),
        node(node_)
    {}
    Candidate(GBDATA *gb_group_, GroupSearchTree *node_) :
        FoundGroup(gb_group_),
        node(node_)
    {}

    FoundGroup& get_group() { return *this; }
    const FoundGroup& get_group() const { return *this; }

    GroupSearchTree *get_clade() { // return node where clade is shown (differs from get_node for keeled groups)
        TreeNode *keeld = node->keelTarget();
        return keeld ? DOWNCAST(GroupSearchTree*, keeld) : &*node;
    }
    const GroupSearchTree *get_clade() const {
        return const_cast<Candidate*>(this)->get_clade();
    }

    int get_keeledStateInfo() const { return node->keeledStateInfo(); }

    void inform_group(const GroupSearch& group_search, const string& hitReason) {
        // retrieve/store all information needed later (e.g. for sorting):
        hit_reason = hitReason;

        GroupSearchTree *clade = get_clade();

        if (nesting.needs_eval()) nesting = group_search.calc_nesting_level(get_pointer());
        if (size.needs_eval())    size    = clade->get_leaf_count();
        if (marked.needs_eval())  marked  = clade->get_marked_count();
        if (aid.needs_eval())     aid     = clade->get_average_ingroup_distance();

        if (keeled.needs_eval())  {
            keeled = get_keeledStateInfo();

            // set info needed for clade-overlap
            if (keeled) {
                if (!clade->is_leaf() && clade->is_normal_group()) { // got overlap
                    gb_overlap_group = clade->gb_node;
                    gs_assert(gb_overlap_group);
                }
            }
            else {
                if (node->is_keeled_group()) { // got overlap
                    gb_overlap_group = node->father->gb_node;
                    gs_assert(gb_overlap_group);
                }
            }

        }

        gs_assert(knows_details());
    }
};

class TargetGroup: public QueryTarget, virtual Noncopyable {
    // wrapper to use Candidate as QueryTarget
    //
    // Before any value can be queried, the target has to be aimed at a candidate (see aimTo).
    SmartPtr<Candidate> cand;

public:
    TargetGroup(GBDATA *gb_main_, const char *treename_) :
        QueryTarget(gb_main_, treename_)
    {}
    ~TargetGroup() OVERRIDE {}

    void aimTo(const Candidate& c) {
        cand = new Candidate(c); // stores a private copy of the candidate
    }
    void unAim() {
        cand.setNull();
    }

    const FoundGroup& get_group() const {
        gs_assert(cand.isSet());
        return cand->get_group();
    }
    const GroupSearchTree *get_clade() const {
        gs_assert(cand.isSet() && cand->get_clade());
        return cand->get_clade();
    }

    // values of the group currently aimed at:
    const char *get_group_name() const {
        return get_group().get_name();
    }
    unsigned get_group_size() const {
        return get_clade()->get_leaf_count();
    }
    unsigned get_marked_count() const {
        return get_clade()->get_marked_count();
    }
    unsigned get_zombie_count() const {
        return get_clade()->get_zombie_count();
    }
    double get_average_ingroup_distance() const {
        return get_clade()->get_average_ingroup_distance();
    }
    int get_keeledStateInfo() const {
        gs_assert(cand.isSet());
        return cand->get_keeledStateInfo();
    }

    // virtual QueryTarget interface:
    GBDATA *get_ACI_item() const {
        return get_group().get_pointer();
    }
};

// candidates collected while traversing one tree (see GroupSearch::perform_search)
typedef list<Candidate> CandidateList;

#if defined(ASSERTION_USED)
inline bool isCorrectParent(TreeNode *node, GBDATA *gb_group, GBDATA *gb_parent_group) {
    /*! check correctness of parent (caching)
     * @param node            the TreeNode where clade is shown in tree
     * @param gb_group        the group data related to node (at node for normal groups; at parent-node for keeled groups)
     * @param gb_parent_group the parent group data (may be NULp)
     * @return true if gb_parent_group is the correct parent
     */

    gs_assert(node && gb_group);

    TreeNode *pnode = node->find_parent_with_groupInfo(true);
    if (!pnode) { // no parent with group info -> expect no parent group
        gs_assert(node->gb_node == gb_group);
        return gb_parent_group == NULp;
    }

    if (node->gb_node == gb_group) { // = node is not keeled
        gs_assert(node->is_normal_group());
        return pnode->gb_node == gb_parent_group;
    }

    gs_assert(node->is_keeled_group());     // node is keeled
    gs_assert(pnode->keelTarget() == node); // pnode is node storing that keeled node
    gs_assert(pnode->gb_node == gb_group);  // groupdata is attached at pnode

    TreeNode *ppnode = pnode->find_parent_with_groupInfo(true); // continue with next parent
    if (!ppnode) {
        return gb_parent_group == NULp;
    }
    return ppnode->gb_node == gb_parent_group;
}
#endif

double GroupSearchTree::weighted_branchlength_sum(int group_size) const {
    // Sums up the branchlengths of this subtree. Each branch is weighted by
    // 'leafs below branch' * 'other leafs of the group' (= number of leaf-pairs connected via that branch).
    // The branch above a node without father does not contribute.
    int    leafs = get_leaf_count();
    double sum   = 0.0;

    if (father) {
        sum = get_branchlength() * leafs * (group_size-leafs);
    }
    if (is_leaf()) return sum;

    sum += get_leftson()->weighted_branchlength_sum(group_size);
    sum += get_rightson()->weighted_branchlength_sum(group_size);
    return sum;
}

void GroupSearchTree::calc_average_ingroup_distance(int group_size) const {
    // Stores the average distance over all leaf-pairs of the clade into 'aid'
    // (0 for clades without any pair, i.e. with less than 2 leafs).
    long pairs = long(group_size)*(group_size-1)/2; // warning: int-overflow with SSURef_NR99_128_SILVA_07_09_16_opt.arb

    if (!pairs) {
        aid = 0;
        return;
    }

    double wbranchsum = weighted_branchlength_sum(group_size);
    aid               = wbranchsum / pairs;

    gs_assert(aid>=0);
}

void GroupSearch::perform_search(GroupSearchMode mode) {
    // Runs the current query over the groups of all searched trees and updates 'found'.
    //
    // Influence of 'mode':
    // - GSM_FORGET_EXISTING: results of the previous search are dropped first
    // - GSM_ADD:             only groups NOT yet listed are tested; matching groups get added.
    //                        Without this flag only groups already listed are tested and
    //                        those which do not match are removed from the results.
    // - GSM_MISMATCH:        the query expression gets negated before searching
    //
    // Trees which fail to load are skipped (a message is shown).
    // Any other error gets reported via GBT_message and clears the results.

    typedef set< RefPtr<GBDATA> > ExistingHits;

    // remember the groups listed so far (needed to decide which groups have to be tested)
    ExistingHits existing_hits;
    if (mode & GSM_FORGET_EXISTING) forget_results(); // from last search
    else {
        for (FoundGroupCIter prev = found->begin(); prev != found->end(); ++prev) {
            existing_hits.insert(prev->get_pointer());
        }
    }

    bool match_unlisted = mode&GSM_ADD;

    if (query_expr.isNull()) addQueryExpression(CO_OR, CT_NAME, CM_MATCH, "*"); // default

    if (mode&GSM_MISMATCH) {
        query_expr->negate();
    }

    GB_ERROR error = NULp;
    {
        GB_transaction        ta(gb_main);
        SearchedTreeContainer searched_tree;

        // make species data available for marked/zombie counting of clades
        GroupSearchTree::set_species_data(GBT_get_species_data(gb_main));

        collect_searched_trees(gb_main, trees_to_search, searched_tree);

        // calc overall iteration count (for progress)
        long overall_iter_count = 0;
        for (SearchedTreeIter st = searched_tree.begin(); st != searched_tree.end(); ++st) { // LOOP_VECTORIZED[!<6.0]
            overall_iter_count += st->get_edge_iteration_count();
        }

        // iterate over all trees
        arb_progress progress("Searching groups", overall_iter_count);

        bool load_failures = false;
        for (SearchedTreeIter st = searched_tree.begin(); !error && st != searched_tree.end(); ++st) {
            GroupSearchRoot *troot = st->get_tree_root(); // loads the tree (NULp if loading failed)

            TargetGroup target_group(gb_main, st->get_name());

            if (!troot) {
                GBT_message(gb_main, GBS_global_string("Tree skipped: %s", st->get_load_error()));
                progress.inc_by(st->get_edge_iteration_count()); // account for the iterations skipped with this tree
                load_failures = true;
            }
            else {
                CandidateList candidate;
                {
                    // search candidate groups (and populate parent-group cache on-the-fly)

                    GBDATA       *gb_parent_group = NULp; // last traversed parent group
                    ParentCache&  pcache          = common->get_parent_cache();
                    ARB_edge      start           = rootEdge(troot);
                    ARB_edge      e               = start;

                    // traverse edges until the start edge is reached again (or an error occurs)
                    do {
                        switch (e.get_type()) {
                            case ROOT_EDGE:
                                gb_parent_group = NULp;
                                // fall-through
                            case EDGE_TO_LEAF: { // descent (store parents; perform match)
                                TreeNode *node = e.dest();
                                // [Note: order of if-tests is important, when keeled and normal group fall to same location]
                                if (node->is_keeled_group()) {
                                    // group data of a keeled group is stored at the father node
                                    TreeNode *parent = e.source();
                                    gs_assert(parent == node->get_father());

                                    GBDATA *gb_group = parent->gb_node;
                                    pcache.defineParentOf(gb_group, gb_parent_group);
                                    gs_assert(isCorrectParent(node, gb_group, gb_parent_group));
                                    gb_parent_group = gb_group;
                                }
                                if (!node->is_leaf() && node->has_group_info()) {
                                    GBDATA *gb_group = node->gb_node;

                                    if (node->is_normal_group()) {
                                        pcache.defineParentOf(gb_group, gb_parent_group);
                                        gs_assert(isCorrectParent(node, gb_group, gb_parent_group));
                                        gb_parent_group = gb_group;
                                    }

                                    ExistingHits::iterator prev_hit = existing_hits.find(gb_group);

                                    // test unlisted groups when adding; otherwise test listed groups only
                                    bool was_listed = prev_hit != existing_hits.end();
                                    bool test_match = !was_listed == match_unlisted;

                                    if (test_match) { // store candidates
                                        candidate.push_back(Candidate(gb_group, DOWNCAST(GroupSearchTree*, node)));
                                    }
                                }
                                break;
                            }
                            case EDGE_TO_ROOT: { // ascent (restore parents)
                                TreeNode *node = e.source();
                                // [Note: order of if-tests is important, when keeled and normal group fall to same location]
                                if (!node->is_leaf() && node->is_normal_group()) {
                                    GBDATA *gb_group = node->gb_node;
                                    gb_parent_group  = pcache.lookupParent(gb_group); // restore parent group
                                    gs_assert(isCorrectParent(node, gb_group, gb_parent_group));
                                }
                                if (node->is_keeled_group()) {
                                    TreeNode *parent = e.dest();
                                    gs_assert(parent == node->get_father());

                                    GBDATA *gb_group = parent->gb_node;
                                    gb_parent_group  = pcache.lookupParent(gb_group); // restore parent group
                                    gs_assert(isCorrectParent(node, gb_group, gb_parent_group));
                                }
                                break;
                            }
                        }

                        error = progress.inc_and_error_if_aborted();
                        e     = e.next();
                    }
                    while (e != start && !error);
                }

                // now run queries for all candidates:
                // (all candidates share the same listed-state; see 'test_match' above)
                bool was_listed = !match_unlisted;
                for (CandidateList::iterator cand = candidate.begin(); !error && cand != candidate.end(); ++cand) {
                    target_group.aimTo(*cand);

                    string hit_reason;
                    if (query_expr->matches(target_group, hit_reason)) {
                        if (!was_listed) {
                            found->add_candidate(*this, *cand, hit_reason);
                        }
                    }
                    else {
                        if (was_listed) {
                            // listed group does no longer match -> forget it (gets removed from 'found' below)
                            ExistingHits::iterator prev_hit = existing_hits.find(cand->get_group().get_pointer());
                            gs_assert(prev_hit != existing_hits.end()); // internal logic error
                            existing_hits.erase(prev_hit);
                        }
                    }
                }
                target_group.unAim();
                st->flush_loaded_tree(); // tree is no longer needed
            }
        }

        if (load_failures) {
            // remove failed trees from 'searched_tree'
            SearchedTreeContainer reduced;
            for (unsigned t = 0; t<searched_tree.size(); ++t) {
                if (!searched_tree[t].failed_to_load()) {
                    reduced.push_back(searched_tree[t]);
                }
            }
            int failed_trees = searched_tree.size()-reduced.size();
            GBT_message(gb_main, GBS_global_string("%i tree(s) failed to load (will operate on rest)", failed_trees));
            swap(reduced, searched_tree);
        }

        if (!match_unlisted && !error) { // keep only hits still listed in existing_hits
            QueriedGroups *kept = new QueriedGroups;

            for (FoundGroupCIter prev = found->begin(); prev != found->end(); ++prev) {
                if (existing_hits.find(prev->get_pointer()) != existing_hits.end()) {
                    kept->add_informed_group(*prev);
                }
            }
            found = kept;
        }
    }

    if (dups.isSet() && !error) {
        // if elements were kept from last search, they have an outdated clusterID -> reset
        for (FoundGroupIter g = found->begin(); g != found->end(); ++g) g->forget_cluster_id();

        error = clusterDuplicates();
    }

    if (error) {
        GBT_message(gb_main, error);
        found = new QueriedGroups; // clear results
    }

    // the (new) results are not sorted yet
    sortedByOrder = false;
}

// -----------------------------------------
//      code for dupe-cluster detection

inline bool contains(const WordSet& ws, const string& w) { return ws.find(w) != ws.end(); }
inline bool contains(const WordSet& ws, const char *w) { string W(w); return contains(ws, W); }

static void string2WordSet(const char *name, WordSet& words, const char *wordSeparators, const WordSet& ignored_words) {
    char *namedup = strdup(name);

    gs_assert(wordSeparators);

    ConstStrArray w;
    GBT_splitNdestroy_string(w, namedup, wordSeparators, SPLIT_DROPEMPTY);
    for (int i = 0; w[i]; ++i) {
        if (!contains(ignored_words, w[i])) words.insert(w[i]);
    }
}
inline void string_to_lower(string& s) {
    // converts all characters of 's' to lowercase (in place)
    const size_t len = s.length();
    for (size_t pos = 0; pos<len; ++pos) {
        s[pos] = tolower(s[pos]);
    }
}

struct GroupInfo {       // helper class for Clusterer::calc_matches
    string            name;  // name of group (converted to lowercase if constructed with sens==GB_IGNORE_CASE)
    RefPtr<GBDATA>    tree;  // tree data of the group (as delivered by FoundGroup::get_tree_data)
    SmartPtr<WordSet> words; // words of the group name (only set if constructed with 'prep_wordwise' == true)

    GroupInfo(const FoundGroup& g, bool prep_wordwise, GB_CASE sens, const char *wordSeparators, const WordSet& ignored_words) :
        name(g.get_name()),
        tree(g.get_tree_data())
    {
        const bool ignore_case = (sens == GB_IGNORE_CASE);
        if (ignore_case) {
            string_to_lower(name);
        }

        if (!prep_wordwise) return; // 'words' remains unset

        words = new WordSet;
        string2WordSet(name.c_str(), *words, wordSeparators, ignored_words);
    }

    size_t get_word_count() const {
        // Without word set the whole name counts as one word.
        // With word set the result may be zero (if group name only contains ignored words!)
        if (words.isNull()) return 1;
        return words->size();
    }
};
typedef vector<GroupInfo> GroupInfoVec;

class DupNameCriterion {
    DupNameCriterionType type;
    GB_CASE              sens;

    int     min_words;     // only used by DNC_WORDWISE
    WordSet ignored_words; // only used by DNC_WORDWISE

    string wordSeparators;

public:
    explicit DupNameCriterion(DupNameCriterionType exact, GB_CASE sens_) :
        type(exact),
        sens(sens_),
        min_words(1)
    {
        gs_assert(exact == DNC_WHOLENAME);
    }

    DupNameCriterion(DupNameCriterionType wordwise, GB_CASE sens_, int min_words_, const WordSet& ignored_words_, const char *wordSeparators_) :
        type(wordwise),
        sens(sens_),
        min_words(min_words_),
        wordSeparators(wordSeparators_)
    {
        gs_assert(wordwise == DNC_WORDWISE);
        gs_assert(min_words>0);

        for (WordSet::const_iterator wi = ignored_words_.begin(); wi != ignored_words_.end(); ++wi) {
            string word = *wi;
            if (sens == GB_IGNORE_CASE) string_to_lower(word);
            ignored_words.insert(word);
        }
    }

    DupNameCriterionType get_name_type() const { return type; }
    bool wordwise_name_matching() const { return get_name_type() == DNC_WORDWISE; }

    GB_CASE get_sensitivity() const { return sens; }
    const char *get_word_separators() const { return wordSeparators.c_str(); }

    const WordSet& get_ignored_words() const { return ignored_words; }

    int get_min_wanted_words() const { return min_words; }
    void set_min_wanted_words(int words) { min_words = words; }

    int name_matches_wordwise(const GroupInfo& gi1, const GroupInfo& gi2) const {
        int max_possible_word_matches = min(gi1.get_word_count(), gi2.get_word_count());
        if (max_possible_word_matches<min_words) return false;

        if (gi1.words.isNull()) {
            if (gi2.words.isNull()) {
                gs_assert(min_words<=1);
                gs_assert(!contains(ignored_words, gi1.name));
                gs_assert(!contains(ignored_words, gi2.name));
                return gi1.name.compare(gi2.name) == 0;
            }
            return name_matches_wordwise(gi2, gi1);
        }

        if (gi2.words.isNull()) {
            gs_assert(min_words<=1);
            gs_assert(!contains(ignored_words, gi2.name));
            return contains(*gi1.words, gi2.name);
        }

        int matched_words = 0;
        for (WordSet::const_iterator wi = gi1.words->begin(); wi != gi1.words->end(); ++wi) {
            if (contains(*gi2.words, *wi)) ++matched_words;
        }

        return matched_words>=min_words ? matched_words : false;
    }

    int name_matches(const GroupInfo& gi1, const GroupInfo& gi2) const {
        return type == DNC_WHOLENAME
            ? gi1.name.compare(gi2.name) == 0
            : name_matches_wordwise(gi1, gi2);
    }
};

typedef set<int>                        GroupClusterSet;
typedef GroupClusterSet::const_iterator GroupClusterCIter;

class GroupCluster {
    GroupClusterSet members;    // contains indices into Clusterer::groups
    int             num_groups; // size of Clusterer::groups

    mutable vector<uint8_t> lookup; // when non-empty: contains true for members

    inline bool valid(int i) const { return i >= 0 && i<num_groups; }
    inline bool have_lookup() const { return !lookup.empty(); }

public:
    GroupCluster(int num_of_groups)
        : num_groups(num_of_groups)
    {}
    ~GroupCluster() {}

    GroupCluster(const GroupCluster& other) : // does NOT copy lookup table
        members(other.members),
        num_groups(other.num_groups)
    {}
    DECLARE_ASSIGNMENT_OPERATOR(GroupCluster);

    void allow_lookup() const { // create lookup table -> allows to run 'contains()'
        if (!have_lookup()) {
            lookup.resize(num_groups, int(false));
            for (GroupClusterCIter ci = begin(); ci != end(); ++ci) {
                lookup[*ci] = true;
            }
            gs_assert(have_lookup());
        }
    }
    void forget_lookup() const { lookup.clear(); }

    void clear() {
        if (have_lookup()) {
            for (GroupClusterCIter ci = begin(); ci != end(); ++ci) lookup[*ci] = false;
        }
        members.clear();
    }

    void insert(int i) {
        gs_assert(valid(i));
        members.insert(i);
        if (have_lookup()) lookup[i] = true;
    }
    void erase(int i) {
        gs_assert(valid(i));
        members.erase(i);
        if (have_lookup()) lookup[i] = false;
    }

    bool contains(int i) const {
        gs_assert(valid(i));
        gs_assert(have_lookup());
        return lookup[i];
    }

    bool empty() const { return members.empty(); }
    size_t size() const { return members.size(); }

    GroupClusterCIter begin() const { return members.begin(); }
    GroupClusterCIter end() const { return members.end(); }
};


class DupCriteria : public DupNameCriterion {
    // Complete criteria for duplicate search:
    // name criterion (base class) + tree criterion + minimum cluster size.

    bool                 listDups; // true->list duplicate groups; false->list "unique" groups (non-duplicate groups)
    DupTreeCriterionType ttype;
    int                  minSize;  // minimum cluster size (for DLC_DIFF_TREE: minimum number of different trees per cluster)

public:
    DupCriteria(bool listDups_, const DupNameCriterion& nameCrit_, DupTreeCriterionType ttype_, int minSize_) :
        DupNameCriterion(nameCrit_),
        listDups(listDups_),
        ttype(ttype_),
        minSize(minSize_)
    {
        gs_assert(minSize>=2);
    }

    DupTreeCriterionType get_tree_type() const { return ttype; }
    bool want_unique_groups() const { return !listDups; }

    bool is_inferable() const {
        // An inferable criteria has to allow the following deduction:
        // (A == B) and (B == C) -> (A == C)
        //
        // For comparing group names,
        // - whole name comparison is an inferable criteria
        // - wordwise comparison isnt!

        // Note: comparing trees for equality is inferable,
        //       comparing trees for difference isnt.

        return !wordwise_name_matching();
    }

    bool tree_matches(const GBDATA *data1, const GBDATA *data2) const {
        // Tests whether two groups (given by their tree data) fulfill the tree criterion.
        //
        // 'did_match' is initialized, so that a defined value (false) is returned
        // even if 'ttype' holds a value not handled by the switch below.
        bool did_match = false;
        switch (ttype) {
            case DLC_SAME_TREE:
                did_match = data1 == data2;
                break;

            case DLC_DIFF_TREE:
                did_match = data1 != data2;
                break;

            case DLC_ANYWHERE:
                did_match = true; // ignore tree membership
                break;
        }
        return did_match;
    }

    int min_cluster_size() const { return minSize; }

    // true if 'cluster' contains at least 'minSize' members
    bool big_enough(const GroupCluster& cluster) const { return !cluster.empty() && int(cluster.size())>=minSize; }
};

class SymmetricMatrixMapper : virtual Noncopyable {
    // Translates pairs of matrix indices into indices of a linear array (and back).
    //
    // The mapping relies on the following properties of every x/y-pair:
    // - x!=y (the diagonal is never used)
    // - value(x,y)==value(y,x)

    int size;     // number of rows (= number of columns) of the matrix
    int lin_size; // number of elements of the linear array

    int *firstIndexOfRow; // linear index of first element of each row
    void init_firstIndexOfRow() {
        firstIndexOfRow[0] = 0;

        int next_first = 0; // row 'y' contains 'y' elements -> next row starts 'y' elements later
        for (int y = 1; y<size; ++y) {
            firstIndexOfRow[y]  = next_first;
            next_first         += y;
        }
    }

public:
    SymmetricMatrixMapper(int elements) :
        size(elements),
        lin_size(size*(size-1)/2),
        firstIndexOfRow(new int[size])
    {
        gs_assert(elements>=2); // smaller is useless
        init_firstIndexOfRow();
    }
    ~SymmetricMatrixMapper() {
        delete [] firstIndexOfRow;
    }

    int linear_size() const { return lin_size; }
    int linear_index(int x, int y) const {
        // maps the pair x/y (in any order) to its linear index
        const int lower  = x>y ? y : x;
        const int higher = x>y ? x : y;

        gs_assert(lower<higher); // equal indices not allowed
        gs_assert(higher<size);
        gs_assert(lower>=0);

        return firstIndexOfRow[higher]+lower;
    }

#if defined(UNIT_TESTS)
    void to_xy(int lin, int& x, int& y) const { // Note: only used in test-code
        // reverse mapping (if needed in production code: maybe use table for speedup)
        y = 1;
        while (y<size && lin>=y) {
            lin -= y;
            ++y;
        }
        x = lin;
    }
#endif
};

class Clusterer {
    // Detects clusters of duplicate groups inside 'groups' according to 'criteria'.
    //
    // The constructor calculates the pairwise match tables (see calc_matches).
    // Clusters are then searched and delivered by find_and_deliverTo().

    SmartPtr<QueriedGroups> groups;   // the groups to cluster
    SmartPtr<DupCriteria>   criteria; // defines when groups are considered duplicates
    SymmetricMatrixMapper   symmap;   // maps pairs of group indices to indices into the match tables

    vector<uint8_t> name_matches; // result of DupCriteria::name_matches for each pair (indexed via pairIdx)
    vector<bool>    tree_matches; // result of DupCriteria::tree_matches for each pair (indexed via pairIdx)

    vector<uint8_t> words; // stores number of words for each group (indices into 'groups'; only valid when wordwise_name_matching)

    int          next_id;   // used for next cluster
    GroupCluster delivered; // stores indices (into 'groups') of all delivered groups

    int pairIdx(int i, int j) const { return symmap.linear_index(i, j); }
    void calc_matches(GBDATA *gb_main);

    int fits_into_cluster(int idx, const GroupCluster& cluster, bool strong_fit) const {
        // Tests whether the group 'idx' fits into 'cluster'.
        //
        // returns:
        // idx  = name AND tree match against all members of 'cluster'
        // -idx = group is only a candidate (never returned if 'strong_fit' is true)
        // 0    = group does not fit (or has too few words, was already delivered or already is a member)
        //
        // Requires lookup tables for 'cluster' and 'delivered' (both are queried using contains()).

        const int min_words    = criteria->get_min_wanted_words();
        bool      enough_words = min_words<2 || words[idx] >= min_words;

        gs_assert(min_words>0);

        int fitting = 0;
        if (enough_words && !already_delivered(idx) && !cluster.contains(idx)) {
            bool fitsAll    = true;
            // NOTE(review): 'weakFitAny' starts with true and is only or-ed below,
            // i.e. it is always true when tested behind the loop -- confirm whether this is intended.
            bool weakFitAny = true;

            // loop stops at first cluster member which does not match
            for (GroupClusterCIter ci = cluster.begin(); fitsAll && ci != cluster.end(); ++ci) {
                const int pi      = pairIdx(idx, *ci);
                bool      fitWeak = name_matches[pi] >= min_words; // weak fit = name matches (tree criterion ignored)

                fitsAll    = fitWeak && tree_matches[pi];
                weakFitAny = weakFitAny || fitWeak;
            }

            if      (fitsAll)                   fitting = idx;
            else if (weakFitAny && !strong_fit) fitting = -idx;
        }
        return fitting;
    }

    int find_next_group_fitting_into(const GroupCluster& cluster, int behind_idx, bool strong_fit) const {
        // searches for the next group (with an index > 'behind_idx') fitting into 'cluster'.
        //
        // returns:
        // 0   = no such group found
        // >0  = index of first fitting group
        // <0  = index of candidate group (for cluster extension). not reported if 'strong_fit' is true

        gs_assert(!cluster.empty());
        gs_assert(behind_idx>=0);

        const int gcount  = groups->size();
        int       fitting = 0;

        // Note: tested indices are always >0 (because behind_idx>=0), i.e. result 0 unambiguously means "nothing found"
        for (int idx = behind_idx+1; idx<gcount && !fitting; ++idx) {
            fitting = fits_into_cluster(idx, cluster, strong_fit);
        }

        gs_assert(implicated(fitting>0, !cluster.contains(fitting)));
        gs_assert(implicated(strong_fit, fitting>=0));

        return fitting;
    }

    int find_next_candidate_group_fitting_into(const GroupCluster& cluster, const vector<int>& candidates, int& cand_idx, bool strong_fit) const {
        // similar to find_next_group_fitting_into(), but only considers indices listed in 'candidates' (instead of all)
        // (they can be retrieved using find_next_group_fitting_into before)
        //
        // additionally 'cand_idx' is set to the index corresponding with result
        // ('cand_idx' remains unchanged if nothing was found, i.e. if the result is 0)

        gs_assert(!cluster.empty());
        gs_assert(cand_idx>=-1);

        const int cand_size = candidates.size();
        int       fitting   = 0;

        // search starts behind the candidate at 'cand_idx' (-1 -> start with first candidate)
        for (int cidx = cand_idx+1; cidx<cand_size; ++cidx) {
            int idx = candidates[cidx];

            fitting = fits_into_cluster(idx, cluster, strong_fit);
            if (fitting) {
                cand_idx = cidx;
                break;
            }
        }

        gs_assert(implicated(fitting>0, !cluster.contains(fitting)));
        gs_assert(implicated(strong_fit, fitting>=0));

        return fitting;
    }

    void extendClusterToBiggest(GroupCluster& curr, int next_idx, GroupCluster& best, arb_progress& progress_cluster, double done_low, double done_high);

public:
    Clusterer(GBDATA *gb_main, SmartPtr<QueriedGroups> groups_, SmartPtr<DupCriteria> criteria_) :
        groups(groups_),
        criteria(criteria_),
        symmap(groups->size()),
        next_id(1),
        delivered(groups->size())
    {
        calc_matches(gb_main);
    }

    // number of groups minus minimum cluster size
    int max_cluster_start_index() const { return groups->size() - criteria->min_cluster_size(); }

    void buildInferableClusterStartingWith(int start_idx, GroupCluster& cluster);
    void findBestClusterBasedOnWords(int wanted_words, GroupCluster& best, arb_progress& progress_cluster, int& first_cluster_found_from_index);

    // Note: requires lookup table of 'delivered' (see GroupCluster::allow_lookup)
    bool already_delivered(int idx) const { return delivered.contains(idx); }
    void deliverCluster(const GroupCluster& ofCluster, QueriedGroups& toResult) {
        // Adds all groups of 'ofCluster' to 'toResult'.
        // All groups of the cluster get the same (new) cluster id and are marked as delivered.

        int this_id = next_id++;
        for (GroupClusterCIter ci = ofCluster.begin(); ci != ofCluster.end(); ++ci) {
            int idx = *ci;

            // avoid duplication of groups in result list
            gs_assert(!already_delivered(idx));
            delivered.insert(idx);

            FoundGroup& g = (*groups)[idx];
            g.set_cluster_id(this_id);
            toResult.add_informed_group(g);
        }
    }

    void find_and_deliverTo(QueriedGroups& toResult);
    void deliverRest(QueriedGroups& toResult) {
        // adds all groups to 'toResult', which have not been delivered (as member of a cluster) yet
        int idx = 0;
        for (FoundGroupCIter g = groups->begin(); g != groups->end(); ++g, ++idx) {
            if (!already_delivered(idx)) {
                toResult.add_informed_group(*g);
            }
        }
    }

    int calc_max_used_words(bool ignore_delivered) {
        // returns the maximum number of words used by any group
        // (if 'ignore_delivered' is true, only groups not delivered yet are considered)

        gs_assert(criteria->wordwise_name_matching()); // otherwise words array contains nothing

        int       maxWords = 0;
        const int maxidx   = groups->size();

        for (int idx = 0; idx<maxidx; ++idx) {
            int thisWords = words[idx];

            if (thisWords>maxWords && (ignore_delivered ? !already_delivered(idx) : true)) {
                maxWords = thisWords;
            }
        }

        return maxWords;
    }

};

void Clusterer::calc_matches(GBDATA *gb_main) {
    // Calculates the pairwise match tables 'name_matches' and 'tree_matches'
    // (one entry for each pair of groups; indexed via 'symmap').
    // When matching wordwise, 'words' is filled with the word count of each group.
    //
    // If the user aborts, the entries of pairs not compared yet remain zero/false.

    const int  gcount    = groups->size();
    const int  lin_range = symmap.linear_size();
    const long way_to_go = long(gcount) + lin_range;

    arb_progress progress(GBS_global_string("[pass 1/2: duplicity matrix (%s)]", GBS_readable_size(lin_range, "b")), way_to_go);

    // Both tables are written (and later read) using operator[].
    // That requires real elements: reserve() only allocates capacity and leaves size()==0,
    // which makes any indexed access out-of-range (undefined behavior).
    name_matches.assign(lin_range, 0);
    tree_matches.assign(lin_range, false);

    GroupInfoVec info;
    info.reserve(gcount);

    { // fetch info to speed up calculation below
        GB_transaction ta(gb_main);

        bool            prep_wordwise  = criteria->wordwise_name_matching();
        GB_CASE         sens           = criteria->get_sensitivity();
        const char     *wordSeparators = criteria->get_word_separators();
        const WordSet&  ignoredWords   = criteria->get_ignored_words();

        for (FoundGroupCIter g = groups->begin(); g != groups->end() && !progress.aborted(); ++g) {
            info.push_back(GroupInfo(*g, prep_wordwise, sens, wordSeparators, ignoredWords));
            if (prep_wordwise) {
                const GroupInfo& ginfo = info.back();
                words.push_back(ginfo.get_word_count());
            }
            ++progress;
        }
    }

    for (int i1 = 0; i1<gcount && !progress.aborted(); ++i1) { // calculate pairwise group matches
        for (int i2 = i1+1; i2<gcount && !progress.aborted(); ++i2) {
            const int li = symmap.linear_index(i1, i2);

            name_matches[li] = criteria->name_matches(info[i1], info[i2]);
            tree_matches[li] = criteria->tree_matches(info[i1].tree, info[i2].tree);

            ++progress;
        }
    }
}

void Clusterer::buildInferableClusterStartingWith(const int start_idx, GroupCluster& cluster) {
    // Builds the cluster containing the group at 'start_idx' plus all groups (with higher index) fitting to it.
    //
    // 'cluster' has to be empty when called.
    // On return it either contains a cluster accepted by DupCriteria::big_enough or is empty.

    gs_assert(criteria->is_inferable()); // works only for inferable compare criteria

    int          gcount = groups->size();
    arb_progress progress_build(long(gcount-start_idx-1)); // one step for each group behind 'start_idx'

    gs_assert(cluster.empty());
    gs_assert(!already_delivered(start_idx));
    cluster.insert(start_idx); // always add group at 'start_idx'

    GroupCluster weakCand(gcount); // collects non-strong, possible weak matches

    {
        int pcount   = start_idx; // index up to which progress has been reported
        int curr_idx = start_idx; // search continues behind this index
        while (!progress_build.aborted()) {
            const int addable = find_next_group_fitting_into(cluster, curr_idx, false);
            if (!addable) break;

            if (addable>0) { // found a strong match
                cluster.insert(addable);
                curr_idx = addable;
            }
            else {
                gs_assert(addable<0); // found a weak match
                weakCand.insert(-addable);
                curr_idx = -addable;
            }

            gs_assert(curr_idx>pcount);
            progress_build.inc_by(curr_idx-pcount);
            pcount = curr_idx;
        }
    }

    if (criteria->big_enough(cluster) && !progress_build.aborted()) {
        // extend cluster (by adding groups that match weak)
        // - e.g. add groups from same tree when searching for different trees

        if (!weakCand.empty()) {
            GroupCluster toAdd(gcount);

            // Note: weak candidates are only added when searching in different trees
            if (criteria->get_tree_type() == DLC_DIFF_TREE) {
                for (GroupClusterCIter w = weakCand.begin(); w != weakCand.end(); ++w) {
                    // accept candidate only if its name matches ALL members of the cluster
                    int nameFitsAll = true;
                    for (GroupClusterCIter ci = cluster.begin(); nameFitsAll && ci != cluster.end(); ++ci) {
                        int pi      = pairIdx(*w, *ci);
                        nameFitsAll = name_matches[pi];
                    }
                    if (nameFitsAll) toAdd.insert(*w);
                }
            }
            // add afterwards (avoids modifying 'cluster' while iterating over it)
            for (GroupClusterCIter a = toAdd.begin(); a != toAdd.end(); ++a) cluster.insert(*a);
        }
    }
    else { // forget if too small
        cluster.clear();
    }

    progress_build.done();

    gs_assert(contradicted(cluster.empty(), criteria->big_enough(cluster)));
}

inline unsigned long permutations(int elems) {
    // Returns elems*elems/2-elems (used to estimate progress in Clusterer::extendClusterToBiggest).
    //
    // The term is calculated using a wider type, because 'elems*elems'
    // overflows 'int' (undefined behavior) as soon as elems exceeds 46340.
    //
    // Note: for elems==1 the term is negative (-1) and wraps around when converted to the unsigned result.
    const long long n = elems;
    return static_cast<unsigned long>(n*n/2 - n);
}

void Clusterer::extendClusterToBiggest(GroupCluster& curr, int next_idx, GroupCluster& best, arb_progress& progress_cluster, double done_low, double done_high) {
    // extends cluster 'curr' (using all possible combinations starting at 'next_idx' = index into 'groups')
    // stores best (=biggest) cluster in 'best'
    //
    // 'done_low' and 'done_high' are only used to calculate the progress reported to 'progress_cluster'.

    vector<int> candidates; // collect all possible groups
    {
        // Note: search starts BEHIND 'next_idx' (see find_next_group_fitting_into)
        int idx = next_idx;
        while (1) {
            const int addable = find_next_group_fitting_into(curr, idx, true);
            if (!addable) break;

            candidates.push_back(addable);
            idx = addable;
        }
    }

    if ((candidates.size()+curr.size()) > best.size()) { // any chance to find bigger cluster?
        stack<int> previous;      // previously added indices (into candidates)
        int        curr_idx = -1; // last added (i.e. start with candidates[0])

        const int           del_size          = delivered.size();
        const unsigned long permutation_count = permutations(candidates.size());

        // backtracking search:
        // - add fitting candidates as long as possible,
        // - if no further candidate fits: remember cluster (if best so far),
        //   remove the candidate added last and continue searching behind it.
        while (!progress_cluster.aborted()) {
            int addable = find_next_candidate_group_fitting_into(curr, candidates, curr_idx, true);
            gs_assert(addable>=0);
            if (addable) {
                curr.insert(addable);
                previous.push(curr_idx);
            }
            else {
                if (curr.size() > best.size() && criteria->big_enough(curr)) { // store 'curr' cluster if better
                    best = curr;

                    // NOTE(review): 'permutation_count' is zero for 0 or 2 candidates (-> division by zero below)
                    // and 'candidates.size()-best.size()' is an unsigned subtraction -- verify this calculation.
                    const unsigned long permutations_left    = permutations(candidates.size()-best.size());
                    const double        done_percent         = (permutation_count-permutations_left) / double(permutation_count);
                    const double        overall_done_percent = done_low + (done_high-done_low)*done_percent;

                    progress_cluster.inc_to_avoid_overflow(del_size + best.size() * overall_done_percent); // @@@ calculation seems to be wrong (overflows)
                }
                if (previous.empty()) break; // end iteration

                // undo last addition and continue behind that candidate
                const int last_cidx = previous.top();
                const int last_add  = candidates[last_cidx];

                curr.erase(last_add);
                previous.pop();
                curr_idx = last_cidx;

                const int    rest_cand = candidates.size() - (curr_idx+1);
                const size_t poss_size = rest_cand + curr.size();
                if (poss_size<best.size()) break; // end iteration (impossible to collect enough groups to form a bigger cluster)
            }
        }

        progress_cluster.inc_to_avoid_overflow(del_size + best.size() * done_high); // @@@ calculation seems to be wrong (overflows)
    }
}

void Clusterer::findBestClusterBasedOnWords(int wanted_words, GroupCluster& best, arb_progress& progress_cluster, int& first_cluster_found_from_index) {
    // Searches the biggest cluster of (undelivered) groups sharing at least 'wanted_words' words.
    // If a cluster was found, it is extended by groups matching with fewer words
    // (down to the minimum number of words wanted by 'criteria').
    //
    // 'best' has to be empty when called; it receives the result (remains empty if no cluster was found).
    // 'first_cluster_found_from_index' is the start index for the search and gets updated to the
    // start index from which the first cluster was found.

    gs_assert(!criteria->is_inferable()); // thorough search not required
    gs_assert(best.empty());

    {
        const int old_min_words = criteria->get_min_wanted_words();
        criteria->set_min_wanted_words(wanted_words);

        const int gcount        = groups->size();
        const int max_start_idx = gcount - criteria->min_cluster_size(); // highest index from which a big enough cluster may start

        GroupCluster curr(gcount);
        curr.allow_lookup();

        // Number of passes = 1 (search using 'wanted_words')
        //                  + one extension pass for each word count in [old_min_words, wanted_words-1].
        // (the previous value was one too small, i.e. it was zero for wanted_words==old_min_words,
        // resulting in a division by zero, and it let the last pass report a range beyond 1.0)
        const int    extension_count    = 1+(wanted_words-old_min_words);
        const double done_per_extension = 1.0/extension_count;

        int first_index = 0;

        // Note: 'max_start_idx' itself is a valid start index:
        // a cluster starting there may still consist of the last 'min_cluster_size' groups.
        for (int start_idx = first_cluster_found_from_index; start_idx<=max_start_idx && !progress_cluster.aborted(); ++start_idx) {
            if (words[start_idx]>=wanted_words && !already_delivered(start_idx)) {
                curr.clear();
                curr.insert(start_idx);

                extendClusterToBiggest(curr, start_idx, best, progress_cluster, 0.0, done_per_extension);
                if (!first_index && !best.empty()) {
                    first_cluster_found_from_index = first_index = start_idx;
                }
            }
        }

        if (wanted_words>old_min_words && !best.empty() && !progress_cluster.aborted()) { // may less words be accepted?
            // extend cluster with "weaker" matches:

            int ext_done = 1;
            for (int fewer_words = wanted_words-1; fewer_words>=old_min_words && !progress_cluster.aborted(); --fewer_words, ++ext_done) {
                criteria->set_min_wanted_words(fewer_words);

                curr = best;
                curr.allow_lookup(); // (lookup table is not copied by assignment)

                const double done_start = ext_done*done_per_extension;
                extendClusterToBiggest(curr, 0, best, progress_cluster, done_start, done_start+done_per_extension);
            }
        }

        criteria->set_min_wanted_words(old_min_words); // restore original criteria
    }

    gs_assert(contradicted(best.empty(), criteria->big_enough(best)));
}


void Clusterer::find_and_deliverTo(QueriedGroups& toResult) {
    // Detects all clusters of duplicate groups and adds their members to 'toResult'.
    //
    // Inferable criteria use a fast search (one cluster per start index),
    // otherwise a thorough search is performed (starting with the groups containing most words).

    int          gcount = groups->size();
    GroupCluster curr(gcount);

    delivered.allow_lookup();
    curr.allow_lookup();

    if (criteria->is_inferable()) { // possible to use "fast" clustering?
        const int max_i = max_cluster_start_index();
        gs_assert(max_i>=0); // zero is legal (e.g. 2 groups and min. cluster-size==2)

        // Note: 'max_i' itself is a valid start index (the cluster may consist of
        // the last 'min_cluster_size' groups), i.e. there are max_i+1 start indices to test.
        arb_progress progress_cluster("[pass 2/2: fast duplicate search]", long(max_i)+1);
        for (int i = 0; i<=max_i && !progress_cluster.aborted(); ++i) {
            if (!already_delivered(i)) {
                curr.clear();
                buildInferableClusterStartingWith(i, curr);
                if (!curr.empty()) { // found a cluster
                    deliverCluster(curr, toResult);
                }
            }
            ++progress_cluster;
        }
    }
    else { // use thorough cluster search
        int       max_words = calc_max_used_words(true);
        const int min_words = criteria->get_min_wanted_words();

        long groups_with_min_words = 0;
        for (int gidx = 0; gidx<gcount; ++gidx) { // LOOP_VECTORIZED [!<5.0]
            if (words[gidx]>=min_words) ++groups_with_min_words;
        }

        arb_progress progress_cluster("[pass 2/2: thorough duplicate search]", groups_with_min_words);

        // search clusters sharing many words first, then reduce the number of wanted words
        int first_cluster_found_from_index = 0;
        while (max_words >= min_words && !progress_cluster.aborted()) {
            curr.clear();
            findBestClusterBasedOnWords(max_words, curr, progress_cluster, first_cluster_found_from_index);

            if (curr.empty()) { // no (further) cluster for this number of words
                --max_words;
                first_cluster_found_from_index = 0;
            }
            else {
                deliverCluster(curr, toResult);
                progress_cluster.inc_to(delivered.size());
            }
        }
        progress_cluster.done();
    }
}

GB_ERROR GroupSearch::clusterDuplicates() {
    // Restricts the current results ('found') according to the duplicate criteria ('dups'):
    // afterwards 'found' either contains the clustered duplicates or
    // (if unique groups are wanted) all groups that are no member of any cluster.
    //
    // Returns an error if there are too few hits or if the user aborted.

    GB_ERROR error       = NULp;
    bool     enough_hits = !(found->size()<2);

    if (enough_hits) {
        arb_progress progress("Restricting to duplicate groups", 2L);
        Clusterer    clusterer(gb_main, found, dups);

        if (clusterer.max_cluster_start_index()>=0) {
            // detect clusters of duplicates and add them to a fresh result list
            found = new QueriedGroups;
            clusterer.find_and_deliverTo(*found);

            if (dups->want_unique_groups() && !progress.aborted()) {
                // replace results by all groups NOT being part of any cluster
                QueriedGroups *nonDupGroups = new QueriedGroups;

                clusterer.deliverRest(*nonDupGroups);
                found = nonDupGroups;
            }
        }
        else {
            enough_hits = false; // e.g. 2 hits, but min. cluster-size==3
            progress.done();
        }

        error = progress.error_if_aborted(); // (no error can be set up to here)
    }

    if (!error && !enough_hits) {
        error = GBS_global_string("Not enough hits (%zu) to find duplicates", found->size());
    }

    return error;
}

const QueriedGroups& GroupSearch::get_results() {
    // returns the current results (creates an empty result list if there is none; sorts results if needed)
    if (found.isNull()) {
        found = new QueriedGroups;
    }
    if (sortedByOrder) return *found;

    sort_results();
    return *found;
}

struct has_been_deleted {
    // predicate: true for groups reported as deleted by 'common'
    GroupSearchCommon *common;

    has_been_deleted(GroupSearchCommon *common_)
        : common(common_)
    {}

    bool operator()(const FoundGroup& g) {
        return common->has_been_deleted(g.get_pointer());
    }
};
struct was_modified {
    // predicate: true for groups reported as modified by 'common'
    GroupSearchCommon *common;

    was_modified(GroupSearchCommon *common_)
        : common(common_)
    {}

    bool operator()(const FoundGroup& g) {
        return common->has_been_modified(g.get_pointer());
    }
};

bool QueriedGroups::erase_deleted(GroupSearchCommon *common) {
    // Removes all groups reported as deleted by 'common'.
    // Returns true if at least one group has been removed.

    FoundGroupIter keep_end = remove_if(found.begin(), found.end(), has_been_deleted(common));

    const bool removed_any = !(keep_end == found.end());
    found.erase(keep_end, found.end());

    invalidate_widths();
    return removed_any;
}
bool QueriedGroups::contains_changed(GroupSearchCommon *common) const {
    // true if at least one group is reported as modified by 'common'
    return find_if(found.begin(), found.end(), was_modified(common)) != found.end();
}

struct compare_by_criteria {
    const SortCriteria& by;
    compare_by_criteria(const SortCriteria& by_) : by(by_) {}
    bool operator()(const FoundGroup& g1, const FoundGroup& g2) const {
        int  cmp               = 0;
        bool last_was_modifier = false;
        bool reversed          = false;

        SortCriteria::const_iterator crit = by.begin();
        while ((!cmp || last_was_modifier) && crit != by.end()) {
            last_was_modifier = (*crit == GSC_REVERSE);
            switch (*crit) {
                case GSC_NONE:    gs_assert(0); break; // should not occur
                case GSC_REVERSE: reversed = !reversed; break;

                    // alphabetically:
                case GSC_NAME:     cmp = strcmp(g1.get_name(),      g2.get_name());      break;
                case GSC_TREENAME: cmp = strcmp(g1.get_tree_name(), g2.get_tree_name()); break;

                case GSC_HIT_REASON: cmp = g1.get_hit_reason().compare(g2.get_hit_reason()); break;

                    // small first:
                case GSC_TREEORDER: cmp = g1.get_tree_order() - g2.get_tree_order(); break;
                case GSC_NESTING:   cmp = g1.get_nesting()    - g2.get_nesting(); break;
                case GSC_CLUSTER:   cmp = g1.get_cluster_id() - g2.get_cluster_id(); break;
                case GSC_AID:       cmp = double_cmp(g1.get_aid(), g2.get_aid()); break;

                    // big first:
                case GSC_SIZE:      cmp = g2.get_size()       - g1.get_size(); break;
                case GSC_MARKED:    cmp = g2.get_marked()     - g1.get_marked(); break;
                case GSC_MARKED_PC: cmp = g2.get_marked_pc()  - g1.get_marked_pc(); break;
                case GSC_KEELED:    cmp = g2.get_keeled()     - g1.get_keeled(); break;
            }
            ++crit;
        }
        return reversed ? cmp>0 : cmp<0;
    }
};

void QueriedGroups::sort_by(const SortCriteria& by) {
    stable_sort(found.begin(), found.end(), compare_by_criteria(by));
    sorted_by = &by;
}

void QueriedGroups::remove_hit(size_t idx) {
    if (idx<size()) {
        FoundGroupContainer::iterator del = found.begin();
        advance(del, idx);
        found.erase(del);
        invalidate_widths();
    }
}

const ColumnWidths& QueriedGroups::get_column_widths() const {
    // returns the column widths (calculated from all groups, if currently not available)
    if (!widths.isNull()) return *widths;

    widths = new ColumnWidths;

    FoundGroupCIter g = begin();
    while (g != end()) {
        g->track_max_widths(*widths);
        ++g;
    }
    return *widths;
}
const char *QueriedGroups::get_group_display(const FoundGroup& g, bool show_tree_name) const {
    const ColumnWidths& width = get_column_widths(); // updates width information (if outdated)

    static GBS_strstruct display;

    display.erase();

    if (width.seen_keeled) display.put(g.get_keeled() ? KEELED_INDICATOR : ' ');
    display.nprintf(width.name+1, "%-*s", width.name, g.get_name()); // insert name as 1st column

    if (sorted_by) {
        // generate display-string depending on active SortCriteria:
        for (SortCriteria::const_iterator sc = sorted_by->begin(); sc != sorted_by->end(); ++sc) {
            switch (*sc) {
                case GSC_NONE: gs_assert(0); break; // invalid

                case GSC_TREENAME:  // ignored (either already shown or only have one tree)
                case GSC_TREEORDER: // dito
                case GSC_REVERSE:
                case GSC_NAME:
                    break;          // ignored for display

                case GSC_HIT_REASON:
                    display.nprintf(width.reason+1, " %-*s", width.reason, g.get_hit_reason().c_str());
                    break;

                case GSC_NESTING: {
                    int nesting_width = ColumnWidths::max2width(width.max_nesting);
                    display.nprintf(nesting_width+1, " %*i", nesting_width, g.get_nesting());
                    break;
                }
                case GSC_SIZE: {
                    int size_width = ColumnWidths::max2width(width.max_size);
                    display.nprintf(size_width+1, " %*i", size_width, g.get_size());
                    break;
                }
                case GSC_MARKED: {
                    int marked_width = ColumnWidths::max2width(width.max_marked);
                    display.nprintf(marked_width+1, " %*i", marked_width, g.get_marked());
                    break;
                }
                case GSC_MARKED_PC: {
                    int marked_width = ColumnWidths::max2width(width.max_marked_pc);
                    display.nprintf(marked_width+2, " %*i%%", marked_width, g.get_marked_pc());
                    break;
                }
                case GSC_CLUSTER: {
                    int cluster_width = ColumnWidths::max2width(width.max_cluster_id);
                    display.nprintf(cluster_width+2, " %*ic", cluster_width, g.get_cluster_id());
                    break;
                }
                case GSC_AID: {
                    int aid_width = ColumnWidths::max2width(width.max_aid);
                    display.nprintf(aid_width+6, " %*.4f", aid_width, g.get_aid());
                    break;
                }
                case GSC_KEELED: {
                    display.nprintf(2, " %i", g.get_keeled());
                    break;
                }
            }
        }
    }

    if (show_tree_name) {
        display.put(' ');
        display.cat(g.get_tree_name());
    }

    return display.get_data();
}

void QueriedGroups::add_candidate(const GroupSearch& group_search, Candidate& cand, const std::string& hit_reason) {
    // Register 'cand' as hit:
    // first the candidate is handed the search and the reason why it matched (inform_group),
    // afterwards its group is stored via add_informed_group().
    cand.inform_group(group_search, hit_reason);
    add_informed_group(cand.get_group());
}


void GroupSearch::refresh_results_after_DBchanges() {
    // Re-validate the stored hits and request a redisplay if any of them was erased or changed.
    // Nothing happens if there are no results.
    if (found.isNull() || found->empty()) return;

    // contains_changed() is only consulted if erase_deleted() reported nothing erased
    const bool need_redisplay = found->erase_deleted(common) || found->contains_changed(common);
    if (need_redisplay) {
        redisplay_cb(this);
    }
}

void GroupSearch::addSortCriterion(GroupSortCriterion gsc) {
    /*! add new primary sort criterion
     * previously added (different) criteria remain active, but become secondary, tertiary, ...
     *
     * Special cases:
     * - GSC_NONE forgets all criteria
     * - adding the criterion which already is the primary one changes nothing,
     *   except for GSC_REVERSE: two consecutive reversals cancel each other
     * - if 'gsc' already occurs further back in the order, that occurrence is removed
     *   together with all GSC_REVERSE entries directly in front of it
     */

    if (gsc == GSC_NONE) {
        forgetSortCriteria();
        return;
    }

    if (!order.empty() && order.front() == gsc) { // already is primary criterion
        if (gsc == GSC_REVERSE) {
            order.pop_front(); // eliminate duplicate reverse
            sortedByOrder = false;
        }
        return;
    }

    if (gsc != GSC_REVERSE) {
        // remove duplicated search criterion from order
        SortCriteria::iterator dup = find(order.begin(), order.end(), gsc);
        if (dup != order.end()) {
            // Extend the range to erase backwards over all directly preceding GSC_REVERSE entries.
            // The walk stops at begin(): decrementing begin() would be undefined behavior.
            SortCriteria::iterator first = dup;
            while (first != order.begin()) {
                SortCriteria::iterator before = first;
                --before;
                if (*before != GSC_REVERSE) break;
                first = before;
            }

            ++dup; // point behind duplicate
            order.erase(first, dup);
        }
    }

    order.push_front(gsc);
    sortedByOrder = false;
}

void GroupSearch::sort_results() {
    // Sort the hits by the current criteria list ('order').
    // Without any criterion nothing is done (and 'sortedByOrder' is left untouched).
    if (order.empty()) return;

    GB_transaction ta(gb_main); // sorting runs inside a database transaction
    found->sort_by(order);
    sortedByOrder = true;
}

void GroupSearch::setDupCriteria(bool listDups, DupNameCriterionType ntype, GB_CASE sens, DupTreeCriterionType ttype, int min_cluster_size) {
    gs_assert(ntype != DNC_WORDWISE); // use flavor below
    dups = new DupCriteria(listDups, DupNameCriterion(ntype, sens), ttype, min_cluster_size);
}
void GroupSearch::setDupCriteria(bool listDups, DupNameCriterionType ntype, GB_CASE sens, int min_words, const WordSet& ignored_words, const char *wordSeparators, DupTreeCriterionType ttype, int min_cluster_size) {
    gs_assert(ntype == DNC_WORDWISE); // use flavor above
    dups = new DupCriteria(listDups, DupNameCriterion(ntype, sens, min_words, ignored_words, wordSeparators), ttype, min_cluster_size);
}
void GroupSearch::setDupCriteria(bool listDups, DupNameCriterionType ntype, GB_CASE sens, int min_words, const char *ignored_words, const char *wordSeparators, DupTreeCriterionType ttype, int min_cluster_size) {
    WordSet ignoredWordsSet;
    WordSet none; // no words ignored in ignoredWordsSet
    string2WordSet(ignored_words, ignoredWordsSet, wordSeparators, none);
    setDupCriteria(listDups, ntype, sens, min_words, ignoredWordsSet, wordSeparators, ttype, min_cluster_size);
}


void GroupSearch::forgetDupCriteria() {
    // Drop criteria previously installed by setDupCriteria() ('dups' becomes null).
    dups.setNull();
}

GB_ERROR GroupSearch::delete_group(size_t idx) {
    // Delete the group belonging to hit number 'idx' from the database.
    // Returns an error message for an invalid index, otherwise the result of the deletion.
    if (idx >= found->size()) {
        return "index out-of-bounds";
    }
    return (*found)[idx].delete_from_DB();
}

GB_ERROR GroupSearch::delete_found_groups() {
    // Delete the groups of all hits from the database (inside one transaction).
    // Stops at the first failing deletion; the error is passed to the transaction on close.
    // @@@ use ARB_ERROR instead (whole module + callers)
    if (!has_results()) return NULp;

    GB_transaction ta(gb_main);
    GB_ERROR       error = NULp;

    FoundGroupIter group = found->begin();
    while (!error && group != found->end()) {
        error = group->delete_from_DB();
        ++group;
    }

    return ta.close(error);
}

// ------------------------------------------
//      ACI extension for group renaming

using namespace GBL_IMPL;

struct GroupRename_callenv : public GBL_call_env {
    const QueriedGroups& queried;
    int                  hit_idx;

    GroupRename_callenv(const QueriedGroups& queried_, int hit_idx_, const GBL_env& env_) :
        GBL_call_env(NULp, env_),
        queried(queried_),
        hit_idx(hit_idx_)
    {}

    bool legal_hit_index() const { return hit_idx>=0 && unsigned(hit_idx)<queried.size(); }

    const FoundGroup *get_hit_group() const {
        if (legal_hit_index()) return &queried[hit_idx];
        return NULp;
    }

    int get_dupidx(GB_ERROR& error) const {
        const FoundGroup *group = get_hit_group();
        if (!group) {
            error = "no hit";
            return -1;
        }

        int cluster = group->get_cluster_id();
        if (cluster == 0) {
            error = "no duplicate";
            return -1;
        }

        int dupidx = 0;

        for (FoundGroupCIter g = queried.begin(); g != queried.end(); ++g) {
            if (&*g == group) return dupidx;
            if (g->get_cluster_id() == cluster) dupidx++;
        }

        gs_assert(0); // something went wrong
        error = "unknown error";
        return -1;
    }

};

inline const GroupRename_callenv& custom_gr_env(GBL_command_arguments *args) {
    // Access the call environment of 'args' as GroupRename_callenv.
    // Only valid for commands evaluated with a GroupRename_callenv (the cast is a downcast).
    return DOWNCAST_REFERENCE(const GroupRename_callenv, args->get_callEnv());
}

static GB_ERROR grl_hitidx(GBL_command_arguments *args) {
    // ACI command "hitidx": outputs the index of the current hit (after conversion by info2bio()).
    // Accepts no parameters; fails with "no hit" if the hit index is illegal.
    COMMAND_DROPS_INPUT_STREAMS(args);

    GB_ERROR error = check_no_parameter(args);
    if (error) return error;

    const GroupRename_callenv& env = custom_gr_env(args);
    if (!env.legal_hit_index()) return "no hit";

    FORMAT_2_OUT(args, "%i", info2bio(env.hit_idx));
    return NULp;
}

static GB_ERROR grl_dupidx(GBL_command_arguments *args) {
    // ACI command "dupidx": outputs the position of the current hit among the hits of its
    // cluster (as delivered by get_dupidx(), after conversion by info2bio()).
    // Accepts no parameters; errors of get_dupidx() are passed through.
    COMMAND_DROPS_INPUT_STREAMS(args);

    GB_ERROR error = check_no_parameter(args);
    if (error) return error;

    const GroupRename_callenv& env = custom_gr_env(args);

    const int idx_in_cluster = env.get_dupidx(error);
    if (!error) {
        gs_assert(idx_in_cluster>=0);
        FORMAT_2_OUT(args, "%i", info2bio(idx_in_cluster));
    }
    return error;
}


static GB_ERROR grl_hitcount(GBL_command_arguments *args) {
    // ACI command "hitcount": outputs the overall number of hits. Accepts no parameters.
    COMMAND_DROPS_INPUT_STREAMS(args);

    GB_ERROR error = check_no_parameter(args);
    if (error) return error;

    const GroupRename_callenv& env = custom_gr_env(args);
    FORMAT_2_OUT(args, "%zu", env.queried.size());
    return NULp;
}
static GB_ERROR grl_groupsize(GBL_command_arguments *args) {
    // ACI command "groupSize": outputs the size of the current hit's group.
    // Accepts no parameters; fails with "no hit" if there is no current hit.
    COMMAND_DROPS_INPUT_STREAMS(args);

    GB_ERROR error = check_no_parameter(args);
    if (error) return error;

    const FoundGroup *current = custom_gr_env(args).get_hit_group();
    if (!current) return "no hit";

    FORMAT_2_OUT(args, "%i", current->get_size());
    return NULp;
}
static GB_ERROR grl_markedingroup(GBL_command_arguments *args) {
    // ACI command "markedInGroup": outputs the marked count of the current hit's group.
    // Accepts no parameters; fails with "no hit" if there is no current hit.
    COMMAND_DROPS_INPUT_STREAMS(args);

    GB_ERROR error = check_no_parameter(args);
    if (error) return error;

    const FoundGroup *current = custom_gr_env(args).get_hit_group();
    if (!current) return "no hit";

    FORMAT_2_OUT(args, "%i", current->get_marked());
    return NULp;
}
static GB_ERROR grl_aid(GBL_command_arguments *args) {
    // ACI command "aid": outputs the AID value of the current hit's group (format "%f").
    // Accepts no parameters; fails with "no hit" if there is no current hit.
    COMMAND_DROPS_INPUT_STREAMS(args);

    GB_ERROR error = check_no_parameter(args);
    if (error) return error;

    const FoundGroup *current = custom_gr_env(args).get_hit_group();
    if (!current) return "no hit";

    FORMAT_2_OUT(args, "%f", current->get_aid());
    return NULp;
}
static GB_ERROR grl_nesting(GBL_command_arguments *args) {
    // ACI command "nesting": outputs the nesting value of the current hit's group.
    // Accepts no parameters; fails with "no hit" if there is no current hit.
    COMMAND_DROPS_INPUT_STREAMS(args);

    GB_ERROR error = check_no_parameter(args);
    if (error) return error;

    const FoundGroup *current = custom_gr_env(args).get_hit_group();
    if (!current) return "no hit";

    FORMAT_2_OUT(args, "%i", current->get_nesting());
    return NULp;
}


// Additional ACI commands available while renaming groups
// (each entry maps the command name used in ACI expressions to its implementation).
static GBL_command_definition groupRename_command_table[] = {
    { "hitidx",        grl_hitidx },        // index of current hit
    { "dupidx",        grl_dupidx },        // index of current hit inside its cluster
    { "hitcount",      grl_hitcount },      // number of hits
    { "groupSize",     grl_groupsize },     // size of hit group
    { "markedInGroup", grl_markedingroup }, // marked count of hit group
    { "aid",           grl_aid },           // AID of hit group
    { "nesting",       grl_nesting },       // nesting of hit group

    { NULp, NULp } // table terminator (has to remain the last entry)
};

static const GBL_command_lookup_table& get_GroupRename_customized_ACI_commands() {
    // Returns the command lookup table used for group renaming.
    // The table is a function-local static, i.e. it is constructed on first call only.
    // It is built from 'groupRename_command_table' and ACI_get_standard_commands()
    // (presumably the custom commands extend the standard ones -- see GBL_custom_command_lookup_table).
    static GBL_custom_command_lookup_table clt(groupRename_command_table,
                                               ARRAY_ELEMS(groupRename_command_table)-1, // -1: do not count the { NULp, NULp } terminator
                                               ACI_get_standard_commands());
    return clt;
}

char *GS_calc_resulting_groupname(GBDATA *gb_main, const QueriedGroups& queried, int hit_idx, const char *input_name, const char *acisrt, ARB_ERROR& error) {
    /*! apply the ACI/SRT expression 'acisrt' to the group name 'input_name'
     * @param queried    list of hits (made available to the custom group-rename ACI commands)
     * @param hit_idx    index of the hit the name belongs to (an illegal index is tolerated;
     *                   commands depending on the hit will fail then)
     * @param input_name name to modify (empty or NULp is reported as error)
     * @param error      is set if NULp is returned
     * @return resulting name (whitespace-trimmed) or NULp on failure
     */
    if (!input_name || !input_name[0]) {
        error = "Error: empty input groupname";
        return NULp;
    }

    GB_transaction ta(gb_main);

    const FoundGroup *hit = NULp;
    if (hit_idx>=0 && unsigned(hit_idx)<queried.size()) {
        hit = &queried[hit_idx];
    }
    const char *treename = hit ? hit->get_tree_name() : NULp; // without a known hit, no tree name is passed

    GBL_env             env(gb_main, treename, get_GroupRename_customized_ACI_commands());
    GroupRename_callenv callEnv(queried, hit_idx, env);

    char *result = GB_command_interpreter_in_env(input_name, acisrt, callEnv);
    if (result) {
        freeset(result, GBS_trim(result)); // trim whitespace
    }
    else {
        error = GBS_global_string("Error: %s", GB_await_error());
    }
    return result;
}

ARB_ERROR GroupSearch::rename_group(size_t idx, const char *acisrt) {
    // Rename the group of hit number 'idx' by applying 'acisrt'.
    // An invalid index is reported as error.
    if (idx >= found->size()) {
        return "index out-of-bounds";
    }
    return (*found)[idx].rename_by_ACI(acisrt, *found, idx);
}

ARB_ERROR GroupSearch::rename_found_groups(const char *acisrt) {
    // Rename the groups of all hits by applying 'acisrt' (inside one transaction).
    // Stops at the first failing rename; the error is passed to the transaction on close.
    ARB_ERROR error;
    if (has_results()) {
        GB_transaction ta(gb_main);

        MessageSpamFilter suppress("problematic group names");

        FoundGroupIter group = found->begin();
        for (int idx = 0; !error && group != found->end(); ++idx, ++group) {
            error = group->rename_by_ACI(acisrt, *found, idx); // 'idx' == position of 'group' in result list
        }
        error = ta.close(error);
    }
    return error;
}

ARB_ERROR GroupSearch::fold_group(size_t idx, GroupFoldingMode mode) {
    // Change the folding of the group of hit number 'idx' according to 'mode'.
    // An invalid index is reported as error.
    if (idx >= found->size()) {
        return "index out-of-bounds";
    }
    return (*found)[idx].change_folding(mode);
}

GBDATA *GroupSearch::get_parent_group(GBDATA *gb_group) const {
    // Look up the parent group of 'gb_group' in the common parent cache.
    // Works for groups which are members of one of the searched trees.
    ParentCache& cache = common->get_parent_cache();
    return cache.lookupParent(gb_group);
}

int GroupSearch::calc_nesting_level(GBDATA *gb_group) const {
    // Count the parent groups enclosing 'gb_group'
    // (0 if it has no parent group; also 0 if 'gb_group' is NULp).
    int level = 0;
    if (gb_group) {
        for (GBDATA *gb_up = get_parent_group(gb_group); gb_up; gb_up = get_parent_group(gb_up)) {
            ++level;
        }
    }
    return level;
}


ARB_ERROR GroupSearch::fold_found_groups(GroupFoldingMode mode) {
    /*! change folding of all found groups (inside one transaction)
     * @param mode bit-combination of GroupFoldingMode flags:
     * - GFM_EXPAND / GFM_TOGGLE: basic operation applied to each target group
     * - GFM_RECURSE:             also operate on all (direct and indirect) parent groups
     * - GFM_PARENTS_ONLY:        together with GFM_RECURSE: operate on the parents, but not on the found groups themselves
     * - GFM_COLLAPSE_REST:       afterwards collapse all other groups of the searched trees
     * @return first error encountered (processing stops there)
     */
    ARB_ERROR      error;
    GB_transaction ta(gb_main);

    // create a set of affected groups
    GBDATAset targetGroups;
    for (FoundGroupCIter g = found->begin(); g != found->end(); ++g) {
        targetGroups.insert(g->get_pointer());
    }

    if (mode & GFM_RECURSE) { // also operate on parents
        GBDATAset testParentsOf = targetGroups;
        if (mode & GFM_PARENTS_ONLY) targetGroups.clear();
        while (!testParentsOf.empty()) { // redo until no more parents get added
            GBDATAset addedParents;
            for (GBDATAset::iterator t = testParentsOf.begin(); t != testParentsOf.end(); ++t) {
                GBDATA *gb_parent_group = get_parent_group(*t);
                if (gb_parent_group && targetGroups.find(gb_parent_group) == targetGroups.end()) {
                    addedParents.insert(gb_parent_group);
                    targetGroups.insert(gb_parent_group);
                }
            }
            testParentsOf = addedParents; // next round: only test parents of the groups added in this round
        }
    }

    // apply the basic operation (all other flags were handled above or are handled below)
    GroupFoldingMode basicMode = GroupFoldingMode(mode & (GFM_EXPAND|GFM_TOGGLE));
    for (GBDATAset::iterator n = targetGroups.begin(); n != targetGroups.end() && !error; ++n) {
        error = FoundGroup(*n).change_folding(basicMode);
    }

    if (!error && (mode & GFM_COLLAPSE_REST)) { // collapse everything else
        SearchedTreeContainer searched_tree;
        collect_searched_trees(gb_main, trees_to_search, searched_tree);

        for (SearchedTreeIter t = searched_tree.begin(); t != searched_tree.end() && !error; ++t) {
            GBDATA *gb_tree_data = t->get_tree_data();
            for (GBDATA *gb_node = GB_entry(gb_tree_data, "node"); gb_node && !error; gb_node = GB_nextEntry(gb_node)) {
                GBDATA *gb_name = GB_entry(gb_node, "group_name");
                if (gb_name) { // named node (aka group)
                    if (targetGroups.find(gb_node) == targetGroups.end()) { // not already handled before
                        error = FoundGroup(gb_node).change_folding(GFM_COLLAPSE);
                    }
                }
            }
        }
    }

    return ta.close(error);
}

ARB_ERROR GroupSearch::collectSpecies(const QueriedGroups& groups, CollectMode cmode, SpeciesNames& species) {
    /*! collect the names of the species contained in 'groups'
     * @param groups  groups to inspect (only groups located in one of the searched trees are considered)
     * @param cmode   UNITE: collect species contained in any group; INTERSECT: collect species contained in all groups
     * @param species result set. Its content is combined with the species of each group,
     *                i.e. a non-empty set passed by the caller takes part in the union/intersection.
     * @return error if cmode is INTERSECT and the intersection becomes empty
     */
    SearchedTreeContainer searched_tree;
    collect_searched_trees(gb_main, trees_to_search, searched_tree);

    ARB_ERROR error;
    for (SearchedTreeIter t = searched_tree.begin(); t != searched_tree.end() && !error; ++t) {
        // determine which of the wanted groups are located in the current tree
        GBDATAset groupsFoundInTree;
        for (FoundGroupCIter g = groups.begin(); g != groups.end(); ++g) {
            if (t->get_tree_data() == g->get_tree_data()) {
                groupsFoundInTree.insert(g->get_pointer());
            }
        }

        if (!groupsFoundInTree.empty()) {
            // iterate over tree and insert or intersect species from each group with set
            GroupSearchRoot *troot = t->get_tree_root();

            ARB_edge start = rootEdge(troot);
            ARB_edge e     = start;
            do {
                // only inner edges not pointing towards the root are inspected
                if (e.is_inner_edge() && e.get_type() != EDGE_TO_ROOT) {
                    TreeNode *node = e.dest();
                    if (node->is_normal_group()) {
                        if (groupsFoundInTree.find(node->gb_node) != groupsFoundInTree.end()) {
                            // iterate all leafs in subtree and store in 'speciesInGroup'
                            SpeciesNames speciesInGroup;
                            ARB_edge     sub  = e;
                            ARB_edge     stop = sub.inverse(); // traversal of the subtree ends when the group edge is reached in opposite direction

                            while (sub != stop) {
                                if (sub.is_edge_to_leaf()) {
                                    TreeNode *leaf = sub.dest();
                                    if (leaf->name) speciesInGroup.insert(leaf->name); // leafs without name are skipped
                                }
                                sub = sub.next();
                            }

                            if (species.empty()) { // simply add first group
                                gs_assert(!speciesInGroup.empty()); // tree broken?
                                species = speciesInGroup;
                            }
                            else { // intersect or unite two groups
                                SpeciesNames combined;
                                if (cmode == INTERSECT) {
                                    set_intersection(
                                        speciesInGroup.begin(), speciesInGroup.end(),
                                        species.begin(), species.end(),
                                        // an insert-iterator is needed here ('combined' starts empty)
                                        inserter(combined, combined.begin())
                                        );

                                    if (combined.empty()) {
                                        error = "No species is member of ALL groups";
                                    }
                                }
                                else {
                                    gs_assert(cmode == UNITE);
                                    set_union(
                                        speciesInGroup.begin(), speciesInGroup.end(),
                                        species.begin(), species.end(),
                                        // an insert-iterator is needed here ('combined' starts empty)
                                        inserter(combined, combined.begin())
                                        );
                                }
                                species = combined;
                            }
                        }
                    }
                }
                e = e.next();
            }
            while (e != start && !error); // stop after one complete round or at first error
        }
    }
    return error;
}

static void set_marks_of(const SpeciesNames& targetSpecies, GBDATA *gb_main, GroupMarkMode mode) {
    // Modify the mark flag of all database species whose name is listed in 'targetSpecies':
    //   GMM_INVERT -> toggle flag, GMM_MARK -> set flag, any other mode -> clear flag.
    // Names in 'targetSpecies' which were not encountered in the database
    // are not touched; their number is reported as warning.
    if (targetSpecies.empty()) return;

    size_t touched = 0;

    GBDATA *gb_species = GBT_first_species(gb_main);
    while (gb_species) {
        const char *name = GBT_get_name_or_description(gb_species);
        if (targetSpecies.find(name) != targetSpecies.end()) {
            ++touched;
            if (mode == GMM_INVERT) {
                UNCOVERED();
                GB_write_flag(gb_species, !GB_read_flag(gb_species));
            }
            else {
                UNCOVERED();
                GB_write_flag(gb_species, mode == GMM_MARK);
            }
        }
        gb_species = GBT_next_species(gb_species);
    }

    const size_t wanted = targetSpecies.size();
    if (touched<wanted) {
        const size_t zombies = wanted-touched;
        GBT_message(gb_main, GBS_global_string("Warning: Refused to touch %zu zombies", zombies));
    }
}

ARB_ERROR GroupSearch::set_marks_in_group(size_t idx, GroupMarkMode mode) {
    // Modify the marks of all species contained in the group of hit number 'idx'.
    // An invalid index is silently ignored (no error is reported).
    ARB_ERROR error;
    if (idx<found->size()) {
        QueriedGroups single; // temporary hit list containing the wanted hit only
        single.add_informed_group((*found)[idx]);

        SpeciesNames members;
        error = collectSpecies(single, UNITE, members);
        if (!error) set_marks_of(members, gb_main, mode);
    }
    return error;
}
ARB_ERROR GroupSearch::set_marks_in_found_groups(GroupMarkMode mode, CollectMode cmode) {
    // Modify the marks of species contained in the found groups.
    // cmode == INTERSECT -> affect only species which are members of ALL found groups
    // cmode == UNITE     -> affect species which are member of ANY found group
    // Without results nothing happens.
    ARB_ERROR error;
    if (has_results()) {
        SpeciesNames members;
        error = collectSpecies(*found, cmode, members);
        if (!error) set_marks_of(members, gb_main, mode);
    }
    return error;
}

struct GroupNameQueryKey : public ExplicitQueryKey {
    // query key "name": delivers the name of the target group

    const char *get_name() const OVERRIDE { return "name"; }

    char *get_target_data(const QueryTarget& target, GB_ERROR& /*error*/) const OVERRIDE {
        const TargetGroup& tgroup = DOWNCAST_REFERENCE(const TargetGroup, target);
        const char        *gname  = tgroup.get_group_name();
        return strdup(gname); // result is a strdup()ed copy of the group name
    }
};

struct GroupFoldedKey : public ExplicitQueryKey {
    // query key "folded": delivers "1" for folded groups and "0" otherwise

    const char *get_name() const OVERRIDE { return "folded"; }

    char *get_target_data(const QueryTarget& target, GB_ERROR& /*error*/) const OVERRIDE {
        const TargetGroup& tgroup = DOWNCAST_REFERENCE(const TargetGroup, target);
        const int          folded = int(tgroup.get_group().is_folded());

        return GBS_global_string_copy("%i", folded);
    }
};

struct GroupAIDkey : public ExplicitQueryKey {
    // query key "AID": delivers the average ingroup distance of the target group (format "%e")

    const char *get_name() const OVERRIDE { return "AID"; }

    char *get_target_data(const QueryTarget& target, GB_ERROR& /*error*/) const OVERRIDE {
        const TargetGroup& tgroup = DOWNCAST_REFERENCE(const TargetGroup, target);
        return GBS_global_string_copy("%e", tgroup.get_average_ingroup_distance());
    }
};

struct GroupSizeKey : public ExplicitQueryKey {
    // query key "size": delivers the size of the target group

    const char *get_name() const OVERRIDE { return "size"; }

    char *get_target_data(const QueryTarget& target, GB_ERROR& /*error*/) const OVERRIDE {
        const TargetGroup& tgroup = DOWNCAST_REFERENCE(const TargetGroup, target);
        return GBS_global_string_copy("%u", tgroup.get_group_size());
    }
};
struct GroupKeeledKey : public ExplicitQueryKey {
    // query key "keeled": delivers the keeled state info of the target group

    const char *get_name() const OVERRIDE { return "keeled"; }

    char *get_target_data(const QueryTarget& target, GB_ERROR& /*error*/) const OVERRIDE {
        const TargetGroup& tgroup = DOWNCAST_REFERENCE(const TargetGroup, target);
        return GBS_global_string_copy("%i", tgroup.get_keeledStateInfo());
    }
};
struct GroupZombiesKey : public ExplicitQueryKey {
    // query key "zombies": delivers the zombie count of the target group

    const char *get_name() const OVERRIDE { return "zombies"; }

    char *get_target_data(const QueryTarget& target, GB_ERROR& /*error*/) const OVERRIDE {
        const TargetGroup& tgroup = DOWNCAST_REFERENCE(const TargetGroup, target);
        return GBS_global_string_copy("%u", tgroup.get_zombie_count());
    }
};
class GroupMarkedKey : public ExplicitQueryKey {
    // query key "marked": delivers the number of marked species of the target group,
    // either as absolute count or as percentage of the group size

    bool percent; // true -> deliver percentage; false -> deliver absolute count

public:
    GroupMarkedKey(bool percent_) :
        percent(percent_)
    {}

    const char *get_name() const OVERRIDE { return "marked"; }

    char *get_target_data(const QueryTarget& target, GB_ERROR& /*error*/) const OVERRIDE {
        const TargetGroup& tgroup = DOWNCAST_REFERENCE(const TargetGroup, target);

        int marked = tgroup.get_marked_count();
        if (!percent) {
            return GBS_global_string_copy("%u", marked);
        }

        int    size = tgroup.get_group_size();
        double pc   = 100.0*marked/size; // NOTE(review): relies on size != 0 -- confirm groups can never be empty
        return GBS_global_string_copy("%5.2f", pc);
    }
};

class NestingLevelKey : public ExplicitQueryKey {
    // query key "nesting": delivers the nesting level of the target group,
    // as calculated by GroupSearch::calc_nesting_level

    const GroupSearch& group_search; // search used to calculate the nesting level

public:
    NestingLevelKey(const GroupSearch& group_search_) :
        group_search(group_search_)
    {}

    const char *get_name() const OVERRIDE { return "nesting"; }

    char *get_target_data(const QueryTarget& target, GB_ERROR& /*error*/) const OVERRIDE {
        const TargetGroup& tgroup = DOWNCAST_REFERENCE(const TargetGroup, target);
        const FoundGroup&  fgroup = tgroup.get_group();
        const int          level  = group_search.calc_nesting_level(fgroup.get_pointer());

        return GBS_global_string_copy("%i", level);
    }
};

class ParentGroupNameQueryKey: public QueryKey, virtual Noncopyable {
    // Query key delivering the name of a parent group of the target group.
    //
    // For CT_PARENT_DIRECT only the direct parent is delivered (explicit key).
    // For CT_PARENT_ANY and CT_PARENT_ALL the key can be iterated: each call of iterate()
    // advances to the next enclosing group; reset() restarts at the direct parent.
    // The iteration state is stored in mutable members, because the QueryKey interface is const.

    const GroupSearch& group_search;
    bool               directParentOnly; // true -> direct parent; false -> any parent (iterates)

    mutable GBDATA *gb_parent; // parent group currently delivered (NULp -> iteration not started or finished)
    mutable int     distance;  // 1=direct parent, 2=parent of direct parent,  ...

    static inline query_key_type detectKeyType(CriterionType ctype) {
        // map criterion type to key type.
        // 'qkt' is initialized to ensure a defined result, even if an unexpected 'ctype'
        // is passed while assertions are disabled.
        query_key_type qkt = QKEY_EXPLICIT;
        switch (ctype) {
            case CT_PARENT_DIRECT: qkt = QKEY_EXPLICIT; break;
            case CT_PARENT_ANY:    qkt = QKEY_ANY;      break;
            case CT_PARENT_ALL:    qkt = QKEY_ALL;      break;
            default: gs_assert(0); break;
        }
        return qkt;
    }

public:
    ParentGroupNameQueryKey(const GroupSearch& group_search_, CriterionType ctype) :
        QueryKey(detectKeyType(ctype)),
        group_search(group_search_),
        directParentOnly(ctype == CT_PARENT_DIRECT),
        gb_parent(NULp),
        distance(0)
    {
        gs_assert(ctype == CT_PARENT_DIRECT || ctype == CT_PARENT_ANY || ctype == CT_PARENT_ALL);
    }
    ~ParentGroupNameQueryKey() OVERRIDE {}

    char *get_target_data(const QueryTarget& target, GB_ERROR& /*error*/) const OVERRIDE {
        // retrieve name of parent group
        if (!gb_parent) { // search first (direct) parent
            const TargetGroup& target_group = DOWNCAST_REFERENCE(const TargetGroup, target);
            const FoundGroup&  group        = target_group.get_group();

            gb_parent = group_search.get_parent_group(group.get_pointer());
            ++distance;
            if (!gb_parent) return strdup(""); // does not match "*"
        }

        FoundGroup parent(gb_parent);
        return strdup(parent.get_name());
    }
    const char *get_name() const OVERRIDE {
        // name of target (e.g. for reports)
        if (get_type() == QKEY_EXPLICIT) { // direct parent
            return "parent-name";
        }

        return GBS_global_string("parent-%i-name", distance);
    }
    bool iterate() const OVERRIDE {
        // iterate key to next entry (not for QKEY_EXPLICIT)
        // returns true if another parent group exists
        if (gb_parent && get_type() != QKEY_EXPLICIT) {
            gb_parent = group_search.get_parent_group(gb_parent);
            ++distance;
            return gb_parent != NULp;
        }
        return false;
    }
    void reset() const OVERRIDE {
        // reset iteration
        gb_parent = NULp;
        distance  = 0;
    }

};

void GroupSearch::addQueryExpression(CriterionOperator op, CriterionType type, CriterionMatch mtype, const char *expression) {
    /*! append one criterion to the query
     * @param op         how to combine the criterion with the previously added ones.
     *                   CO_IGNORE skips the criterion. For the very first criterion 'op' is
     *                   not examined at all (it is always added using OR).
     * @param type       what property of the group gets tested
     * @param mtype      CM_MISMATCH negates the test
     * @param expression the search expression
     */
    const bool is_first = query_expr.isNull();

    query_operator aqo = ILLEGAL;
    if (is_first) {
        aqo = OR; // first is always OR
    }
    else {
        switch (op) {
            case CO_AND:    aqo = AND; break;
            case CO_OR:     aqo = OR;  break;
            case CO_IGNORE: return; // ignore this expression
        }
    }

    // create the key which retrieves the tested property:
    QueryKeyPtr key;
    switch (type) {
        case CT_NAME:          key = new GroupNameQueryKey;     break;
        case CT_FOLDED:        key = new GroupFoldedKey;        break;
        case CT_NESTING_LEVEL: key = new NestingLevelKey(*this); break;
        case CT_SIZE:          key = new GroupSizeKey;          break;
        case CT_MARKED:        key = new GroupMarkedKey(false); break; // absolute count
        case CT_MARKED_PC:     key = new GroupMarkedKey(true);  break; // percentage
        case CT_ZOMBIES:       key = new GroupZombiesKey;       break;

        case CT_PARENT_DIRECT:
        case CT_PARENT_ANY:
        case CT_PARENT_ALL:
            key = new ParentGroupNameQueryKey(*this, type);
            break;

        case CT_AID:           key = new GroupAIDkey;           break;
        case CT_KEELED:        key = new GroupKeeledKey;        break;
    }

    QueryExpr *expr = new QueryExpr(aqo, key, mtype == CM_MISMATCH, expression);
    if (is_first) {
        query_expr = expr;        // store 1st
    }
    else {
        query_expr->append(expr); // append others
    }
}
void GroupSearch::forgetQExpressions() {
    // Drop all criteria added by addQueryExpression() ('query_expr' becomes null).
    query_expr.setNull();
}


// --------------------------------------------------------------------------------

#ifdef UNIT_TESTS
#ifndef TEST_UNIT_H
#include <test_unit.h>
#endif

// Selects how groupListingIs() formats each listed group (test code only).
enum GroupListType {
    GLT_NAME,            // name only
    GLT_NAME_TREE,       // name + tree ("name/tree")
    GLT_NAME_SIZE,       // name + size ("name(size)")
    GLT_NAME_AID,        // name + AID with 4 decimals ("name(aid)")
    GLT_CLUST_NT,        // cluster, name + tree
    GLT_NAME_FOLD,       // shows foldings state
    GLT_NAME_AND_PARENT, // shows parent relation (using ParentCache)
    GLT_KNAME_NEST,      // shows keeled state and nesting
};

static arb_test::match_expectation groupListingIs(const QueriedGroups& foundGroups, GroupListType type, const char *expected_entries) {
    // Test helper: formats each group in 'foundGroups' as selected by 'type',
    // joins the resulting entries using '*' and expects the result to equal 'expected_entries'.
    using namespace arb_test;

    ParentCache& parents = GroupSearch::get_common()->get_parent_cache();

    StrArray listed;
    for (FoundGroupCIter g = foundGroups.begin(); g != foundGroups.end(); ++g) {
        const char *gname = g->get_name();
        switch (type) {
            case GLT_NAME:
                listed.put(strdup(gname));
                break;

            case GLT_NAME_TREE:
                listed.put(GBS_global_string_copy("%s/%s", gname, g->get_tree_name()));
                break;

            case GLT_NAME_SIZE:
                listed.put(GBS_global_string_copy("%s(%i)", gname, g->get_size()));
                break;

            case GLT_NAME_AID:
                listed.put(GBS_global_string_copy("%s(%.4f)", gname, g->get_aid()));
                break;

            case GLT_CLUST_NT:
                listed.put(GBS_global_string_copy("%i/%s/%s", g->get_cluster_id(), gname, g->get_tree_name()));
                break;

            case GLT_NAME_FOLD: {
                // folded groups are shown in brackets
                const char *format = g->is_folded() ? "[%s]" : "%s";
                listed.put(GBS_global_string_copy(format, gname));
                break;
            }
            case GLT_NAME_AND_PARENT: {
                // groups with parent are shown as "parent<name>"
                GBDATA *gb_parent = parents.lookupParent(g->get_pointer());
                if (!gb_parent) {
                    listed.put(strdup(gname));
                }
                else {
                    listed.put(GBS_global_string_copy("%s<%s>", FoundGroup(gb_parent).get_name(), gname));
                }
                break;
            }
            case GLT_KNAME_NEST: {
                // keeled state is shown as prefix: 1 -> "!", other non-zero states -> "?"
                const int   kstate  = g->get_keeled();
                const char *kprefix = "";
                if (kstate) kprefix = kstate == 1 ? "!" : "?";
                listed.put(GBS_global_string_copy("%s%s(L%i)", kprefix, gname, g->get_nesting()));
                break;
            }
        }
    }

    SmartCharPtr  joinedP = GBT_join_strings(listed, '*');
    const char   *joined  = &*joinedP;
    return that(joined).is_equal_to(expected_entries);
}

// Test helper: collects the species of the current results of 'gs'
// ('cmode' is forwarded to GroupSearch::collectSpecies) and expects that
// - collecting does not report an error and
// - the sorted, comma-separated list of collected names equals 'expected_species'.
static arb_test::match_expectation speciesInGroupsAre(GroupSearch& gs, CollectMode cmode, const char *expected_species) {
    using namespace   arb_test;
    expectation_group checks;

    SpeciesNames collected;
    {
        // error lives only inside this scope (same as result access)
        ARB_ERROR collect_error = gs.collectSpecies(gs.get_results(), cmode, collected);
        checks.add(doesnt_report_error(collect_error));
    }

    // build a sorted list referencing the collected names
    ConstStrArray names;
    SpeciesNames::const_iterator name = collected.begin();
    while (name != collected.end()) {
        names.put(name->c_str());
        ++name;
    }
    names.sort(GB_string_comparator, NULp);

    SmartCharPtr  joinedP = GBT_join_strings(names, ',');
    const char   *joined  = &*joinedP;
    checks.add(that(joined).is_equal_to(expected_species));

    return all().ofgroup(checks);
}

// Test helper: expects that the current results of 'gs', listed using format 'type',
// equal 'expected_entries' (comparison is delegated to groupListingIs()).
// A transaction on the database of 'gs' is held while the listing is generated.
static arb_test::match_expectation resultListingIs(GroupSearch& gs, GroupListType type, const char *expected_entries) {
    using namespace arb_test;

    const QueriedGroups& found = gs.get_results(); // results are fetched before the transaction is opened
    GB_transaction       listing_ta(gs.get_gb_main());

    return groupListingIs(found, type, expected_entries);
}

// Test helper: translates the sort criteria currently active in 'gs' into a string
// (one character per criterion, in iteration order of GroupSearch::inspect_order())
// and expects that string to equal 'expected_order'.
//
// At most MAX_ORDER-1 criteria can be represented. The limit is checked BEFORE each
// write into the local buffer, i.e. the buffer is never overrun (not even when
// assertions are compiled out; in that case the string is truncated and will
// normally mismatch 'expected_order').
static arb_test::match_expectation hasOrder(const GroupSearch& gs, const char *expected_order) {
    using namespace arb_test;

    const int MAX_ORDER = 20;
    char      found_order[MAX_ORDER];
    int       off       = 0;

    const SortCriteria& order = gs.inspect_order();
    for (SortCriteria::const_iterator i = order.begin(); i != order.end(); ++i) {
        char c = '?'; // shown for criteria not handled below
        switch (*i) {
            case GSC_NONE:       c = '_'; break;
            case GSC_NAME:       c = 'N'; break;
            case GSC_TREENAME:   c = 'T'; break;
            case GSC_TREEORDER:  c = 'O'; break;
            case GSC_REVERSE:    c = '!'; break;
            case GSC_HIT_REASON: c = 'R'; break; // @@@ untested
            case GSC_NESTING:    c = 'G'; break; // @@@ untested
            case GSC_SIZE:       c = 'S'; break; // @@@ untested
            case GSC_MARKED:     c = 'M'; break; // @@@ untested
            case GSC_MARKED_PC:  c = '%'; break; // @@@ untested
            case GSC_CLUSTER:    c = 'C'; break;
            case GSC_AID:        c = 'A'; break;
            case GSC_KEELED:     c = 'k'; break;
        }

        bool has_room = off<(MAX_ORDER-1); // need space for 'c' plus terminating zero
        gs_assert(has_room);               // too many criteria -> increase MAX_ORDER
        if (!has_room) break;              // never write behind end of buffer

        found_order[off++] = c;
    }
    found_order[off] = 0; // always inside buffer (off <= MAX_ORDER-1)
    return that(found_order).is_equal_to(expected_order);
}

// Test helper: adds sort criterion 'crit' to 'gs' and afterwards expects
// - the resulting criteria string (see hasOrder) to equal 'expected_order' and
// - the result listing (in format GLT_NAME_TREE) to equal 'expected_entries'.
static arb_test::match_expectation addingCriterionProduces(GroupSearch& gs, GroupSortCriterion crit, const char *expected_order, const char *expected_entries) {
    using namespace arb_test;

    gs.addSortCriterion(crit); // modifies 'gs' before any expectation is evaluated

    expectation_group checks;
    checks.add(hasOrder(gs, expected_order));
    checks.add(resultListingIs(gs, GLT_NAME_TREE, expected_entries));

    return all().ofgroup(checks);
}

// Counts how often trace_refresh_cb() has been called (tests reset it to 0 as needed).
static int refreshes_traced = 0;

// Callback used by the tests below: only records that it was called.
static void trace_refresh_cb() {
    refreshes_traced += 1;
}

void TEST_group_search() {
    GB_shell  shell;
    GBDATA   *gb_main = GB_open("../../demo.arb", "r");

    GroupSearchCallback traceRefresh_cb = makeGroupSearchCallback(trace_refresh_cb);
    refreshes_traced = 0;

    {
        GroupSearch allGroups(gb_main, traceRefresh_cb);
        TEST_EXPECT(allGroups.get_results().empty());

        allGroups.perform_search(GSM_FIND);
        TEST_EXPECT(!allGroups.get_results().empty());
        TEST_EXPECT_EQUAL(allGroups.get_results().size(), 28);
        TEST_EXPECTATION(resultListingIs(allGroups, GLT_NAME_TREE,
                                         "last/tree_test*another group/tree_test*outer/tree_test*inner/tree_test*test/tree_test*outer/tree_test*test/tree_test*xx/tree_test*"
                                         "outer/tree_tree2*g2/tree_tree2*xx/tree_tree2*test/tree_tree2*outer/tree_tree2*inner/tree_tree2*test/tree_tree2*"
                                         "zombsub/tree_zomb*zomb/tree_zomb*ZOMB/tree_zomb*dup/tree_zomb*inner outer group/tree_zomb*inner group/tree_zomb*outer group/tree_zomb*g4/tree_zomb*g3/tree_zomb*g2/tree_zomb*xx/tree_zomb*yy/tree_zomb*eee/tree_zomb"
                                         ));

        TEST_EXPECTATION(hasOrder(allGroups, ""));
        allGroups.addSortCriterion(GSC_NAME); // sort by name
        TEST_EXPECTATION(hasOrder(allGroups, "N"));
        TEST_EXPECTATION(resultListingIs(allGroups, GLT_NAME_TREE,
                                         "ZOMB/tree_zomb*" // @@@ should be sorted case insensitive
                                         "another group/tree_test*dup/tree_zomb*eee/tree_zomb*"
                                         "g2/tree_tree2*g2/tree_zomb*"
                                         "g3/tree_zomb*g4/tree_zomb*"
                                         "inner/tree_test*inner/tree_tree2*"                                  // order is stable
                                         "inner group/tree_zomb*inner outer group/tree_zomb*last/tree_test*"
                                         "outer/tree_test*outer/tree_test*outer/tree_tree2*outer/tree_tree2*" // order is stable
                                         "outer group/tree_zomb*"
                                         "test/tree_test*test/tree_test*test/tree_tree2*test/tree_tree2*"     // order is stable
                                         "xx/tree_test*xx/tree_tree2*xx/tree_zomb*"                           // order is stable
                                         "yy/tree_zomb*zomb/tree_zomb*zombsub/tree_zomb"
                             ));

        // search only in tree_tree2
        TreeNameSet tree2;
        tree2.insert("tree_tree2");
        allGroups.setSearchRange(tree2);
        allGroups.perform_search(GSM_FIND);
        TEST_EXPECT_EQUAL(allGroups.get_results().size(), 7);
        TEST_EXPECTATION(hasOrder(allGroups, "N")); // results still sorted by name (sort criteria are not reset by new search)
        TEST_EXPECTATION(resultListingIs(allGroups, GLT_NAME_TREE, "g2/tree_tree2*inner/tree_tree2*outer/tree_tree2*outer/tree_tree2*test/tree_tree2*test/tree_tree2*xx/tree_tree2"));
    }

    {
        GroupSearch some(gb_main, traceRefresh_cb);

        some.addQueryExpression(CO_OR, CT_NAME, CM_MATCH, "*ou*");

        some.perform_search(GSM_FIND);
        TEST_EXPECTATION(resultListingIs(some, GLT_NAME, "another group*outer*outer*outer*outer*inner outer group*inner group*outer group"));
        TEST_EXPECT_EQUAL(some.get_results().get_column_widths().name, 17);

        // test 2nd filter
        some.forgetQExpressions();
        some.addQueryExpression(CO_OR, CT_NAME, CM_MATCH, "*er");
        some.perform_search(GSM_FIND);
        TEST_EXPECTATION(resultListingIs(some, GLT_NAME_TREE, "outer/tree_test*inner/tree_test*outer/tree_test*outer/tree_tree2*outer/tree_tree2*inner/tree_tree2"));
        TEST_EXPECT_EQUAL(some.get_results().get_column_widths().name, 5);

        {
            // test order
            const char *BY_NAME_FWD = "inner/tree_test*inner/tree_tree2*outer/tree_test*outer/tree_test*outer/tree_tree2*outer/tree_tree2";
            const char *BY_NAME_REV = "outer/tree_test*outer/tree_test*outer/tree_tree2*outer/tree_tree2*inner/tree_test*inner/tree_tree2";

            TEST_EXPECTATION(addingCriterionProduces(some, GSC_NAME,    "N",  BY_NAME_FWD));
            TEST_EXPECTATION(addingCriterionProduces(some, GSC_REVERSE, "!N", BY_NAME_REV));
            TEST_EXPECTATION(addingCriterionProduces(some, GSC_NAME,    "N",  BY_NAME_FWD));

            // test multiple "reverse" criteria
            TEST_EXPECTATION(addingCriterionProduces(some, GSC_REVERSE, "!N", BY_NAME_REV));
            TEST_EXPECTATION(addingCriterionProduces(some, GSC_REVERSE, "N",  BY_NAME_FWD));
            TEST_EXPECTATION(addingCriterionProduces(some, GSC_REVERSE, "!N", BY_NAME_REV));

            // test sort by treename
            TEST_EXPECTATION(addingCriterionProduces(some, GSC_TREENAME, "T!N",  "outer/tree_test*outer/tree_test*inner/tree_test*outer/tree_tree2*outer/tree_tree2*inner/tree_tree2"));
            TEST_EXPECTATION(addingCriterionProduces(some, GSC_REVERSE,  "!T!N", "inner/tree_tree2*outer/tree_tree2*outer/tree_tree2*inner/tree_test*outer/tree_test*outer/tree_test"));

            // test sort by tree-order (as specified in tree-admin)
            TEST_EXPECTATION(addingCriterionProduces(some, GSC_TREEORDER, "O!T!N",  "inner/tree_test*outer/tree_test*outer/tree_test*inner/tree_tree2*outer/tree_tree2*outer/tree_tree2"));
            TEST_EXPECTATION(addingCriterionProduces(some, GSC_REVERSE,   "!O!T!N", "outer/tree_tree2*outer/tree_tree2*inner/tree_tree2*outer/tree_test*outer/tree_test*inner/tree_test"));

            some.forgetSortCriteria();
        }

        // combine both filters (conjunction will only report 'outer')
        some.addQueryExpression(CO_AND, CT_NAME, CM_MATCH, "*ou*");
        some.perform_search(GSM_FIND);
        TEST_EXPECTATION(resultListingIs(some, GLT_NAME_TREE, "outer/tree_test*outer/tree_test*outer/tree_tree2*outer/tree_tree2"));

        // test adding results
        some.forgetQExpressions();
        some.addQueryExpression(CO_OR, CT_NAME, CM_MATCH, "*xx*");
        some.perform_search(GSM_ADD);
        TEST_EXPECTATION(resultListingIs(some, GLT_NAME, "outer*outer*outer*outer*xx*xx*xx"));

        some.forgetQExpressions();
        some.addQueryExpression(CO_OR, CT_NAME, CM_MATCH, "*er*");
        some.perform_search(GSM_ADD); // check no duplicates are reported (filter also matches 'outer')
        TEST_EXPECTATION(resultListingIs(some, GLT_NAME, "outer*outer*outer*outer*xx*xx*xx*another group*inner*inner*inner outer group*inner group*outer group"));

        // test removing a single result
        {
            some.addSortCriterion(GSC_TREEORDER); // first change order to make removal comprehensible
            TEST_EXPECTATION(resultListingIs(some, GLT_NAME, "outer*outer*xx*another group*inner*outer*outer*xx*inner*xx*inner outer group*inner group*outer group"));

            const char *FIRST_XX_REMOVED = "outer*outer*another group*inner*outer*outer*xx*inner*xx*inner outer group*inner group*outer group";
            some.remove_hit(2); // remove first 'xx'
            TEST_EXPECTATION(resultListingIs(some, GLT_NAME, FIRST_XX_REMOVED));
            // test that out-of-bounds removals are NOOPs:
            some.remove_hit(-10); TEST_EXPECTATION(resultListingIs(some, GLT_NAME, FIRST_XX_REMOVED));
            some.remove_hit(100); TEST_EXPECTATION(resultListingIs(some, GLT_NAME, FIRST_XX_REMOVED));
        }

        // test keeping results
        some.forgetQExpressions();
        some.addQueryExpression(CO_OR, CT_NAME, CM_MATCH, "*ou*");
        some.perform_search(GSM_KEEP);
        TEST_EXPECTATION(resultListingIs(some, GLT_NAME, "outer*outer*another group*outer*outer*inner outer group*inner group*outer group"));

        // test removing results (also tests "mismatch")
        some.forgetQExpressions();
        some.addQueryExpression(CO_OR, CT_NAME, CM_MATCH, "outer");
        some.perform_search(GSM_REMOVE);
        TEST_EXPECTATION(resultListingIs(some, GLT_NAME, "another group*inner outer group*inner group*outer group"));
    }

    // test different search keys
    {
        GroupSearch keyed(gb_main, traceRefresh_cb);
        const char *TOP_GROUPS = "last*another group*outer*test*outer*outer*zombsub*dup*inner outer group";

        // CT_PARENT_DIRECT (direct parent group name)
        keyed.addQueryExpression(CO_OR, CT_PARENT_DIRECT, CM_MATCH, ""); // direct parent w/o name (=no direct parent)
        keyed.perform_search(GSM_FIND);
        TEST_EXPECTATION(resultListingIs(keyed, GLT_NAME_AND_PARENT, TOP_GROUPS));  // -> TOP_GROUPS

        keyed.forgetQExpressions();
        keyed.addQueryExpression(CO_OR, CT_PARENT_DIRECT, CM_MATCH, "/^[^ ]*ou[^ ]*$/"); // uses regular expression query
        keyed.perform_search(GSM_FIND);
        TEST_EXPECTATION(resultListingIs(keyed, GLT_NAME_AND_PARENT, "outer<inner>*outer<test>*outer<xx>*outer<g2>*outer<test>*outer<inner>*outer<test>"));

        // CT_PARENT_ANY
        keyed.forgetQExpressions();
        keyed.addQueryExpression(CO_OR,  CT_PARENT_ANY, CM_MATCH,    "|contains(\"ou\");contains(\" \")|equals(0)|minus");
        keyed.perform_search(GSM_FIND);
        TEST_EXPECTATION(resultListingIs(keyed, GLT_NAME_AND_PARENT, "outer<inner>*outer<test>*outer<xx>*outer<g2>*g2<xx>*outer<test>*test<outer>*outer<inner>*outer<test>"));

        // CT_PARENT_ALL
        keyed.forgetQExpressions();
        keyed.addQueryExpression(CO_OR,  CT_PARENT_ALL, CM_MISMATCH, "/ou/"); // not inside group containing 'ou'
        keyed.addQueryExpression(CO_AND, CT_NAME,       CM_MISMATCH, "/ou/"); // and not containing 'ou' itself
        keyed.perform_search(GSM_FIND);
        TEST_EXPECTATION(resultListingIs(keyed, GLT_NAME_AND_PARENT, "last*test*zombsub*zombsub<zomb>*zombsub<ZOMB>*dup"));

        // CT_NESTING_LEVEL
        keyed.forgetQExpressions();
        keyed.addQueryExpression(CO_OR, CT_NESTING_LEVEL, CM_MATCH, "<1");         // nesting level less than 1
        keyed.perform_search(GSM_FIND);
        TEST_EXPECTATION(resultListingIs(keyed, GLT_NAME_AND_PARENT, TOP_GROUPS)); // -> TOP_GROUPS

        keyed.forgetQExpressions();
        keyed.addQueryExpression(CO_OR, CT_NESTING_LEVEL, CM_MISMATCH, ">0");      // nesting level not above 0
        keyed.perform_search(GSM_FIND);
        TEST_EXPECTATION(resultListingIs(keyed, GLT_NAME_AND_PARENT, TOP_GROUPS)); // -> TOP_GROUPS

        keyed.forgetQExpressions();
        keyed.addQueryExpression(CO_OR, CT_NESTING_LEVEL, CM_MATCH, ">4"); // too high nesting level
        keyed.perform_search(GSM_FIND);
        TEST_EXPECTATION(resultListingIs(keyed, GLT_NAME_AND_PARENT, ""));

        keyed.forgetQExpressions();
        keyed.addQueryExpression(CO_OR, CT_NESTING_LEVEL, CM_MATCH, ">3"); // highest occurring nesting level
        keyed.perform_search(GSM_FIND);
        TEST_EXPECTATION(resultListingIs(keyed, GLT_NAME_AND_PARENT, "yy<eee>")); // one group with nesting level 4

        keyed.forgetQExpressions();
        keyed.addQueryExpression(CO_OR, CT_NESTING_LEVEL, CM_MATCH, ">2");
        keyed.perform_search(GSM_FIND);
        TEST_EXPECTATION(resultListingIs(keyed, GLT_NAME_AND_PARENT, "outer<inner>*g2<xx>*g2<yy>*yy<eee>")); // 1xL4 + 3xL3

        keyed.forgetQExpressions();
        keyed.addQueryExpression(CO_OR,  CT_NESTING_LEVEL, CM_MATCH, ">1");
        keyed.addQueryExpression(CO_AND, CT_NESTING_LEVEL, CM_MATCH, "<4");
        keyed.perform_search(GSM_FIND);
        TEST_EXPECTATION(resultListingIs(keyed, GLT_NAME_AND_PARENT, "g2<xx>*test<outer>*outer<inner>*outer group<g4>*outer group<g3>*outer group<g2>*g2<xx>*g2<yy>")); // 5x L2 + 3x L3

        keyed.forgetQExpressions();
        keyed.addQueryExpression(CO_OR, CT_NESTING_LEVEL, CM_MATCH, "2");
        keyed.perform_search(GSM_FIND);
        TEST_EXPECTATION(resultListingIs(keyed, GLT_NAME_AND_PARENT, "g2<xx>*test<outer>*outer group<g4>*outer group<g3>*outer group<g2>")); // 5x L2

        // CT_FOLDED
        const char *EXPANDED_GROUPS = "last*outer*outer<inner>*outer*outer*zombsub";
        keyed.forgetQExpressions();
        keyed.addQueryExpression(CO_OR, CT_FOLDED, CM_MATCH, "0");
        keyed.perform_search(GSM_FIND);
        TEST_EXPECTATION(resultListingIs(keyed, GLT_NAME_AND_PARENT, EXPANDED_GROUPS));

        keyed.forgetQExpressions();
        keyed.addQueryExpression(CO_OR, CT_NAME /*does not matter*/, CM_MISMATCH, "|readdb(grouped)|equals(1)"); // directly access field of group-container
        keyed.perform_search(GSM_FIND);
        TEST_EXPECTATION(resultListingIs(keyed, GLT_NAME_AND_PARENT, EXPANDED_GROUPS));

        // CT_SIZE
        keyed.forgetQExpressions();
        keyed.addQueryExpression(CO_OR,  CT_SIZE, CM_MATCH, ">12");             // find bigger groups
        keyed.perform_search(GSM_FIND);
        TEST_EXPECTATION(resultListingIs(keyed, GLT_NAME_SIZE, "another group(29)*outer(15)*outer(47)*zombsub(14)*inner outer group(19)*outer group(15)"));
        keyed.addQueryExpression(CO_AND, CT_SIZE, CM_MATCH, "|rest(2)|equals(0)"); // with even groupsize only
        keyed.perform_search(GSM_FIND);
        TEST_EXPECTATION(resultListingIs(keyed, GLT_NAME_SIZE, "zombsub(14)")); // the only bigger group with an even number of members

        // CT_MARKED + CT_MARKED_PC
        keyed.forgetQExpressions();
        keyed.addQueryExpression(CO_OR, CT_MARKED, CM_MATCH, ">7"); // at least 8 marked species inside group
        keyed.perform_search(GSM_FIND);
        TEST_EXPECTATION(resultListingIs(keyed, GLT_NAME, "another group*outer*inner outer group*outer group"));

        const char *COMPLETELY_MARKED_GROUPS = "test*xx*xx*g4*xx*eee";
        keyed.forgetQExpressions();
        keyed.addQueryExpression(CO_OR, CT_MARKED_PC, CM_MATCH, ">99");                      // completely marked groups (more than 99%)
        keyed.perform_search(GSM_FIND);
        TEST_EXPECTATION(resultListingIs(keyed, GLT_NAME, COMPLETELY_MARKED_GROUPS));
        keyed.forgetQExpressions();
        keyed.addQueryExpression(CO_OR, CT_MARKED_PC, CM_MISMATCH, "<100");                  // completely marked groups (not less than 100%)
        keyed.perform_search(GSM_FIND);
        TEST_EXPECTATION(resultListingIs(keyed, GLT_NAME, COMPLETELY_MARKED_GROUPS));
        keyed.forgetQExpressions();
        keyed.addQueryExpression(CO_OR, CT_MARKED_PC, CM_MATCH, "100");                      // completely marked groups (equal to 100%)
        keyed.perform_search(GSM_FIND);
        TEST_EXPECTATION__BROKEN(resultListingIs(keyed, GLT_NAME, COMPLETELY_MARKED_GROUPS), // @@@ matching % for equality does not work as expected
                                 resultListingIs(keyed, GLT_NAME, ""));


        keyed.forgetQExpressions();
        keyed.addQueryExpression(CO_OR,  CT_MARKED,    CM_MISMATCH, "0");   // groups with marked..
        keyed.addQueryExpression(CO_AND, CT_MARKED_PC, CM_MATCH,    "<50"); // ..but less than 50%
        keyed.perform_search(GSM_FIND);
        TEST_EXPECTATION(resultListingIs(keyed, GLT_NAME, "outer*outer*test"));

        // CT_ZOMBIES
        keyed.forgetQExpressions();
        keyed.addQueryExpression(CO_OR, CT_ZOMBIES, CM_MISMATCH, "0"); // groups with zombies
        keyed.perform_search(GSM_FIND);
        TEST_EXPECTATION(resultListingIs(keyed, GLT_NAME, "zombsub*zomb*ZOMB"));

        // CT_AID
        keyed.forgetQExpressions();
        keyed.addQueryExpression(CO_OR, CT_AID, CM_MATCH, ">1"); // groups with high AID
        keyed.perform_search(GSM_FIND);
        TEST_EXPECTATION(resultListingIs(keyed, GLT_NAME_AID, "outer(1.0996)*outer(1.1605)"));

        keyed.forgetQExpressions();
        keyed.addQueryExpression(CO_OR, CT_AID, CM_MATCH, "<.1"); // groups with low AID
        keyed.perform_search(GSM_FIND);
        keyed.addSortCriterion(GSC_AID);
        keyed.addSortCriterion(GSC_REVERSE);
        TEST_EXPECTATION(resultListingIs(keyed, GLT_NAME_AID, "xx(0.0786)*xx(0.0786)*g3(0.0665)*dup(0.0399)*inner group(0.0259)"));

        // CT_KEELED is tested in TEST_keeled_group_search()
    }

    TEST_EXPECT_EQUAL(refreshes_traced, 0); // no refresh traced up to here

    // test group-actions:

    {
        refreshes_traced = 0;

        GroupSearch misc(gb_main, traceRefresh_cb);

        misc.addQueryExpression(CO_OR,  CT_NAME, CM_MATCH,    "*e*");
        misc.addQueryExpression(CO_AND, CT_NAME, CM_MISMATCH, "* *");
        misc.perform_search(GSM_FIND);
        {
            const char *ACI_add_tag = "\"[TAG] \";dd";

            const char *BEFORE_RENAME    = "outer*inner*test*outer*test*outer*test*outer*inner*test*eee";
            const char *OUTER_PREFIXED = "[TAG] outer*inner*test*outer*test*outer*test*outer*inner*test*eee";

            TEST_EXPECTATION(resultListingIs(misc, GLT_NAME, BEFORE_RENAME));

            // test renaming groups:
            TEST_EXPECT_NO_ERROR(misc.rename_group(0, ACI_add_tag));      TEST_EXPECTATION(resultListingIs(misc, GLT_NAME, OUTER_PREFIXED)); // prefix first 'outer'
            TEST_EXPECT_NO_ERROR(misc.rename_group(0, "\"\""));           TEST_EXPECTATION(resultListingIs(misc, GLT_NAME, OUTER_PREFIXED)); // test empty ACI-result does not rename anything

            TEST_EXPECT_NO_ERROR(misc.rename_found_groups("\"[X]\";dd;\"   \"")); // prefix '[X]' to all found groups + suffix space (which are trimmed away afterwards)
            TEST_EXPECTATION(resultListingIs(misc, GLT_NAME, "[X][TAG] outer*[X]inner*[X]test*[X]outer*[X]test*[X]outer*[X]test*[X]outer*[X]inner*[X]test*[X]eee"));

            // test errors get reported:
            TEST_EXPECT_ERROR_CONTAINS(misc.rename_group(0,     ":x"), "no '=' found");
            TEST_EXPECT_ERROR_CONTAINS(misc.rename_found_groups(":x"), "no '=' found");

            TEST_EXPECT_NO_ERROR(misc.rename_found_groups("/\\[.*\\]//")); // remove any prefixes

            TEST_EXPECT_NO_ERROR(misc.rename_found_groups("dd;\"_\";hitidx;\"/\";hitcount")); // append "_index/hitcount" to groupname
            TEST_EXPECTATION(resultListingIs(misc, GLT_NAME, "outer_1/11*inner_2/11*test_3/11*outer_4/11*test_5/11*outer_6/11*test_7/11*outer_8/11*inner_9/11*test_10/11*eee_11/11"));

            TEST_EXPECT_NO_ERROR(misc.rename_found_groups("command(\"/_.*$//\")|dd;\"_\";markedInGroup;\"/\";groupSize")); // replace suffix with "marked/size"
            TEST_EXPECTATION(resultListingIs(misc, GLT_NAME, "outer_6/11*inner_4/5*test_7/7*outer_7/15*test_0/4*outer_20/47*test_6/12*outer_6/11*inner_4/5*test_2/6*eee_3/3"));

            TEST_EXPECT_NO_ERROR(misc.rename_found_groups(":_*=_L*(|nesting)\\=*(|aid)")); // replace suffix with nesting level and aid
            TEST_EXPECTATION(resultListingIs(misc, GLT_NAME, "outer_L0=0.695293*inner_L1=0.269289*test_L0=0.160956*outer_L0=1.099650*test_L1=0.591923*outer_L0=1.160535*test_L1=0.726679*outer_L2=0.704352*inner_L3=0.265516*test_L1=0.303089*eee_L4=0.229693"));

            // undo renaming groups (to avoid need to change tests below)
            TEST_EXPECT_NO_ERROR(misc.rename_found_groups("/_.*$//"));     // remove all behind '_'
            TEST_EXPECTATION(resultListingIs(misc, GLT_NAME, BEFORE_RENAME));

            TEST_EXPECT_EQUAL(refreshes_traced, 7); // amount of result-list refreshes that would happen (1 * rename_group() + 6 * rename_found_groups(); one rename_group did nothing!)
            refreshes_traced = 0;
        }

        {
            GroupSearch all(gb_main, traceRefresh_cb);  // run a 2nd search
            GroupSearch none(gb_main, traceRefresh_cb); // run a 3rd search
            GroupSearch few(gb_main, traceRefresh_cb);  // run a 4th search

            // test folding single groups
            TEST_EXPECTATION(                                                      resultListingIs(misc, GLT_NAME_FOLD, "outer*inner*[test]*outer*[test]*outer*[test]*[outer]*[inner]*[test]*[eee]"));   // shows current folding state
            TEST_EXPECT_NO_ERROR(misc.fold_group(0, GFM_TOGGLE)); TEST_EXPECTATION(resultListingIs(misc, GLT_NAME_FOLD, "[outer]*inner*[test]*outer*[test]*outer*[test]*[outer]*[inner]*[test]*[eee]")); // fold 1st 'outer'
            TEST_EXPECT_NO_ERROR(misc.fold_group(0, GFM_TOGGLE)); TEST_EXPECTATION(resultListingIs(misc, GLT_NAME_FOLD, "outer*inner*[test]*outer*[test]*outer*[test]*[outer]*[inner]*[test]*[eee]"));   // unfold 1st 'outer'

            TEST_EXPECT_EQUAL(refreshes_traced, 2); // 2 result-list refreshes would happen (one for each fold_group())
            refreshes_traced = 0;

            none.addQueryExpression(CO_OR, CT_NAME, CM_MISMATCH, "*"); // no such group
            all.addQueryExpression(CO_OR, CT_NAME, CM_MATCH, "*"); // matches all groups
            few.addQueryExpression(CO_OR, CT_NAME, CM_MATCH, "inner");

            none.perform_search(GSM_FIND);
            few.perform_search(GSM_FIND);
            all.perform_search(GSM_FIND);

            TEST_EXPECTATION(resultListingIs(none, GLT_NAME,      "")); // shows no results
            TEST_EXPECTATION(resultListingIs(few,  GLT_NAME_FOLD, "inner*[inner]")); // shows some results
            // shows current folding state (of all groups from all trees):
            TEST_EXPECTATION(resultListingIs(all,  GLT_NAME_FOLD, "last*[another group]*outer*inner*[test]*outer*[test]*[xx]*outer*[g2]*[xx]*[test]*[outer]*[inner]*[test]*zombsub*[zomb]*[ZOMB]*[dup]*[inner outer group]*[inner group]*[outer group]*[g4]*[g3]*[g2]*[xx]*[yy]*[eee]"));

            TEST_EXPECT_EQUAL(refreshes_traced, 0);

            // test folding listed groups
            // (Note: that results used for folding and for test differ!)
            TEST_EXPECT_NO_ERROR( few.fold_found_groups(GFM_EXPANDREC));          TEST_EXPECTATION(resultListingIs(all, GLT_NAME_FOLD, "last*[another group]*outer*inner*[test]*outer*[test]*[xx]*"          "outer*[g2]*[xx]*test*outer*inner*[test]*"         "zombsub*[zomb]*[ZOMB]*[dup]*[inner outer group]*[inner group]*[outer group]*[g4]*[g3]*[g2]*[xx]*[yy]*[eee]"));  // [A] only unfolds 2nd inner and 2 of its 3 parent groups
            TEST_EXPECT_NO_ERROR(misc.fold_found_groups(GFM_EXPANDREC));          TEST_EXPECTATION(resultListingIs(all, GLT_NAME_FOLD, "last*[another group]*outer*inner*test*outer*test*[xx]*"              "outer*[g2]*[xx]*test*outer*inner*test*"           "zombsub*[zomb]*[ZOMB]*[dup]*inner outer group*[inner group]*outer group*[g4]*[g3]*g2*[xx]*yy*eee"));            // 'xx' and 'g2' remain folded
            TEST_EXPECT_NO_ERROR(misc.fold_found_groups(GFM_COLLAPSE));           TEST_EXPECTATION(resultListingIs(all, GLT_NAME_FOLD, "last*[another group]*[outer]*[inner]*[test]*[outer]*[test]*[xx]*"   "[outer]*[g2]*[xx]*[test]*[outer]*[inner]*[test]*"  "zombsub*[zomb]*[ZOMB]*[dup]*inner outer group*[inner group]*outer group*[g4]*[g3]*g2*[xx]*yy*[eee]"));          // 'last' remains unfolded
            TEST_EXPECT_NO_ERROR( few.fold_found_groups(GFM_EXPANDREC_COLLREST)); TEST_EXPECTATION(resultListingIs(all, GLT_NAME_FOLD, "[last]*[another group]*outer*inner*[test]*[outer]*[test]*[xx]*"      "outer*[g2]*[xx]*test*outer*inner*[test]*"        "[zombsub]*[zomb]*[ZOMB]*[dup]*[inner outer group]*[inner group]*[outer group]*[g4]*[g3]*[g2]*[xx]*[yy]*[eee]")); // similar to line [A], but 'last' gets folded
            TEST_EXPECT_NO_ERROR(none.fold_found_groups(GFM_EXPANDREC_COLLREST)); TEST_EXPECTATION(resultListingIs(all, GLT_NAME_FOLD, "[last]*[another group]*[outer]*[inner]*[test]*[outer]*[test]*[xx]*" "[outer]*[g2]*[xx]*[test]*[outer]*[inner]*[test]*" "[zombsub]*[zomb]*[ZOMB]*[dup]*[inner outer group]*[inner group]*[outer group]*[g4]*[g3]*[g2]*[xx]*[yy]*[eee]")); // unfold none+collapse rest = fold all
            TEST_EXPECT_NO_ERROR(misc.fold_found_groups(GFM_EXPANDPARENTS));      TEST_EXPECTATION(resultListingIs(all, GLT_NAME_FOLD, "[last]*[another group]*outer*[inner]*[test]*outer*[test]*[xx]*"      "outer*[g2]*[xx]*test*outer*[inner]*[test]*"      "[zombsub]*[zomb]*[ZOMB]*[dup]*inner outer group*[inner group]*outer group*[g4]*[g3]*g2*[xx]*yy*[eee]"));         // unfold all groups containing listed groups

            TEST_EXPECT_EQUAL(refreshes_traced, 16); // @@@ want less refreshes!
            refreshes_traced = 0;

            {
                GroupSearch group2(gb_main, traceRefresh_cb);  // run a 5th search
                group2.addQueryExpression(CO_OR, CT_NAME, CM_MATCH, "g2"); // group 'g2' exists in 2 tree; species overlap, but are not identical
                group2.perform_search(GSM_FIND);

                GB_transaction ta(gb_main);

                // test retrieval of species contained in groups:
                TEST_EXPECTATION(speciesInGroupsAre(none, INTERSECT, ""));

                // groups 'inner' are identical in all trees:
                const char *INNER_SPECIES = "McpCapri,McpMyco2,McpMycoi,McpSpeci,SpiMelli";
                TEST_EXPECTATION(speciesInGroupsAre(few, UNITE,     INNER_SPECIES));
                TEST_EXPECTATION(speciesInGroupsAre(few, INTERSECT, INNER_SPECIES));

                TEST_EXPECTATION(speciesInGroupsAre(group2, UNITE,     "AnaAbact,BacMegat,BacPaste,CloTyro2,CloTyro4,CloTyrob,StaAureu,StaEpide"));
                TEST_EXPECTATION(speciesInGroupsAre(group2, INTERSECT, "AnaAbact,BacMegat,BacPaste,"       "CloTyro4,CloTyrob,StaAureu"));
            }
        }

        TEST_EXPECTATION(resultListingIs(misc, GLT_NAME_AND_PARENT, "outer*outer<inner>*test*outer*outer<test>*outer*outer<test>*test<outer>*outer<inner>*outer<test>*yy<eee>")); // format is "parent<child>"

        // test deleting groups:
        TEST_EXPECT_NO_ERROR(misc.delete_group(6)); TEST_EXPECTATION(resultListingIs(misc, GLT_NAME, "outer*inner*test*outer*test*outer*outer*inner*test*eee")); // delete 1st 'test' from 'tree_test2' (DEL_TEST)
        TEST_EXPECT_NO_ERROR(misc.delete_group(3)); TEST_EXPECTATION(resultListingIs(misc, GLT_NAME, "outer*inner*test*test*outer*outer*inner*test*eee"));       // delete 2nd 'outer' from 'tree_tree' (DEL_OUTER)

        // deleting invalid index only returns an error:
        TEST_EXPECT_ERROR_CONTAINS(misc.delete_group(100), "out-of-bounds");
        TEST_EXPECT_ERROR_CONTAINS(misc.delete_group(-1), "out-of-bounds");

        TEST_EXPECT_EQUAL(refreshes_traced, 2); // 2 result-list refreshes would happen (one for each delete_group())
        refreshes_traced = 0;

        TEST_EXPECTATION(resultListingIs(misc, GLT_NAME_AND_PARENT, "outer*outer<inner>*test*test*outer*outer<outer>*outer<inner>*outer<test>*yy<eee>")); // 'test' between 'outer<outer>' got removed

        // delete all (but one) groups named 'outer':
        misc.forgetQExpressions();
        misc.addQueryExpression(CO_OR, CT_NAME, CM_MATCH, "outer");
        misc.perform_search(GSM_FIND);
        TEST_EXPECTATION(resultListingIs(misc, GLT_NAME_TREE, "outer/tree_test*outer/tree_tree2*outer/tree_tree2")); // also tests that 'outer' was deleted from DB; see .@DEL_OUTER

        misc.remove_hit(1); // will not get deleted
        TEST_EXPECTATION(resultListingIs(misc, GLT_NAME_TREE, "outer/tree_test*outer/tree_tree2"));

        TEST_EXPECT_NO_ERROR(misc.delete_found_groups());           // now delete all listed groups
        TEST_EXPECTATION(resultListingIs(misc, GLT_NAME_TREE, "")); // result-list is empty now

        misc.perform_search(GSM_FIND);                                              // search again
        TEST_EXPECTATION(resultListingIs(misc, GLT_NAME_TREE, "outer/tree_tree2")); // hit removed before deleting listed still exists in DB

        TEST_EXPECT_EQUAL(refreshes_traced, 1); // only one refresh triggered for deletion of all listed groups
    }

    {
        refreshes_traced = 0;

        GroupSearch outer(gb_main, traceRefresh_cb);
        outer.addQueryExpression(CO_OR, CT_NAME, CM_MATCH, "test");
        outer.perform_search(GSM_FIND);
        TEST_EXPECTATION(resultListingIs(outer, GLT_NAME_TREE, "test/tree_test*test/tree_test*test/tree_tree2")); // also tests that 'test' was deleted from DB; see .@DEL_TEST

        // test result-update callbacks (triggered by DB-changes)
        { // delete tree_tree2:
            GB_transaction  ta(gb_main);
            GBDATA         *gb_tree = GBT_find_tree(gb_main, "tree_tree2");
            TEST_REJECT_NULL(gb_tree);
            TEST_EXPECT_NO_ERROR(GB_delete(gb_tree));
        }
        TEST_EXPECT_EQUAL(refreshes_traced, 1); // one modifying TA => only one refresh callback triggered
        TEST_EXPECTATION(resultListingIs(outer, GLT_NAME_TREE, "test/tree_test*test/tree_test")); // all results referring 'tree_tree2' were removed
    }


    GB_close(gb_main);
}

// Unit test: group search on 'TEST_trees.arb' with focus on keeled groups.
// Covers
// - the CT_KEELED search criterion (keeled vs. normal groups),
// - listing of parent groups, nesting levels, fold state, AID and group sizes
//   for the trees 'tree_keeled' and 'tree_keeled_2' (later also 'tree_groups'),
// - sorting of results by GSC_KEELED.
// All steps run inside one transaction; each expectation depends on the search/fold/sort
// state established by the preceding statements.
void TEST_keeled_group_search() {
    GB_shell shell;
    GBDATA   *gb_main = GB_open("TEST_trees.arb", "rw");

    GroupSearchCallback traceRefresh_cb = makeGroupSearchCallback(trace_refresh_cb);
    refreshes_traced = 0; // reset trace counter (its value is not checked by this test)
    {
        GB_transaction ta(gb_main);

        // no query expression is ever added to 'allGroups'
        // (the expectations below show that it reports every group of the searched trees)
        GroupSearch allGroups(gb_main, traceRefresh_cb);
        {
            GroupSearch keeledGroups(gb_main, traceRefresh_cb);
            GroupSearch normalGroups(gb_main, traceRefresh_cb);

            // nothing has been searched yet -> all result lists are empty
            TEST_EXPECT(allGroups.get_results().empty());
            TEST_EXPECT(keeledGroups.get_results().empty());
            TEST_EXPECT(normalGroups.get_results().empty());

            // CT_KEELED:
            keeledGroups.addQueryExpression(CO_OR, CT_KEELED, CM_MISMATCH, "0"); // find keeled groups
            normalGroups.addQueryExpression(CO_OR, CT_KEELED, CM_MATCH,    "0"); // find normal groups

            allGroups.perform_search(GSM_FIND);
            keeledGroups.perform_search(GSM_FIND);
            normalGroups.perform_search(GSM_FIND);

            TEST_EXPECT(!allGroups.get_results().empty());
            TEST_EXPECT(!keeledGroups.get_results().empty());
            TEST_EXPECT(!normalGroups.get_results().empty());

            // keeled and normal groups together have to sum up to all groups:
            TEST_EXPECT_EQUAL(allGroups.get_results().size(), 21);
            TEST_EXPECT_EQUAL(allGroups.get_results().size(),
                              keeledGroups.get_results().size()+normalGroups.get_results().size());
            TEST_EXPECT_EQUAL(keeledGroups.get_results().size(), 6);
            TEST_EXPECT_EQUAL(normalGroups.get_results().size(), 15);

            TEST_EXPECTATION(resultListingIs(allGroups, GLT_NAME_TREE,
                                             "test/tree_test*"
                                             "outer/tree_tree2*g2/tree_tree2*"
                                             "outer/tree_removal*g2 [was: test]/tree_removal*"
                                             "lower/tree_groups*low2/tree_groups*twoleafs/tree_groups*low1/tree_groups*upper/tree_groups*"
                                             "twoleafs/tree_keeled*low2/tree_keeled*lower/tree_keeled*upper/tree_keeled*low1/tree_keeled*"
                                             "low2/tree_keeled_2*twoleafs/tree_keeled_2*lower/tree_keeled_2*upper/tree_keeled_2*low1/tree_keeled_2*allButOne/tree_keeled_2" // finds "keeled group at leaf" 'allButOne'; see also ../../ARBDB/adtree.cxx@HIDDEN_KEELED_GROUP
                                 ));

            // the 6 keeled groups (3 in each of the two keeled trees):
            TEST_EXPECTATION(resultListingIs(keeledGroups, GLT_KNAME_NEST,
                                             "!twoleafs(L0)*!low2(L1)*?lower(L2)*" // tree_keeled
                                             "!low2(L0)*?lower(L1)*!allButOne(L2)" // tree_keeled_2
                                 ));
        }

        // restrict search range of 'allGroups' to the two trees containing keeled groups:
        TreeNameSet keeledTrees;
        keeledTrees.insert("tree_keeled");
        keeledTrees.insert("tree_keeled_2");

        allGroups.setSearchRange(keeledTrees);
        allGroups.perform_search(GSM_FIND);

        TEST_EXPECTATION(resultListingIs(allGroups, GLT_NAME_AND_PARENT,
                                         // tree_keeled:
                                         "twoleafs*twoleafs<low2>*low2<lower>*lower<upper>*"
                                         "low2<low1>*"

                                         // tree_keeled_2:
                                         "low2*"
                                         "twoleafs*"
                                         "low2<lower>*"
                                         "lower<upper>*"   // keeled group 'lower' encloses 'upper'
                                         "low2<low1>*"
                                         "low1<allButOne>"
                             ));

        // test folding of keeled groups:
        // (hit indices used with fold_group() refer to the positions in the listings below:
        //  0-4 = groups of tree_keeled, 5-10 = groups of tree_keeled_2)
        TEST_EXPECTATION(resultListingIs(allGroups, GLT_NAME_FOLD,
                                         "twoleafs*low2*lower*upper*low1*"          // tree_keeled
                                         "low2*twoleafs*lower*upper*low1*allButOne" // tree_keeled_2
                             ));

        TEST_EXPECT_NO_ERROR(allGroups.fold_group(0, GFM_TOGGLE)); // fold 'twoleafs'
        TEST_EXPECT_NO_ERROR(allGroups.fold_group(2, GFM_TOGGLE)); // fold 'lower' -> does as well fold 'upper' (overlayed groups)

        TEST_EXPECTATION(resultListingIs(allGroups, GLT_NAME_FOLD,
                                         "[twoleafs]*low2*[lower]*[upper]*low1*"    // tree_keeled
                                         "low2*twoleafs*lower*upper*low1*allButOne" // tree_keeled_2
                             ));

        TEST_EXPECT_NO_ERROR(allGroups.fold_group(3,  GFM_TOGGLE)); // unfold 'upper' -> does as well unfold 'lower' (overlayed groups)
        TEST_EXPECT_NO_ERROR(allGroups.fold_group(10, GFM_TOGGLE)); // fold 'allButOne' (tree_keeled_2)

        TEST_EXPECTATION(resultListingIs(allGroups, GLT_NAME_FOLD,
                                         "[twoleafs]*low2*lower*upper*low1*"          // tree_keeled
                                         "low2*twoleafs*lower*upper*low1*[allButOne]" // tree_keeled_2
                             ));

        TEST_EXPECT_NO_ERROR(allGroups.fold_group(0,  GFM_TOGGLE)); // unfold 'twoleafs' again
        TEST_EXPECT_NO_ERROR(allGroups.fold_group(10, GFM_TOGGLE)); // unfold 'allButOne' again

        // all groups are unfolded again (same listing as before folding):
        TEST_EXPECTATION(resultListingIs(allGroups, GLT_NAME_FOLD,
                                         "twoleafs*low2*lower*upper*low1*"          // tree_keeled
                                         "low2*twoleafs*lower*upper*low1*allButOne" // tree_keeled_2
                             ));

        // average ingroup distance (AID) of the found groups:
        TEST_EXPECTATION(resultListingIs(allGroups, GLT_NAME_AID,
                                         // tree_keeled:
                                         "twoleafs(1.4310)*low2(1.4436)*lower(1.0288)*upper(1.0288)*low1(1.1200)*"

                                         // tree_keeled_2:
                                         "low2(1.4436)*twoleafs(0.0087)*lower(1.0288)*upper(1.0288)*low1(1.1200)*"
                                         "allButOne(0.0000)" // 1 member -> zero AID
                             ));

        // extend the search range by 'tree_groups'
        // (note: despite its name, 'keeledTrees' now also contains a tree whose groups are
        //  all listed without keeled-marker below)
        keeledTrees.insert("tree_groups");
        allGroups.setSearchRange(keeledTrees);
        allGroups.perform_search(GSM_FIND);

        TEST_EXPECTATION(resultListingIs(allGroups, GLT_KNAME_NEST,
                                         // tree_groups:
                                         "lower(L0)*low2(L1)*twoleafs(L2)*low1(L1)*upper(L0)*"

                                         // tree_keeled:
                                         "!twoleafs(L0)*!low2(L1)*?lower(L2)*upper(L3)*"
                                         "low1(L2)*"

                                         // tree_keeled_2:
                                         "!low2(L0)*"
                                         "twoleafs(L0)*"
                                         "?lower(L1)*upper(L2)*low1(L1)*!allButOne(L2)"
                             ));

        // group sizes (same group names have different sizes in normal and keeled trees):
        TEST_EXPECTATION(resultListingIs(allGroups, GLT_NAME_SIZE,
                                         // tree_groups:
                                         "lower(10)*low2(3)*twoleafs(2)*low1(7)*upper(5)*"

                                         // tree_keeled:
                                         "twoleafs(13)*"
                                         "low2(12)*"
                                         "lower(5)*upper(5)*"
                                         "low1(7)*"

                                         // tree_keeled_2:
                                         "low2(12)*"
                                         "twoleafs(2)*"
                                         "lower(5)*"
                                         "upper(5)*low1(7)*"
                                         "allButOne(1)" // only 1 species!
                             ));

        // sort by keeled state: groups marked '?' are listed first, then those marked '!', then unmarked groups
        allGroups.addSortCriterion(GSC_KEELED);
        TEST_EXPECTATION(resultListingIs(allGroups, GLT_KNAME_NEST,
                                         "?lower(L2)*?lower(L1)*!twoleafs(L0)*!low2(L1)*!low2(L0)*!allButOne(L2)*lower(L0)*low2(L1)*twoleafs(L2)*low1(L1)*upper(L0)*upper(L3)*low1(L2)*twoleafs(L0)*upper(L2)*low1(L1)"
                             ));
    }

    GB_close(gb_main);
}



static arb_test::match_expectation does_map_index(const SymmetricMatrixMapper& mm, int x, int y, int lin) {
    // Builds an expectation that 'mm' maps the matrix position (x,y) onto the linear
    // index 'lin' -- regardless of the order in which the coordinates are passed --
    // and that 'lin' maps back onto that position (expected with the smaller coordinate in x).
    using namespace arb_test;

    expectation_group checks;

    // forward mapping has to be symmetric:
    checks.add(that(mm.linear_index(x, y)).is_equal_to(lin));
    checks.add(that(mm.linear_index(y, x)).is_equal_to(lin));

    // backward mapping:
    int rx;
    int ry;
    mm.to_xy(lin, rx, ry);

    if (y<x) std::swap(x, y); // compare against coordinates in ascending order

    checks.add(that(rx).is_equal_to(x));
    checks.add(that(ry).is_equal_to(y));

    return all().ofgroup(checks);
}

void TEST_SymmetricMatrixMapper() {
    {
        SymmetricMatrixMapper m2(2);
        TEST_EXPECT_EQUAL(m2.linear_size(), 1);
        TEST_EXPECTATION(does_map_index(m2, 0, 1, 0));
    }
    {
        SymmetricMatrixMapper m3(3);
        TEST_EXPECT_EQUAL(m3.linear_size(), 3);
        TEST_EXPECTATION(does_map_index(m3, 0, 1, 0));
        TEST_EXPECTATION(does_map_index(m3, 2, 0, 1));
        TEST_EXPECTATION(does_map_index(m3, 2, 1, 2));
    }
    {
        SymmetricMatrixMapper m100(100);
        TEST_EXPECT_EQUAL(m100.linear_size(), 4950);
        TEST_EXPECTATION(does_map_index(m100, 0, 1, 0));
        TEST_EXPECTATION(does_map_index(m100, 49, 50, 1274));
        TEST_EXPECTATION(does_map_index(m100, 51, 50, 1274+51));
        TEST_EXPECTATION(does_map_index(m100, 99, 98, 4949));
    }
}

// Unit test for duplicate-group detection (GroupSearch::setDupCriteria) on '../../demo.arb'.
// Covers
// - whole-name comparison (DNC_WHOLENAME) with location criteria DLC_SAME_TREE, DLC_ANYWHERE and DLC_DIFF_TREE,
// - search for "unique" groups (first parameter of setDupCriteria is false),
// - word-wise comparison (DNC_WORDWISE) with different word separators, ignored words and case handling,
// - sorting of results by cluster, treename and groupname (incl. reversed order).
// Expected listings (GLT_CLUST_NT) have the form "cluster/groupname/treename" (as the expected strings show).
// Hit indices passed to rename_group() always refer to the result list of the preceding search.
void TEST_group_duplicate_detection() {
    GB_shell  shell;
    // NOTE(review): DB is opened with mode "r" although groups get renamed below;
    // nothing is saved by this test, so changes presumably only affect the loaded DB - confirm.
    GBDATA   *gb_main = GB_open("../../demo.arb", "r");

    GroupSearchCallback traceRefresh_cb = makeGroupSearchCallback(trace_refresh_cb);

    {
        refreshes_traced = 0; // counts refresh callbacks (checked at end of this block)

        GroupSearch search(gb_main, traceRefresh_cb);
        search.addSortCriterion(GSC_NAME);
        search.addSortCriterion(GSC_TREENAME);

        // --------------------------------------------
        //      test DNC_WHOLENAME name comparison:

        // duplicates inside the same tree (at least 2 groups per cluster):
        search.setDupCriteria(true, DNC_WHOLENAME, GB_MIND_CASE, DLC_SAME_TREE, 2);
        search.perform_search(GSM_FIND);
        TEST_EXPECTATION(hasOrder(search, "TN")); // treename, groupname
        TEST_EXPECTATION(resultListingIs(search, GLT_CLUST_NT,
                                         "1/outer/tree_test*"
                                         "1/outer/tree_test*"
                                         "2/test/tree_test*"
                                         "2/test/tree_test*"
                                         "3/outer/tree_tree2*"
                                         "3/outer/tree_tree2*"
                                         "4/test/tree_tree2*"
                                         "4/test/tree_tree2"
                             ));

        // sort by cluster (in reversed order) with highest priority:
        search.addSortCriterion(GSC_REVERSE);
        search.addSortCriterion(GSC_CLUSTER);
        search.addSortCriterion(GSC_REVERSE);

        // duplicates anywhere (same or different tree):
        search.setDupCriteria(true, DNC_WHOLENAME, GB_MIND_CASE, DLC_ANYWHERE, 2);
        search.perform_search(GSM_FIND);
        TEST_EXPECTATION(hasOrder(search, "!C!TN")); // cluster(rev), treename, groupname
        TEST_EXPECTATION(resultListingIs(search, GLT_CLUST_NT,
                                         "5/g2/tree_tree2*"
                                         "5/g2/tree_zomb*"
                                         "4/xx/tree_test*"
                                         "4/xx/tree_tree2*"
                                         "4/xx/tree_zomb*"
                                         "3/test/tree_test*"
                                         "3/test/tree_test*"
                                         "3/test/tree_tree2*"
                                         "3/test/tree_tree2*"
                                         "2/inner/tree_test*"
                                         "2/inner/tree_tree2*"
                                         "1/outer/tree_test*"
                                         "1/outer/tree_test*"
                                         "1/outer/tree_tree2*"
                                         "1/outer/tree_tree2"
                             ));

        // groups without any duplicate are listed with cluster number 0:
        search.setDupCriteria(false, DNC_WHOLENAME, GB_MIND_CASE, DLC_ANYWHERE, 2); // search "unique" groups
        search.perform_search(GSM_FIND);
        TEST_EXPECTATION(resultListingIs(search, GLT_CLUST_NT,
                                         "0/another group/tree_test*"
                                         "0/last/tree_test*"
                                         "0/ZOMB/tree_zomb*"
                                         "0/dup/tree_zomb*"
                                         "0/eee/tree_zomb*"
                                         "0/g3/tree_zomb*"
                                         "0/g4/tree_zomb*"
                                         "0/inner group/tree_zomb*"
                                         "0/inner outer group/tree_zomb*"
                                         "0/outer group/tree_zomb*"
                                         "0/yy/tree_zomb*"
                                         "0/zomb/tree_zomb*"
                                         "0/zombsub/tree_zomb"
                             ));

        // re-establish non-reversed sort order (cluster, treename, groupname):
        search.addSortCriterion(GSC_NAME);
        search.addSortCriterion(GSC_TREENAME);
        search.addSortCriterion(GSC_CLUSTER);

        // duplicates have to occur in different trees:
        search.setDupCriteria(true, DNC_WHOLENAME, GB_MIND_CASE, DLC_DIFF_TREE, 2);
        search.perform_search(GSM_FIND);
        TEST_EXPECTATION(hasOrder(search, "CTN")); // cluster, treename, groupname
        TEST_EXPECTATION(resultListingIs(search, GLT_CLUST_NT,
                                         "1/outer/tree_test*"
                                         "1/outer/tree_test*"
                                         "1/outer/tree_tree2*"
                                         "1/outer/tree_tree2*"
                                         "2/inner/tree_test*"
                                         "2/inner/tree_tree2*"
                                         "3/test/tree_test*"
                                         "3/test/tree_test*"
                                         "3/test/tree_tree2*"
                                         "3/test/tree_tree2*"
                                         "4/xx/tree_test*"
                                         "4/xx/tree_tree2*"
                                         "4/xx/tree_zomb*"
                                         "5/g2/tree_tree2*"
                                         "5/g2/tree_zomb"
                             ));

        search.setDupCriteria(true, DNC_WHOLENAME, GB_MIND_CASE, DLC_DIFF_TREE, 3); // expect hits in 3 diff. trees
        search.perform_search(GSM_FIND);
        TEST_EXPECTATION(resultListingIs(search, GLT_CLUST_NT, // Note: does not add 'outer' or 'test' (they occur 4 times, but only in 2 trees!)
                                         "1/xx/tree_test*"
                                         "1/xx/tree_tree2*"
                                         "1/xx/tree_zomb"
                             ));

        // --------------------------------------------
        //      test DNC_WORDWISE name comparison:

        const char *word_sep = " ";
        WordSet     no_words_ignored;
        // (4th argument is presumably the number of words that have to match; here: 1)
        search.setDupCriteria(true, DNC_WORDWISE, GB_MIND_CASE, 1, no_words_ignored, word_sep, DLC_ANYWHERE, 2);
        search.perform_search(GSM_FIND);
        TEST_EXPECTATION(resultListingIs(search, GLT_CLUST_NT,
                                         "1/another group/tree_test*"
                                         "1/inner group/tree_zomb*"
                                         "1/inner outer group/tree_zomb*"
                                         "1/outer group/tree_zomb*"

                                         "2/outer/tree_test*"
                                         "2/outer/tree_test*"
                                         "2/outer/tree_tree2*"
                                         "2/outer/tree_tree2*"

                                         "3/test/tree_test*"
                                         "3/test/tree_test*"
                                         "3/test/tree_tree2*"
                                         "3/test/tree_tree2*"

                                         "4/xx/tree_test*"
                                         "4/xx/tree_tree2*"
                                         "4/xx/tree_zomb*"

                                         "5/inner/tree_test*"
                                         "5/inner/tree_tree2*"

                                         "6/g2/tree_tree2*"
                                         "6/g2/tree_zomb"
                             ));

        search.setDupCriteria(true, DNC_WORDWISE, GB_MIND_CASE, 2, no_words_ignored, word_sep, DLC_ANYWHERE, 2);
        search.perform_search(GSM_FIND);
        TEST_EXPECTATION(resultListingIs(search, GLT_CLUST_NT,
                                         "1/inner group/tree_zomb*"
                                         "1/inner outer group/tree_zomb"
                             ));

        // rename one group (spaces->commas) to test special word separators
        // (hit 0 of the listing above is 'inner group' in 'tree_zomb')
        {
            GB_transaction ta(gb_main);
            TEST_EXPECT_NO_ERROR(search.rename_group(0, "/ /,/"));
            TEST_EXPECT_EQUAL(search.get_results()[0].get_name(), "inner,group");
        }

        search.setDupCriteria(true, DNC_WORDWISE, GB_MIND_CASE, 2, no_words_ignored, word_sep, DLC_ANYWHERE, 2);
        search.perform_search(GSM_FIND);
        TEST_EXPECTATION(resultListingIs(search, GLT_CLUST_NT, // rename of group causes a change of detected cluster
                                         "1/inner outer group/tree_zomb*"
                                         "1/outer group/tree_zomb"
                             ));


        word_sep = ", "; // <<<------------------------------ commas separate words from now on!

        // with comma as additional separator the renamed group 'inner,group' is detected again:
        search.setDupCriteria(true, DNC_WORDWISE, GB_MIND_CASE, 2, no_words_ignored, word_sep, DLC_ANYWHERE, 2);
        search.perform_search(GSM_FIND);
        TEST_EXPECTATION(resultListingIs(search, GLT_CLUST_NT,
                                         "1/inner outer group/tree_zomb*"
                                         "1/inner,group/tree_zomb"
                             ));

        // ignore the word "Group" (written with different case than in the group names -> search ignores case below)
        WordSet ignore_group;
        ignore_group.insert("Group");

        search.setDupCriteria(true, DNC_WORDWISE, GB_IGNORE_CASE, 1, ignore_group, word_sep, DLC_ANYWHERE, 2);
        search.perform_search(GSM_FIND);
        TEST_EXPECTATION(resultListingIs(search, GLT_CLUST_NT,
                                         "1/outer/tree_test*"
                                         "1/outer/tree_test*"
                                         "1/outer/tree_tree2*"
                                         "1/outer/tree_tree2*"
                                         "1/inner outer group/tree_zomb*"
                                         "1/outer group/tree_zomb*"

                                         "2/test/tree_test*"
                                         "2/test/tree_test*"
                                         "2/test/tree_tree2*"
                                         "2/test/tree_tree2*"

                                         "3/inner/tree_test*"
                                         "3/inner/tree_tree2*"
                                         "3/inner,group/tree_zomb*"

                                         "4/xx/tree_test*"
                                         "4/xx/tree_tree2*"
                                         "4/xx/tree_zomb*"

                                         "5/g2/tree_tree2*"
                                         "5/g2/tree_zomb*"

                                         "6/ZOMB/tree_zomb*" // 'ZOMB' and 'zomb' only cluster when case is ignored
                                         "6/zomb/tree_zomb"
                             ));

        search.setDupCriteria(true, DNC_WORDWISE, GB_IGNORE_CASE, 2, ignore_group, word_sep, DLC_ANYWHERE, 2);
        search.perform_search(GSM_FIND);
        TEST_EXPECTATION(resultListingIs(search, GLT_CLUST_NT, "")); // none

        search.setDupCriteria(true, DNC_WORDWISE, GB_IGNORE_CASE, 1, ignore_group, "", DLC_ANYWHERE, 2); // empty word separator -> uses whole names
        search.perform_search(GSM_FIND);
        TEST_EXPECTATION(resultListingIs(search, GLT_CLUST_NT,
                                         "1/outer/tree_test*"
                                         "1/outer/tree_test*"
                                         "1/outer/tree_tree2*"
                                         "1/outer/tree_tree2*"

                                         "2/test/tree_test*"
                                         "2/test/tree_test*"
                                         "2/test/tree_tree2*"
                                         "2/test/tree_tree2*"

                                         "3/xx/tree_test*"
                                         "3/xx/tree_tree2*"
                                         "3/xx/tree_zomb*"

                                         "4/inner/tree_test*"
                                         "4/inner/tree_tree2*"

                                         "5/g2/tree_tree2*"
                                         "5/g2/tree_zomb*"

                                         "6/ZOMB/tree_zomb*"
                                         "6/zomb/tree_zomb"
                             ));

        // rename more groups to test cluster-search based on 3 words and extension based on 2 words
        // (hits 0-2 of the listing above are the groups 'outer' in 'tree_test' (2x) and 'tree_tree2')
        {
            GB_transaction ta(gb_main);
            TEST_EXPECT_NO_ERROR(search.rename_group(0, "/outer/group inner outer/"));
            TEST_EXPECT_NO_ERROR(search.rename_group(1, "/outer/group outer/"));
            TEST_EXPECT_NO_ERROR(search.rename_group(2, "/outer/outer group/"));
            TEST_EXPECT_EQUAL(search.get_results()[0].get_name(), "group inner outer");
            TEST_EXPECT_EQUAL(search.get_results()[1].get_name(), "group outer");
            TEST_EXPECT_EQUAL(search.get_results()[2].get_name(), "outer group");
        }

        search.setDupCriteria(true, DNC_WORDWISE, GB_IGNORE_CASE, 2, no_words_ignored, word_sep, DLC_ANYWHERE, 2);
        search.perform_search(GSM_FIND);
        TEST_EXPECTATION(resultListingIs(search, GLT_CLUST_NT,
                                         "1/group inner outer/tree_test*" // cluster based on 3 words gets extended by groups matching 2 of these words ("group" and "outer")
                                         "1/group outer/tree_test*"       // (note that group containing 'inner' and 'group' is discarded, because resulting cluster would be smaller)
                                         "1/outer group/tree_tree2*"
                                         "1/inner outer group/tree_zomb*"
                                         "1/outer group/tree_zomb"
                             ));

        TEST_EXPECT_EQUAL(refreshes_traced, 2); // 2 renaming transactions above (1+3 renames); presumably one refresh per modifying transaction
    }
    GB_close(gb_main);
}

static double bruteForce_calc_average_ingroup_distance(GroupSearchTree *node) {
    // Reference implementation used by the unit tests:
    // sums up the in-tree distances of all pairs of leafs reached while walking the
    // edges between both sons of 'node' and returns the average distance per pair.
    // A subtree consisting of a single leaf yields 0.0.

    const unsigned leaf_count = node->get_leaf_count();
    if (leaf_count == 1) {
        return 0.0; // single leaf -> zero distance
    }

    ARB_edge stop  = parentEdge(node->get_leftson());
    ARB_edge first = parentEdge(node->get_rightson()).inverse();

    if (first == stop) {
        // both edges only coincide at the root edge -> begin with its successor
        gs_assert(first.get_type() == ROOT_EDGE);
        first = first.next();
    }

    double   total_distance = 0.0;
    unsigned pair_count     = 0;

    for (ARB_edge outer = first; outer != stop; outer = outer.next()) {
        if (!outer.is_edge_to_leaf()) continue;

        // pair the leaf of 'outer' with each leaf reached later in the walk
        for (ARB_edge inner = outer.next(); inner != stop; inner = inner.next()) {
            if (!inner.is_edge_to_leaf()) continue;

            total_distance += outer.dest()->intree_distance_to(inner.dest());
            ++pair_count;
        }
    }

#if defined(ASSERTION_USED)
    // every unordered pair of leafs has to be visited exactly once
    const unsigned expected_pairs = (leaf_count*(leaf_count-1))/2;
    gs_assert(pair_count == expected_pairs);
#endif

    return total_distance/pair_count;
}

// Expects that the average ingroup distance reported by 'node' (get_average_ingroup_distance())
// equals the result of the brute-force reference calculation within a tolerance of 0.000001.
// Note: the macro argument 'node' is evaluated twice.
#define TEST_EXPECT_PROPER_AID(node) do{                                        \
        const double EPSILON = 0.000001;                                        \
        TEST_EXPECT_SIMILAR(bruteForce_calc_average_ingroup_distance(node),     \
                            (node)->get_average_ingroup_distance(),             \
                            EPSILON);                                           \
    }while(0)

// Unit test for the average ingroup distance (AID) of tree nodes:
// loads 'tree_test' from 'TEST_trees.arb', checks the brute-force reference calculation
// against known values for some selected nodes and afterwards compares the AID reported
// by the nodes themselves with the brute-force result (for the selected nodes and for
// the destination nodes of the edges visited by a walk starting at the root edge).
void TEST_ingroup_distance() {
    GB_shell  shell;
    GBDATA   *gb_main = GB_open("TEST_trees.arb", "r");

    {
        GB_transaction ta(gb_main);
        SearchedTree   stree("tree_test", gb_main);

        // tree root is requested before the load-failure check
        // (no node is accessed before TEST_REJECT below)
        GroupSearchRoot *troot = stree.get_tree_root();
        TEST_REJECT(stree.failed_to_load());

        // get some specific nodes:
        GroupSearchTree *rootNode = troot->get_root_node();
        GroupSearchTree *leftSon  = rootNode->get_leftson();
        GroupSearchTree *grandSon = leftSon->get_rightson();

        GroupSearchTree *someLeaf = grandSon->get_leftson();
        while (!someLeaf->is_leaf()) { // descend into the bigger subtree => ends at a leaf whose father contains exactly 2 leafs (checked below)
            GroupSearchTree *L = someLeaf->get_leftson();
            GroupSearchTree *R = someLeaf->get_rightson();

            someLeaf = L->get_leaf_count() > R->get_leaf_count() ? L : R; // takes the right son if both subtrees have equal size
        }

        TEST_EXPECT_EQUAL(someLeaf->get_leaf_count(), 1);

        GroupSearchTree *minSubtree = someLeaf->get_father();
        TEST_EXPECT_EQUAL(minSubtree->get_leaf_count(), 2);

        // brute-force AID calculation:
        {
            const double EPSILON = 0.000001;
            TEST_EXPECT_SIMILAR(bruteForce_calc_average_ingroup_distance(someLeaf),   0.0,      EPSILON);
            // two leafs -> one single pair; its distance is the sum of both branch lengths:
            TEST_EXPECT_SIMILAR(bruteForce_calc_average_ingroup_distance(minSubtree), minSubtree->leftlen + minSubtree->rightlen, EPSILON);
            TEST_EXPECT_SIMILAR(bruteForce_calc_average_ingroup_distance(grandSon),   0.534927, EPSILON);
            TEST_EXPECT_SIMILAR(bruteForce_calc_average_ingroup_distance(leftSon),    0.976091, EPSILON);
            TEST_EXPECT_SIMILAR(bruteForce_calc_average_ingroup_distance(rootNode),   1.108438, EPSILON);
        }

        // calculate AID on-the-fly and compare with brute-force results
        TEST_EXPECT_PROPER_AID(someLeaf);
        TEST_EXPECT_PROPER_AID(minSubtree);
        TEST_EXPECT_PROPER_AID(grandSon);
        TEST_EXPECT_PROPER_AID(leftSon);
        TEST_EXPECT_PROPER_AID(rootNode);

        // same check for the destination node of each edge visited
        // while walking from the root edge until the walk returns to it:
        ARB_edge start = rootEdge(troot);
        for (ARB_edge e = start.next(); e != start; e = e.next()) {
            TEST_EXPECT_PROPER_AID(DOWNCAST(GroupSearchTree*, e.dest()));
        }
    }
    GB_close(gb_main);
}

#endif // UNIT_TESTS

// --------------------------------------------------------------------------------

