// =============================================================== //
//                                                                 //
//   File      : NT_tree_cmp.h                                     //
//   Purpose   :                                                   //
//                                                                 //
//   Institute of Microbiology (Technical University Munich)       //
//   http://www.arb-home.de/                                       //
//                                                                 //
// =============================================================== //

#ifndef NT_TREE_CMP_H
#define NT_TREE_CMP_H

#ifndef ARBDB_BASE_H
#include <arbdb_base.h>
#endif
#ifndef NT_LOCAL_H
#include "NT_local.h"
#endif

enum GroupTransferMode {
    COMPARE_TOPOLOGY,       // compare inner nodes + write differences to tree remarks
    REMOVE_EXISTING_GROUPS, // transfer groups (removes existing groups)
    KEEP_OLD_NAMES,         // transfer groups (keep old groups; combines mismatching names)
};

enum GroupsToTransfer {
    XFER_ALL_GROUPS,
    XFER_GROUPS_WITH_MARKED,
};

class RSpecSet;
class TSpecSet;
class GroupMatchScorer;

class RatioLimits {
    double minRatio;
    double maxRatio;

public:
    RatioLimits()                           : minRatio(0.0),   maxRatio(1.0)   {}
    RatioLimits(double lower, double upper) : minRatio(lower), maxRatio(upper) {}

    bool insideLimits(double ratio) const { return minRatio <= ratio && ratio <= maxRatio; }
    bool isValid() const { return minRatio>=0.0 && minRatio<=maxRatio && maxRatio<=1.0; }
};

class GroupPenalty {
    double penalty; // 0 = perfect match; >0 otherwise

    double ingroup_ratio;  // how many percent of original groups members are members of new group [0..1]
    double outgroup_ratio; // how many percent of new group are no original group members [0..1]

    bool keeled; // @@@ shall be used for #451

    // data copied from unregistered set (TSpecSet; i.e. from source tree when moving groups):
    int groupSize; // size of source group (known members)
    int unknown;   // unknown members in source group

    static double no_match_penalty() { return LONG_MAX; }

#if defined(ASSERTION_USED)
    bool isRatio(double r) const { return r>=0.0 && r<=1.0; }
#endif

public:
    GroupPenalty() :
        penalty(no_match_penalty()),
        ingroup_ratio(-1),
        outgroup_ratio(-1),
        keeled(false),
        groupSize(-1),
        unknown(-1)
    {}
    GroupPenalty(double penalty_, double ingroup_ratio_, double outgroup_ratio_, int unreg_groupsize) :
        penalty(penalty_),
        ingroup_ratio(ingroup_ratio_),
        outgroup_ratio(outgroup_ratio_),
        keeled(false),
        groupSize(unreg_groupsize),
        unknown(-1)
    {
        nt_assert(isRatio(ingroup_ratio));
        nt_assert(isRatio(outgroup_ratio));
        nt_assert(groupSize>0);
    }

    bool doesMatch() const { return penalty != no_match_penalty(); }
    bool betterThan(const GroupPenalty& other) const { return penalty < other.penalty; }

    double get_penalty() const { return penalty; }
    bool isPerfectMatch() const { return penalty == 0.0; }
    double get_ingroup_ratio() const { return ingroup_ratio; }
    double get_outgroup_ratio() const { return outgroup_ratio; }

    int get_groupsize() const { nt_assert(groupSize>0); return groupSize; }
    int get_unknown() const { nt_assert(unknown>=0); return unknown; }

    bool shouldHaveBeenKeeled() const { return keeled; }

    static GroupPenalty NoMatch() { return GroupPenalty(); } // syntactic sugar

    void mark_as_keeled() { keeled = true; }
    void addPenalty(double p) {
        nt_assert(p>=0.0);
        nt_assert(doesMatch()); // avoid "!doesMatch()"-condition gets destroyed
        penalty += p;
    }
    void registerUnfound(const GroupMatchScorer& scorer, const TSpecSet& tset);
};

class GroupMatchScorer {
    RatioLimits ingroup;  // only need lower limit (upper should always be 100%)
    RatioLimits outgroup; // only need upper limit (lower should always be 0%)

    // Pep: per error penalty (=absolute penalty=traditional penalty scoring)
    double ingroupPep;  // penalty for each former member of group (when moved out of group, but still member of tree)
    double outgroupPep; // penalty for each former non-member (when moved into group)
    double unfoundPep;  // penalty for each unknown species (when moved into group and not member of source tree)

    double keelPenalty; // penalty added for using keeled group

    // RelPen: ratio penalty (=relative penalty)
    double ingroupInvRelPen; // factor for (1-GroupPenalty::ingroup_ratio), i.e. penalty rel.to ratio of non-included former group members
    double outgroupRelPen;   // factor for ( GroupPenalty::outgroup_ratio), i.e. penalty rel.to ratio of former non-member in new group

    bool insideLimits(double ingroupRatio, double outgroupRatio) const {
        return
            ingroup.insideLimits(ingroupRatio) &&
            outgroup.insideLimits(outgroupRatio);
    }
    GroupPenalty calcPenalty(long removed, long added, long commonSpecies);

public:
    GroupMatchScorer() :
        ingroupPep(1.0),
        outgroupPep(1.0),
        unfoundPep(0.0001),
        keelPenalty(0.01),
        ingroupInvRelPen(0.0),
        outgroupRelPen(0.0)
    {}

    GB_ERROR check_validity() const;

    // setup:
    void setLimits(const RatioLimits& ingroupLimits, const RatioLimits& outgroupLimits) {
        nt_assert(ingroupLimits.insideLimits(1.0)); // upper should always be 100%
        nt_assert(outgroupLimits.insideLimits(0.0)); // lower should always be 0%

        ingroup  = ingroupLimits;
        outgroup = outgroupLimits;
    }
    void setPerErrorPenalties(double ingroup_pep, double outgroup_pep, double unfound_pep) {
        ingroupPep  = ingroup_pep;
        outgroupPep = outgroup_pep;
        unfoundPep  = unfound_pep;
    }
    void setRelativePenalties(double ingroup_inv_relpen, double outgroup_relpen) {
        ingroupInvRelPen = ingroup_inv_relpen;
        outgroupRelPen   = outgroup_relpen;
    }

    // scoring:
    GroupPenalty matchGroups(const TSpecSet& sourceSet, const RSpecSet& targetSet, long commonSpecies, long overallSpecies);
    double       calcUnknownMembersPenalty(const TSpecSet& sourceSet) const;
};

GB_ERROR NTREE_move_tree_info(GBDATA *gb_main, const char *tree_source, const char *tree_dest, const char *log_file, GroupTransferMode mode, GroupsToTransfer what, const GroupMatchScorer& scorer, const char *aci);

#else
#error NT_tree_cmp.h included twice
#endif // NT_TREE_CMP_H
