// ============================================================= //
//                                                               //
//   File      : CT_ctree.cxx                                    //
//   Purpose   : consensus tree                                  //
//                                                               //
//   Institute of Microbiology (Technical University Munich)     //
//   http://www.arb-home.de/                                     //
//                                                               //
// ============================================================= //

#include "CT_ctree.hxx"
#include "CT_hash.hxx"
#include "CT_ntree.hxx"
#include <arb_strbuf.h>

SpeciesSpace::SpeciesSpace(const CharPtrArray& names_) :
    names(names_),
    Name_hash(NULp),
    psize(NULp)
{
    // names = leafnames (=species names)

    int leaf_count = names.size();
    Name_hash = GBS_create_hash(leaf_count, GB_MIND_CASE);
    for (int i=0; i< leaf_count; i++) {
        GBS_write_hash(Name_hash, names[i], i+1);
    }

    psize      = new PartitionSize(leaf_count);
    allSpecies = psize->create_root();
}

SpeciesSpace::~SpeciesSpace() {
    delete allSpecies;
    delete psize;

    if (Name_hash) {
        GBS_free_hash(Name_hash);
        Name_hash  = NULp;
    }
}

// ---------------------------
//      DeconstructedTree

DeconstructedTree::DeconstructedTree(const SpeciesSpace& specSpace_) :
    specSpace(specSpace_),
    overall_weight(0)
{
    registry = new PartRegistry;
}
DeconstructedTree::~DeconstructedTree() {
    delete registry;
}
void DeconstructedTree::start_sorted_retrieval() { registry->build_sorted_list(overall_weight); }
size_t DeconstructedTree::get_part_count() const { return registry->size(); }
PART *DeconstructedTree::get_part() { return registry->get_part(); }
const PART *DeconstructedTree::peek_part(int idx) const { return registry->peek_part(idx); }

const PART *DeconstructedTree::part_with_origin(const TreeNode *node) const {
    arb_assert(node);
    size_t parts = get_part_count();
    for (size_t i = 0; i<parts; ++i) {
        const PART *p = peek_part(i);
        if (p->get_origin() == node) {
            return p;
        }
    }
    return NULp;
}
const PART *DeconstructedTree::find_part(const TreeNode *node) const {
    const PART *found = part_with_origin(node);
    if (!found && node->is_son_of_root()) {
        found = part_with_origin(node->get_brother());
    }
    return found;
}

const PART *DeconstructedTree::find_innermost_part() const {
    int         best_dist = INT_MAX;
    const PART *best_part = NULp;

    size_t parts = get_part_count();
    for (size_t p = 0; p<parts; ++p) {
        const PART *part = peek_part(p);
        int         dist = part->distance_to_tree_center();

        if (dist<best_dist) {
            best_dist = dist;
            best_part = part;
        }
    }

    arb_assert(best_part);
    return best_part;
}
// -----------------------
//      ConsensusTree

ConsensusTree::ConsensusTree(const CharPtrArray& names_) :
    SpeciesSpace(names_),
    DeconstructedTree(static_cast<const SpeciesSpace&>(*this))
{
}

GB_ERROR DeconstructedTree::deconstruct_weighted(const TreeNode *tree, const PART *wholeTree, int leafs, double weight, bool provideProgress, const PART *allSpecies, DeconstructionMode mode) {
    // inserts a tree into the PartRegistry.
    arb_assert(GBT_count_leafs(tree) == size_t(leafs));

    overall_weight += weight;

    if (wholeTree->get_members() != leafs) {
        arb_assert(wholeTree->get_members() < leafs);
        return "tree contains duplicated species";
    }

    arb_progress *insertProgress = NULp;
    if (provideProgress) {
        long nodeCount = leafs_2_nodes(wholeTree->get_members(), ROOTED);
        insertProgress = new arb_progress(nodeCount);
    }

    bool contains_all_species   = wholeTree->equals(allSpecies);
    bool handle_partial_as_full = mode == DMODE_ROOTSYNC;

    if (contains_all_species || handle_partial_as_full) {
        deconstruct_full_rootnode(tree, weight, insertProgress);
    }
    else {
        deconstruct_partial_rootnode(tree, weight, wholeTree, insertProgress);
    }

    GB_ERROR error = NULp;
    if (provideProgress) {
        error = insertProgress->error_if_aborted();

        delete insertProgress;
        insertProgress = NULp;
    }
    arb_assert(!insertProgress);

    return error;
}

GB_ERROR ConsensusTree::insert_tree_weighted(const TreeNode *tree, int leafs, double weight, bool provideProgress) {
    PART_Ptr wholeTree(create_tree_PART(tree, 0.0)); // weight does not matter here
    return deconstruct_weighted(tree, &*wholeTree, leafs, weight, provideProgress, get_allSpecies(), DMODE_CONSENSUS_TREE);
}

SizeAwareTree *ConsensusTree::get_consensus_tree(GB_ERROR& error) {
    // Get new consensus-tree -> SizeAwareTree

    /* This function is little bit tricky:
       the root-partition consist of 111....111 so it must have two sons
       that represent the same partition son1 == ~son2 to do this we must split
       the fist son-partition in two parts through logical calculation there
       could only be one son! */

    arb_assert(!error);
    ntree_init(get_PartitionSize());

    start_sorted_retrieval();

    const size_t steps          = get_part_count()+2;
    const size_t weighted_steps = triangular_number(steps);
    arb_progress progress(weighted_steps);
    size_t inserted = 0;
    {
#if defined(DUMP_PART_INSERTION)
        PART::start_pretty_printing(get_names());
#endif

        PART *p = get_part();
        while (p && !progress.aborted()) {
            insert_ntree(p);
            progress.inc_by(++inserted);
            p = get_part();
        }

#if defined(DUMP_PART_INSERTION)
        PART::stop_pretty_printing();
#endif
    }

    SizeAwareTree *result_tree = NULp;

    error = progress.error_if_aborted();
    if (!error) {
        const NT_NODE *n = ntree_get();

        arb_assert(ntree_count_sons(n) == 2);

#if defined(NTREE_DEBUG_FUNCTIONS)
        arb_assert(is_well_formed(n));
#endif

        result_tree = rb_gettree(n);

        progress.inc_by(++inserted);

        result_tree->get_tree_root()->find_innermost_edge().set_root();
        result_tree->reorder_tree(BIG_BRANCHES_TO_BOTTOM);

        progress.inc_by(++inserted);
    }

    ntree_cleanup();
    arb_assert(contradicted(result_tree, error));
    return result_tree;
}


char *ConsensusTreeBuilder::get_tree_remark() const {
    GBS_strstruct remark(1000);
    {
        char *build_info = GBS_global_string_copy("ARB consensus tree build from %zu trees:", get_tree_count());
        char *dated      = GBS_log_action_to(NULp, build_info, true);
        remark.cat(dated);
        free(dated);
        free(build_info);
    }

    size_t allCount = get_species_count();

    size_t maxnamelen = 0;
    for (size_t t = 0; t<get_tree_count(); ++t) {
        maxnamelen = std::max(maxnamelen, strlen(get_tree_info(t).name()));
    }
    for (size_t t = 0; t<get_tree_count(); ++t) {
        const TreeInfo& tree = get_tree_info(t);
        remark.cat(" - ");
        remark.nprintf(maxnamelen, "%-*s", int(maxnamelen), tree.name());
        if (tree.species_count()<allCount) {
            double pc = tree.species_count() / double(allCount);
            remark.nprintf(50, " (%.1f%%; %zu/%zu)", pc*100.0, tree.species_count(), allCount);
        }
        remark.put('\n');
    }

    return remark.release();
}



