// =============================================================== //
//                                                                 //
//   File      : AP_sequence.hxx                                   //
//   Purpose   :                                                   //
//                                                                 //
//   Institute of Microbiology (Technical University Munich)       //
//   http://www.arb-home.de/                                       //
//                                                                 //
// =============================================================== //

#ifndef AP_SEQUENCE_HXX
#define AP_SEQUENCE_HXX

#ifndef ALIVIEW_HXX
#include <AliView.hxx>
#endif
#ifndef ARBTOOLS_H
#include <arbtools.h>
#endif
#ifndef ARB_ASSERT_H
#include <arb_assert.h>
#endif
#ifndef _STDINT_H
#include <stdint.h>
#endif

// assertion macro used by the AP_sequence classes; simply forwards to arb_assert()
#define ap_assert(cond) arb_assert(cond)

// floating point type used by the sequence classes (e.g. for weighted base counts)
typedef double AP_FLOAT;

// declared here, defined elsewhere.
// AP_sequence::mark_sequence_set() stores the returned value as 'update' stamp whenever a sequence gets set.
long AP_timer();

// Abstract base class for a sequence that is viewed through an AliView
// and which optionally is bound to the sequence data of a species in the database.
//
// Derives from Noncopyable; use dup() to create a duplicate of a derived instance.
class AP_sequence : virtual Noncopyable {
    const AliView *ali;   // alignment view (provides length, filter and weights; see accessors below)

    GBDATA *gb_sequence;  // points to species/ali_xxx/data (or NULp if unbound, e.g. inner nodes in tree)
    bool    has_sequence; // true -> sequence was set()
    long    update;       // AP_timer() value taken when the sequence got marked as set (0 when marked as unset)

protected:
    mutable AP_FLOAT cached_wbc; // result for weighted_base_count(); <0.0 means "not initialized"

    // Records whether a sequence is set.
    // Only acts if the state really changes: then the 'update' stamp is renewed
    // (or cleared) and the cached weighted base count is invalidated.
    void mark_sequence_set(bool is_set) {
        if (is_set == has_sequence) return; // state did not change -> keep stamp and cache

        if (is_set) update = AP_timer();
        else        update = 0;

        has_sequence = is_set;
        cached_wbc   = -1.0; // force recalculation
    }

    // to be implemented by derived classes:
    virtual void set(const char *sequence) = 0; // store 'sequence'
    virtual void unset()                   = 0; // drop stored sequence (called by forget_sequence())

    void do_lazy_load() const; // defined elsewhere; called by lazy_load_sequence()

public:
    AP_sequence(const AliView *aliview);
    virtual ~AP_sequence() {}

    virtual AP_sequence *dup() const = 0; // used to dup derived class

    GB_ERROR bind_to_species(GBDATA *gb_species);
    void     unbind_from_species();

    // true if sequence data of a species is bound
    bool is_bound_to_species() const {
        return gb_sequence;
    }
    // returns the bound sequence data (NULL if unbound)
    GBDATA *get_bound_species_data() const {
        return gb_sequence;
    }

    // Loads the sequence on demand,
    // i.e. only if no sequence is set yet and if there is bound data to load from.
    void lazy_load_sequence() const {
        if (has_sequence) return;
        if (is_bound_to_species()) do_lazy_load();
    }
    // Like lazy_load_sequence(), but asserts that a sequence is available afterwards.
    void ensure_sequence_loaded() const {
        lazy_load_sequence();
        ap_assert(has_sequence);
    }

    bool hasSequence() const {
        return has_sequence;
    }
    // Drops the sequence (if any) by calling unset() of the derived class.
    void forget_sequence() {
        if (!has_sequence) return;
        unset();
    }

    // accessors forwarding to the AliView:
    size_t get_sequence_length() const { // filtered length
        return ali->get_length();
    }
    const AP_filter *get_filter() const {
        return ali->get_filter();
    }
    const AP_weights *get_weights() const {
        return ali->get_weights();
    }

    const AliView *get_aliview() const {
        return ali;
    }
};

// ----------------------------------------------------------------------
// estimation of upper limit for Mutations = MPB * BP * (2*SP) * W
//
// meaning                               limits
// -------                               ------
// MPB                                     = max.mutation per basepos        3 for aa; 1 for nucs
// BP                                      = base positions                  ~1500 for nucs (=> ~1200 (4/5) theoretical max. for worst possible tree)
// SP                                      = number of species               600k for silva (2*SP = leaves+inner nodes)
// W                                       = weight                          1M theoretically; ~6k seen
//
// -> 1 * 1200 * 2*600k * 6k               = > 8.6*10^12   (1/1M   of long-range)
// -> 1 * 1200 * 2*1M   * 1M               = > 2.4*10^15   (1/3800 of long-range)
// ----------------------------------------------------------------------
// Mutations is the result type of AP_combinableSeq::combine_seq() and AP_combinableSeq::mutations_if_combined_with().
typedef long Mutations; // Note: equal to min. mutations only for nucs w/o weights
// ----------------------------------------------------------------------

// Abstract AP_sequence that can be combined with other sequences.
// Derived classes implement combining, comparing and counting;
// this class adds a global combine counter and caching of the weighted base count.
class AP_combinableSeq: public AP_sequence {
    static long global_combineCount; // shared by all instances; see combine_count()

protected:
    // calculates the value cached and returned by weighted_base_count()
    virtual AP_FLOAT count_weighted_bases() const = 0;

    // increments the global combine counter
    static void inc_combine_count() {
        ++global_combineCount;
    }

public:
    AP_combinableSeq(const AliView *aliview) : AP_sequence(aliview) {}
    virtual ~AP_combinableSeq() {}

    virtual AP_combinableSeq *dup() const = 0; // used to dup derived class
    virtual int cmp_combined(const AP_combinableSeq *other) const = 0; // result 0 is treated as "equal" by combinedEquals()

    virtual Mutations combine_seq(const AP_combinableSeq *lefts, const AP_combinableSeq *rights, char *mutation_per_site = NULp) = 0;
    virtual Mutations mutations_if_combined_with(const AP_combinableSeq *other)                                                  = 0;

    virtual void partial_match(const AP_combinableSeq *part, long *overlap, long *penalty) const = 0;
    virtual uint32_t checksum() const                                                            = 0;

    // current value of the global combine counter
    static long combine_count() {
        return global_combineCount;
    }

    // Returns the weighted base count.
    // The value gets calculated by count_weighted_bases() on first use and is cached afterwards
    // (the cache gets invalidated by AP_sequence::mark_sequence_set()).
    AP_FLOAT weighted_base_count() const { // returns < 0.0 if no sequence!
        const bool not_calculated_yet = cached_wbc<0.0;
        if (not_calculated_yet) {
            cached_wbc = count_weighted_bases();
        }
        return cached_wbc;
    }

    // Same as combine_seq(lefts, rights), but decrements the global combine counter afterwards.
    // NOTE(review): presumably this reverts the inc_combine_count() done by the
    // combine_seq() implementation - confirm against derived classes.
    Mutations noncounting_combine_seq(const AP_combinableSeq *lefts, const AP_combinableSeq *rights) {
        const Mutations mutations = combine_seq(lefts, rights);
        --global_combineCount;
        return mutations;
    }

    // true if cmp_combined() reports no difference to 'other'
    bool combinedEquals(const AP_combinableSeq *other) const {
        return !cmp_combined(other);
    }
};


#else
#error AP_sequence.hxx included twice
#endif // AP_SEQUENCE_HXX
