// =============================================================== //
//                                                                 //
//   File      : BI_helix.cxx                                      //
//   Purpose   :                                                   //
//                                                                 //
//   Institute of Microbiology (Technical University Munich)       //
//   http://www.arb-home.de/                                       //
//                                                                 //
// =============================================================== //

#include "BI_helix.hxx"
#include <arbdbt.h>
#include <cctype>

#define LEFT_HELIX "{[<("
#define RIGHT_HELIX "}]>)"


BI_pairdef::BI_pairdef() {
    for (int i=0; i<PAIR_TYPE_COUNT; i++) {
        pairs_def[i] = NULp;
        char_bind[i] = NULp;
    }

    pairs_def[HELIX_STRONG_PAIR] = strdup("CG AT AU");
    char_bind[HELIX_STRONG_PAIR] = strdup("~");

    pairs_def[HELIX_NORMAL_PAIR] = strdup("GT GU");
    char_bind[HELIX_NORMAL_PAIR] = strdup("-");

    pairs_def[HELIX_WEAK_PAIR] = strdup("GA GG");
    char_bind[HELIX_WEAK_PAIR] = strdup("=");

    pairs_def[HELIX_NO_PAIR] = strdup("AA AC CC CT CU TT UU");
    char_bind[HELIX_NO_PAIR] = strdup("#");

    pairs_def[HELIX_USER0] = strdup(".A .C .G .T .U");
    char_bind[HELIX_USER0] = strdup("*");

    pairs_def[HELIX_USER1] = strdup("-A -C -G -T -U");
    char_bind[HELIX_USER1] = strdup("#");

    pairs_def[HELIX_USER2] = strdup("-- .. -.");
    char_bind[HELIX_USER2] = strdup("");

    pairs_def[HELIX_USER3] = strdup("");
    char_bind[HELIX_USER3] = strdup("+");

    pairs_def[HELIX_USER4] = strdup("");
    char_bind[HELIX_USER4] = strdup("");

    pairs_def[HELIX_USER5] = strdup("");
    char_bind[HELIX_USER5] = strdup("");

    pairs_def[HELIX_DEFAULT] = strdup("");
    char_bind[HELIX_DEFAULT] = strdup("%");
}

BI_pairdef::~BI_pairdef() {
    for (int i=0; i<PAIR_TYPE_COUNT; i++) {
        free(pairs_def[i]);
        free(char_bind[i]);
    }
}

bool BI_pairdef::is_pairtype(char left, char right, BI_PAIR_TYPE pair_type) const {
    const char *pairs = pairs_def[pair_type];
    int         len   = strlen(pairs)-1;

    for (int i=0; i<len; i+=3) {
        if ((pairs[i] == left  && pairs[i+1] == right) ||
            (pairs[i] == right && pairs[i+1] == left)) {
            return true;
        }
    }
    return false;
}

char BI_pairdef::get_symbol(char left, char right) const {
    left  = toupper(left);
    right = toupper(right);

    int erg = *char_bind[HELIX_DEFAULT];
    for (int i = HELIX_STRONG_PAIR; i< PAIR_TYPE_COUNT; i++) {
        if (is_pairtype(left, right, BI_PAIR_TYPE(i))) {
            erg = *char_bind[i];
            break;
        }
    }
    if (!erg) erg = ' ';
    return erg;
}

int BI_pairdef::pair_strength(char left, char right) {
    // returned values:
    //
    // 3 = strong helix
    // 2 = normal helix
    // 1 = weak helix
    // 0 = no helix

    left  = toupper(left);
    right = toupper(right);

    if (is_pairtype(left, right, HELIX_STRONG_PAIR)) return 3;
    if (is_pairtype(left, right, HELIX_NORMAL_PAIR)) return 2;
    if (is_pairtype(left, right, HELIX_WEAK_PAIR)) return 1;
    return 0;
}

// --------------------------------------------------------------------------------

char *BI_helix::helix_error = NULp;

struct helix_stack {
    struct helix_stack *next;

    long pos;
    char c;
};

BI_helix::BI_helix() {
    entries = NULp;
    Size    = 0;
}

BI_helix::~BI_helix() {
    if (entries) {
        for (size_t i = 0; i<Size; ++i) {
            if (entries[i].allocated) {
                free(entries[i].helix_nr);
            }
        }
        free(entries);
    }
}


static void BI_helix_check_error(const char *key, long val, void *client_data) {
    struct helix_stack *stack    = (struct helix_stack *)val;
    GB_ERROR           *errorPtr = (GB_ERROR*)client_data;
    if (!*errorPtr && stack) {
        *errorPtr = GBS_global_string("Too many '%c' in Helix '%s' pos %li", stack->c, key, stack->pos);
    }
}


static long BI_helix_free_hash(const char *, long val, void *) {
    struct helix_stack *stack = (struct helix_stack *)val;
    struct helix_stack *next;
    for (; stack; stack = next) {
        next = stack->next;
        delete stack;
    }
    return 0;
}

GB_ERROR BI_helix::initFromData(const char *helix_nr_in, const char *helix_in, size_t sizei) {
    /* helix_nr string of helix identifiers
     * helix    helix
     * size     alignment len
     */
    clear_error();

    GB_HASH *hash = GBS_create_hash(256, GB_IGNORE_CASE);
    size_t   pos;
    char     c;
    char     ident[256];
    char    *sident;

    helix_stack *laststack = NULp;
    helix_stack *stack;

    Size = sizei;

    char *helix = NULp;
    {
        size_t len          = strlen(helix_in);
        if (len > Size) len = Size;

        char *h = ARB_alloc<char>(Size+1);
        h[Size] = 0;

        if (len<Size) memset(h+len, '.', Size-len);
        memcpy(h, helix_in, len);
        helix = h;
    }

    char *helix_nr = NULp;
    if (helix_nr_in) {
        size_t len          = strlen(helix_nr_in);
        if (len > Size) len = (int)Size;

        char *h = ARB_alloc<char>(Size+1);
        h[Size] = 0;

        if (len<Size) memset(h+len, '.', (int)(Size-len));
        memcpy(h, helix_nr_in, len);
        helix_nr = h;
    }

    strcpy(ident, "0");
    long pos_scanned_till = -1;

    ARB_calloc(entries, Size);
    sident  = NULp;

    for (pos = 0; pos < Size; pos ++) {
        if (helix_nr) {
            if (long(pos)>pos_scanned_till && isalnum(helix_nr[pos])) {
                for (int j=0; (pos+j)<Size; j++) {
                    char hn = helix_nr[pos+j];
                    if (isalnum(hn)) {
                        ident[j] = hn;
                    }
                    else {
                        ident[j]         = 0;
                        pos_scanned_till = pos+j;
                        break;
                    }
                }
            }
        }
        c = helix[pos];
        if (strchr(LEFT_HELIX, c)) { // push
            laststack = (struct helix_stack *)GBS_read_hash(hash, ident);
            stack = new helix_stack;
            stack->next = laststack;
            stack->pos = pos;
            stack->c = c;
            GBS_write_hash(hash, ident, (long)stack);
        }
        else if (strchr(RIGHT_HELIX, c)) { // pop
            stack = (struct helix_stack *)GBS_read_hash(hash, ident);
            if (!stack) {
                bi_assert(!helix_error); // already have an error
                helix_error = GBS_global_string_copy("Too many '%c' in Helix '%s' pos %zu", c, ident, pos);
                goto helix_end;
            }
            if (strchr(RIGHT_HELIX, c)) {
                entries[pos].is_pairpos = true;
                entries[stack->pos].is_pairpos = true;
            }
            else {
                c = tolower(c);
                if (stack->c != c) {
                    bi_assert(!helix_error); // already have an error
                    helix_error = GBS_global_string_copy("Character '%c' pos %li doesn't match character '%c' pos %zu in Helix '%s'",
                                                         stack->c, stack->pos, c, pos, ident);
                    goto helix_end;
                }

                arb_assert(!isalpha(c)); // shall not occur

                entries[pos].is_pairpos        = false;
                entries[stack->pos].is_pairpos = false;
            }
            entries[pos].pair_pos = stack->pos;
            entries[stack->pos].pair_pos = pos;
            GBS_write_hash(hash, ident, (long)stack->next);

            if (!sident || strcmp(sident+1, ident) != 0) {
                ARB_alloc(sident, strlen(ident)+2);
                sprintf(sident, "-%s", ident);

                entries[stack->pos].allocated = true;
            }
            entries[pos].helix_nr        = sident+1;
            entries[stack->pos].helix_nr = sident;
            bi_assert((long)pos != stack->pos);

            delete stack;
        }
    }

    if (!get_error()) {
        GB_ERROR error = NULp;
        GBS_hash_do_const_loop(hash, BI_helix_check_error, (void*)&error);
        if (error) set_error(error);
    }

  helix_end :
    GBS_hash_do_loop(hash, BI_helix_free_hash, NULp);
    GBS_free_hash(hash);

    free(helix_nr);
    free(helix);

    return get_error();
}

GB_ERROR BI_helix::init(GBDATA *gb_main, const char *alignment_name) {
    GB_transaction ta(gb_main);

    clear_error();

    GBDATA *gb_sai_data = GBT_get_SAI_data(gb_main);
    long    sizei       = GBT_get_alignment_len(gb_main, alignment_name);

    if (sizei<=0) {
        set_error(GB_await_error());
    }
    else {
        char   *helix_name      = GBT_get_default_helix(gb_main);
        char   *helix_nr_name   = GBT_get_default_helix_nr(gb_main);
        GBDATA *gb_helix_nr_con = GBT_find_SAI_rel_SAI_data(gb_sai_data, helix_nr_name);
        GBDATA *gb_helix_con    = GBT_find_SAI_rel_SAI_data(gb_sai_data, helix_name);
        GBDATA *gb_helix        = NULp;
        GBDATA *gb_helix_nr     = NULp;

        if (gb_helix_nr_con)    gb_helix_nr = GBT_find_sequence(gb_helix_nr_con, alignment_name);
        if (gb_helix_con)       gb_helix    = GBT_find_sequence(gb_helix_con, alignment_name);

        if      (!gb_helix)    set_error(GBS_global_string("Can't find SAI:%s", helix_name));
        else if (!gb_helix_nr) set_error(GBS_global_string("Can't find SAI:%s", helix_nr_name));
        else {
            initFromData(GB_read_char_pntr(gb_helix_nr), GB_read_char_pntr(gb_helix), sizei); // does set 'helix_error' // @@@ NOT_ALL_SAI_HAVE_DATA
        }

        free(helix_name);
        free(helix_nr_name);
    }

    return get_error();
}

GB_ERROR BI_helix::init(GBDATA *gb_main) {
    GB_transaction ta(gb_main);

    char *alignment_name = GBT_get_default_alignment(gb_main);
    if (!alignment_name) {
        set_error(GB_await_error());
    }
    else {
        init(gb_main, alignment_name); // does set 'helix_error'
        free(alignment_name);
    }

    return get_error();
}

long BI_helix::first_pair_position() const {
    return entries[0].is_pairpos ? 0 : next_pair_position(0);
}

long BI_helix::next_pair_position(size_t pos) const {
    if (entries[pos].next_pair_pos == 0) {
        size_t p;
        long   next_pos = -1;
        for (p = pos+1; p<Size && next_pos == -1; ++p) {
            if (entries[p].is_pairpos) {
                next_pos = p;
            }
            else if (entries[p].next_pair_pos != 0) {
                next_pos = entries[p].next_pair_pos;
            }
        }

        size_t q = p<Size ? p-2 : Size-1;

        for (p = pos; p <= q; ++p) {
            bi_assert(entries[p].next_pair_pos == 0);
            entries[p].next_pair_pos = next_pos;
        }
    }
    return entries[pos].next_pair_pos;
}

long BI_helix::first_position(const char *helix_Nr) const {
    long pos;
    for (pos = first_pair_position(); pos != -1; pos = next_pair_position(pos)) {
        if (strcmp(helix_Nr, entries[pos].helix_nr) == 0) break;
    }
    return pos;
}

long BI_helix::last_position(const char *helix_Nr) const {
    long pos = first_position(helix_Nr);
    if (pos != -1) {
        long next_pos = next_pair_position(pos);
        while (next_pos != -1 && strcmp(helix_Nr, entries[next_pos].helix_nr) == 0) {
            pos      = next_pos;
            next_pos = next_pair_position(next_pos);
        }
    }
    return pos;
}

