// ============================================================= //
//                                                               //
//   File      : MP_sonde.cxx                                    //
//   Purpose   :                                                 //
//                                                               //
//   Institute of Microbiology (Technical University Munich)     //
//   http://www.arb-home.de/                                     //
//                                                               //
// ============================================================= //

#include "MP_externs.hxx"
#include "MultiProbe.hxx"

#include <aw_msg.hxx>
#include <arbdbt.h>
#include <client.h>

#include <cmath>
#include <servercntrl.h>

Sonde::Sonde(const char* bezeichner, int num_probes, int allowed_mis, double outside_mis) {
    // Creates a probe named 'bezeichner' whose two mismatch tables provide 'num_probes' slots.
    // Slot 0 of the tables receives 'allowed_mis' and 'outside_mis'; all other slots start as 0.
    mp_assert(num_probes>0);

    // Identification: the probe keeps a private copy of its name and has no bit vector yet.
    // (For base probes the bit vectors do not have their full length yet, because it is
    // not known at this point how many probes will be entered.)
    kennung    = ARB_strdup(bezeichner);
    bitkennung = NULp;

    // The hit list is empty until gen_Hitliste() fills it.
    hitliste        = NULp;
    length_hitliste = 0;
    minelem         = 0;
    maxelem         = 0;

    // Mismatch tables: zero every slot, afterwards store the passed limits into slot 0.
    Allowed_Mismatch = new long[num_probes];
    Outside_Mismatch = new double[num_probes];
    for (int i=0; i<num_probes; i++) { // LOOP_VECTORIZED=2[!>=910<11] // completely fails with 9.x and 10.x series
        Allowed_Mismatch[i]=0;
        Outside_Mismatch[i]=0;
    }
    Allowed_Mismatch[0] = allowed_mis;
    Outside_Mismatch[0] = outside_mis;
}

Sonde::~Sonde() {
    // Releases everything the probe owns: its name, all hits plus the array holding them,
    // both mismatch tables and the bit vector.
    free(kennung);

    // the hits have to go before the array that refers to them
    for (long pos = 0; pos < length_hitliste; ++pos) {
        delete hitliste[pos];
    }
    delete [] hitliste;

    delete [] Allowed_Mismatch;
    delete [] Outside_Mismatch;

    delete bitkennung;
}

void Sonde::print() {
    // Dumps the probe to stdout: its name, its bit vector (if one is attached), the size and
    // bounds of the hit list, the 'far'/'mor' values and slot 0 of both mismatch tables.
    printf("\nSonde %s\n------------------------------------------------\n", kennung);

    // The constructor leaves 'bitkennung' at NULp until set_bitkennung() attaches a vector,
    // so printing a probe without bit vector must not dereference it.
    if (bitkennung) bitkennung->print();

    printf("Laenge hitliste %ld mit minelem %ld und maxelem %ld\n", length_hitliste, minelem, maxelem);
    printf("Far %ld, Mor %ld, AllMM %ld, OutMM %f\n\n", kombi_far, kombi_mor, *Allowed_Mismatch, *Outside_Mismatch);
}


MO_Mismatch** Sonde::get_matching_species(int ptserver_id, bool match_also_revcompl, int match_weight, int match_mis, const char *match_seq, MO_Liste *convert, long *number_of_species, GB_ERROR& error) {
    // Asks the PT server 'ptserver_id' for all species matched by 'match_seq'.
    //
    // match_also_revcompl : also match the reverse-complement of 'match_seq'
    // match_weight        : sort mode sent to the server; with NON_WEIGHTED the plain mismatch
    //                       value is stored for each match, otherwise the weighted one
    // match_mis           : maximum number of mismatches sent to the server
    // convert             : species list used to translate species names into indices
    // number_of_species   : receives the number of entries stored in the returned list, or
    //                       (if no list is returned) the match count reported by the server;
    //                       left untouched if the server could not be located
    // error               : set if something went wrong
    //
    // Returns a new[]-allocated array of new-allocated MO_Mismatch objects (the caller has to
    // delete the entries and the array) or NULp if no match string was received.

    MO_Mismatch **ret_list   = NULp;
    const char   *servername = arb_look_and_start_ptserver(AISC_MAGIC_NUMBER, ptserver_id, error);

    // If no server was found, 'error' is deliberately left as reported by the lookup; wiping it
    // would leave the caller with neither a result nor a reason.
    // NOTE(review): assumes arb_look_and_start_ptserver() fills 'error' on failure -- confirm.
    if (servername) {
        error = NULp; // lookup succeeded -> start the communication without a pending error

        T_PT_MATCHLIST match_list;
        long           match_list_cnt = -1;
        long           stored         = 0; // number of entries written into 'ret_list'
        bytestring     bs;
        bs.data = NULp;

        // @@@ maybe DRY section below with similar section (in this directory)
        mp_gl_struct mp_pd_gl;
        mp_pd_gl.link = aisc_open(servername, mp_pd_gl.com, AISC_MAGIC_NUMBER, &error);
        mp_pd_gl.locs.clear();

        if (!error && !mp_pd_gl.link) {
            error = "Cannot contact Probe bank server";
        }

        if (!error && MP_init_local_com_struct(mp_pd_gl) != 0) {
            error = "Cannot contact Probe bank server (2)";
        }

        if (!error &&
            aisc_put(mp_pd_gl.link, PT_LOCS, mp_pd_gl.locs,
                     LOCS_MATCH_ALSO_REVCOMP,   (long)match_also_revcompl, // also match reverse-complement?
                     LOCS_COMPLEMENT_FIRST,     (long)0,                   // (use sequence passed below as is. do not complement it.)
                     LOCS_MATCH_SORT_BY,        (long)match_weight,        // Weighted
                     LOCS_MATCH_MAX_MISMATCHES, (long)match_mis,           // Mismatches
                     LOCS_SEARCHMATCH,          match_seq,                 // Sequence
                     NULp))
        {
            error = "Connection to PT_SERVER lost (4)";
        }

        if (!error) {
            char *locs_error = NULp;

            // NOTE(review): a non-zero result is treated as failure, like for aisc_put() above -- confirm.
            if (aisc_get(mp_pd_gl.link, PT_LOCS, mp_pd_gl.locs,
                         LOCS_MATCH_LIST,      match_list.as_result_param(),
                         LOCS_MATCH_LIST_CNT,  &match_list_cnt,
                         LOCS_MP_MATCH_STRING, &bs, // @@@ want unittest for this function
                         LOCS_ERROR,           &locs_error,
                         NULp))
            {
                error = "Connection to PT_SERVER lost (5)";
            }
            else if (locs_error && locs_error[0]) { // 'locs_error' stays NULp if the server did not fill it
                error = GBS_static_string(locs_error);
            }
            free(locs_error);
        }

        if (bs.data) {
            // The match string is a sequence of (name, mismatches, weighted mismatches)
            // triples; all tokens are separated by the character with code 1.
            char toksep[2];
            toksep[0] = 1;
            toksep[1] = 0;

            // never allocate a negative amount and never write behind the allocated array
            const long capacity = match_list_cnt>0 ? match_list_cnt : 0;
            ret_list            = new MO_Mismatch*[capacity];

            char *match_name        = strtok(bs.data, toksep);
            char *match_mismatches  = strtok(NULp, toksep);
            char *match_wmismatches = strtok(NULp, toksep);

            mp_assert(convert->get_mo_liste() != NULp); // failed to populate MO_Liste

            while (stored<capacity && match_name && match_mismatches && match_wmismatches) {
                MO_Mismatch *entry = new MO_Mismatch;

                entry->nummer = convert->get_index_by_entry(match_name);
                if (match_weight == NON_WEIGHTED) {
                    entry->mismatch = atof(match_mismatches);
                }
                else { // WEIGHTED and WEIGHTED_PLUS_POS
                    entry->mismatch = atof(match_wmismatches);
                }
                ret_list[stored++] = entry;

                match_name        = strtok(NULp, toksep);
                match_mismatches  = strtok(NULp, toksep);
                match_wmismatches = strtok(NULp, toksep);
            }
        }
        else if (!error) { // do not hide a more specific error detected above
            error = "No matching species found.";
        }

        // Report only entries that really exist in the returned list, because callers access
        // and delete every entry below the reported count.
        *number_of_species = ret_list ? stored : match_list_cnt;

        aisc_close(mp_pd_gl.link, mp_pd_gl.com);
        free(bs.data);
    }

    return ret_list;
}


double Sonde::check_for_min(long k, MO_Mismatch** probebacts, long laenge) {
    long    i = k+1;
    double  min;

    min = probebacts[k]->mismatch;                  // min ist gleich mismatch des ersten MOs
    while ((i<laenge) && (probebacts[k]->nummer == probebacts[i]->nummer)) {
        if (min > probebacts[i]->mismatch) {
            // wenn min groesser ist als mismatch des naechsten MOs -> setze min auf groesse des naechsten
            min = probebacts[i]->mismatch;
        }
        i++; // checke naechsten MO
    }
    return min;
}



int Sonde::gen_Hitliste(MO_Liste *Bakterienliste) {
    // Applied to a fresh probe, this method generates the hit list by querying the PT server
    // with the sequence of the probe ('kennung').
    //
    // Returns 0 on success, 1 if the query delivered a list but reported zero matches and
    // 11 if the query delivered no list at all. In both failure cases the user is informed
    // via aw_message().

    // mismatch limit for the query: sum of grey zone, outside-mismatch difference and the
    // allowed mismatches of slot 0, rounded up to a whole number
    const double mm_to_search     = mp_gl_awars.greyzone + mp_gl_awars.outside_mismatches_difference + get_Allowed_Mismatch_no(0);
    int          mm_int_to_search = (int) mm_to_search;
    if (mm_to_search > mm_int_to_search) mm_int_to_search++;

    long          laenge     = 0;
    GB_ERROR      error      = NULp; // defined value even if the query never assigns it
    MO_Mismatch **probebacts = get_matching_species(mp_gl_awars.ptserver,
                                                    mp_gl_awars.complement,
                                                    mp_gl_awars.weightedmismatches,
                                                    mm_int_to_search,
                                                    kennung,
                                                    Bakterienliste,
                                                    &laenge,
                                                    error);

    if (!laenge || !probebacts) {
        if (error) aw_message(error); // the reason is not known in every failure case
        aw_message("This probe matches no species!");

        if (!probebacts) return 11;

        delete [] probebacts; // a list without reported entries: only the array itself exists
        return 1;
    }
    mp_assert(!error);

    // sort the matches by species number
    heapsort(laenge, probebacts);

    // Correction loop for multiple hits in the same species: every entry of a run of equal
    // species numbers receives the smallest mismatch of that run.
    for (long k = 0; k < laenge-1; k++) {
        if (probebacts[k]->nummer == probebacts[k+1]->nummer) {
            const double min_mm = check_for_min(k, probebacts, laenge);

            probebacts[k]->mismatch = min_mm;
            while ((k<laenge-1) && (probebacts[k]->nummer == probebacts[k+1]->nummer)) {
                probebacts[k+1]->mismatch = min_mm;
                k++;
            }
        }
    }

    // this works because the list is sorted
    minelem = probebacts[0]->nummer;
    maxelem = probebacts[laenge-1]->nummer;

    // Convert the (number, mismatch) pairs into Hit objects.
    // The array has one extra element, so the hit list ends with NULp.
    hitliste = new Hit*[laenge+1];
    for (long i = 0; i<laenge+1; i++) {
        hitliste[i] = NULp;
    }
    for (long i = 0; i<laenge; i++) {
        hitliste[i] = new Hit(probebacts[i]->nummer);
        hitliste[i]->set_mismatch_at_pos(0, probebacts[i]->mismatch);
    }
    length_hitliste = laenge;

    // clear the hit flags again (the species list is walked from index 1 up to the first NULp entry)
    Bakt_Info **baktliste = Bakterienliste->get_mo_liste();
    Bakt_Info **bl_elem   = baktliste+1;
    for (long bl_index = 0; bl_elem[bl_index]; bl_index++) {
        bl_elem[bl_index]->kill_flag();
    }

    // delete the temporary match list
    for (long i = 0; i<laenge; i++) {
        delete probebacts[i];
    }
    delete [] probebacts;

    return 0;
}



Hit* Sonde::get_hitdata_by_number(long index) {
    // Returns the hit stored at position 'index' of the hit list (so far only intended for output).
    // Returns NULp if there is no hit list or if 'index' lies outside of it; negative values
    // are rejected as well, they would otherwise read in front of the array.
    if (hitliste && index >= 0 && index < length_hitliste) {
        return hitliste[index];
    }
    return NULp;
}




void Sonde::heapsort(long feldlaenge, MO_Mismatch** Nr_Mm_Feld) {
    // Heapsortfunktion, benutzt sink(), sortiert Feld von longs
    long        m=0, i=0;
    MO_Mismatch*    tmpmm;

    for (i=(feldlaenge-1)/2; i>-1; i--) {
        sink(i, feldlaenge-1, Nr_Mm_Feld);
    }
    for (m=feldlaenge-1; m>0; m--) {
        tmpmm =  Nr_Mm_Feld[0];
        Nr_Mm_Feld[0] =  Nr_Mm_Feld[m];
        Nr_Mm_Feld[m] = tmpmm;

        sink(0, m-1, Nr_Mm_Feld);
    }
}

void Sonde::sink(long i, long t, MO_Mismatch** A) {
    // Algorithmus fuer den Heapsort
    long        j, k;
    MO_Mismatch*    tmpmm;

    j = 2*i;
    k = j+1;
    if (j <= t) {
        if           (A[i]->nummer >= A[j]->nummer) j = i;
        if (k <= t && A[k]->nummer >  A[j]->nummer) j = k;

        if (i != j) {
            tmpmm = A[i]; A[i] = A[j]; A[j] = tmpmm;
            sink(j, t, A);
        }
    }
}

void Sonde::set_bitkennung(Bitvector* bv) {
    // Attaches 'bv' as bit vector of this probe. The destructor deletes the attached vector;
    // a vector attached earlier is NOT deleted by this call.
    this->bitkennung = bv;
}



// ########################################################################################################
/* Bakt_Info objects are stored inside the MO_Liste. Among other things, they hold the hit flags.
 */
// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~Methoden Bakt_Info~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

Bakt_Info::Bakt_Info(const char* n) {
    // Creates an entry for the species named 'n'; the hit flag starts cleared.
    this->hit_flag = 0;
    this->name     = ARB_strdup(n); // private copy (match_name in mo_liste), released by the destructor
}

Bakt_Info::~Bakt_Info() {
    // Clears the hit flag and releases the copy of the name made by the constructor.
    this->hit_flag = 0;
    free(this->name);
}


// ##########################################################################################################
// Hit stores the hit information
// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~Methoden Hit

Hit::Hit(long baktnummer) {
    // Creates the hit information for the species with index 'baktnummer'.
    baktid = baktnummer;

    // Mismatch array: its length is the number of probes in the experiment plus one;
    // every slot starts with the value 101.
    mismatch = new double[mp_gl_awars.no_of_probes+1];

    int i;
    for (i=0; i<mp_gl_awars.no_of_probes+1; i++) // LOOP_VECTORIZED=2
        mismatch[i]=101;
}

Hit::~Hit() {
    // Releases the mismatch array allocated by the constructor.
    delete [] this->mismatch;
}

// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

// --------------------------------------------------------------------------------

#ifdef UNIT_TESTS
#ifndef TEST_UNIT_H
#include <test_unit.h>
#endif

#include <arb_strbuf.h>

static char *create_list(const MO_Liste& list) {
    GBS_strstruct buf(1000);
    int           count = list.get_laenge();
    for (int c = 0; c<=count+1; c++) {
        if (c) buf.put(',');
        const char *name = list.get_entry_by_index(c);
        buf.cat(name ? name : "(null)");
    }
    return buf.release();
}

static char *create_list(MO_Mismatch **list, int count, const MO_Liste& spec) {
    // Test helper: returns a comma-separated list of "name/mismatch" pairs for the first
    // 'count' entries of 'list'. Names are looked up in 'spec' via the 'nummer' of each entry.
    // The result is released from the buffer; the caller takes it over.
    GBS_strstruct buf(1000);
    for (int c = 0; c<count; c++) {
        if (c) buf.put(',');
        const MO_Mismatch *mm   = list[c];
        const char        *name = spec.get_entry_by_index(mm->nummer);

        // get_entry_by_index() may deliver NULp (the sibling helper handles that as well);
        // passing NULp to "%s" is undefined, so the placeholder is printed explicitly.
        buf.nprintf(30, "%s/%3.1f", name ? name : "(null)", mm->mismatch);
    }
    return buf.release();
}

void TEST_get_matching_species() {
    // Regression test for the ptserver functions 'MP_MATCH_STRING' + 'MP_ALL_SPECIES_STRING'.
    // The test runs versus database ../UNIT_TESTER/run/TEST_pt_src.arb

    TEST_SETUP_GLOBAL_ENVIRONMENT("ptserver");

    GB_shell  shell;
    GBDATA   *gb_main = GB_open("TEST_pt_src.arb", "rw");
    TEST_REJECT_NULL(gb_main);

    Sonde    s("some-probe", 5, 3, 20);
    GB_ERROR error;

    MO_Liste::set_gb_main(gb_main);
    MO_Liste *Bakterienliste = new MO_Liste;

    // Part 1: the species list is requested twice; the second request uses a fresh MO_Liste
    // and has to deliver the same content as the first one.
    for (int round = 0; round<2; ++round) {
        error = Bakterienliste->get_all_species(TEST_SERVER_ID);
        TEST_EXPECT_NO_ERROR(error);

        // test content of 'Bakterienliste':
        TEST_EXPECT_EQUAL(Bakterienliste->get_laenge(), 22);
        TEST_EXPECT_EQUAL_STRINGCOPY__NOERROREXPORTED(create_list(*Bakterienliste),
                                                      "(null),BcSSSS00,Bl0LLL00,ClnCorin,CltBotul,CPPParap,ClfPerfr,DlcTolu2,PbcAcet2,PbrPropi,Stsssola,DsssDesu,LgtLytic,DcdNodos,FrhhPhil,PsAAAA00,PslFlave,HllHalod,VbrFurni,VblVulni,VbhChole,AclPleur,PtVVVulg,(null)");

        if (round == 0) {
            delete Bakterienliste;
            Bakterienliste = new MO_Liste;
        }
    }

    // Part 2: the same match request is sent twice and has to deliver the same result each time.
    for (int round = 0; round<2; ++round) {
        long   laenge           = 0;
        double mm_to_search     = 0.0 + 1.0 + 0;
        int    mm_int_to_search = int(mm_to_search-0.000001)+1;

        MO_Mismatch **probebacts = s.get_matching_species(TEST_SERVER_ID,
                                                          1,           // mp_gl_awars.complement,
                                                          2,           // mp_gl_awars.weightedmismatches,
                                                          mm_int_to_search,
                                                          "atgatgatg", // kennung,
                                                          Bakterienliste,
                                                          &laenge,
                                                          error);

        TEST_EXPECT_NO_ERROR(error);

        // test content of probebacts:
        TEST_EXPECT_EQUAL(laenge, 11);
        TEST_EXPECT_EQUAL_STRINGCOPY__NOERROREXPORTED(create_list(probebacts, laenge, *Bakterienliste),
                                                      "BcSSSS00/0.2,ClfPerfr/1.0,LgtLytic/1.0,FrhhPhil/1.0,ClfPerfr/1.1,VbrFurni/1.1,VblVulni/1.1,Bl0LLL00/1.1,AclPleur/1.2,VbrFurni/1.5,VblVulni/1.5");

        // cleanup: the entries and the array belong to the caller
        for (int i=0; i<laenge; i++) {
            delete probebacts[i];
        }
        delete [] probebacts;
    }

    delete Bakterienliste;

    GB_close(gb_main);
}

#endif // UNIT_TESTS

// --------------------------------------------------------------------------------

