// C++ code for finding an upper bound on the number of recombination events
// UpperBound
//
// Author: Yun S. Song
// October 2004
/*
SYNOPSIS: shrub [-d #] [-l #] [-a [#]] [-r #] [-g name] data-filename

OPTIONS: (See README-SHRUB for a more detailed description of the program.)
  -d num    Display setting.
            0: Only display final results (default).
            1: Display the original and the reduced data.
            2: Display as in 1 + display local bound matrices for global
               lower bounds.

  -l num    Fast lower bound method to be used in branch and bound.
            1: Hudson and Kaplan's bound (default).
            2: Approximate haplotype bound using distinct columns and rows.

  -a [num]  Obtain only a fast upper bound.
            num = the maximum number of row removals to be tried for every
            matrix in step 3' of the algorithm.  If a number is not specified,
            then all rows satisfying the condition in step 3' will be tried.

  -r num    Number of '-a 1' runs to be executed. (default is 1)

  -g name   Output an ARG with the computed number of recombination
            events into a file.  The output is in GML format.

*/

#include <iostream>
#include <math.h>
#include <vector>
#include <fstream>
#include <time.h>
#include <map>
#include <stdio.h>

#include "basicdefs.h"
#include "myMatrix.h"
#include "readData.h"
#include "fullupperbound.h"
#include "fastlowerbound.h"
#include "fastupperbound.h"
#include "generateARG.h"

using namespace std;

using std::ifstream;

// Global variables holding the option settings and shared program state.
// They are defined at file scope (external linkage) with the defaults shown;
// presumably getoptions() overwrites them from the command line and the
// bound-computation modules read them -- confirm against those sources.

// Display level (see '-d' in the synopsis above): main() prints the input
// data when this is > 0 and the local bound matrices when it is > 1.
short displayData=0;
// Fast lower bound method selector (see '-l' in the synopsis above);
// the default is 1.  Not read in this file.
short flbMethod=1;
// When > 0, main() prints the optimal sequence of row removals and calls
// generateARG().  Presumably set by the '-g' option -- confirm in getoptions().
short displayARG=0;
// When false (default), main() runs the full branch and bound after the fast
// upper bound.  Presumably set to true by the '-a' option.
bool  approximate=false;
// Largest value representable in a 16-bit unsigned short.
unsigned short MAX_U_SHORT=65535;
// Row-removal limit (see '-a [num]' in the synopsis above).  main() only
// honours multiple runs when this equals 1.
unsigned short maxRows2Look=MAX_U_SHORT;
// Random number seed; not read in this file.
// NOTE(review): presumably used by ReadSeed()/WriteSeed() -- confirm.
long randomseed=-12345432;
// Number of rows of the reduced data matrix; assigned in main().
unsigned short reducedMatRowSize;
// Number of fast upper bound runs to perform (see '-r' in the synopsis above).
unsigned int numberOfRuns=1;
// Index of the run currently being executed; used as the loop counter in main().
unsigned int currentRun;
// Map keyed by a bit vector with an unsigned short value.
typedef std::map<vector<bool>, unsigned short> mapVec2Short;

// Not read or written in this file.
std::map<unsigned short, unsigned short> ScoreMap;
// These are used in Branch and Bound:
//   LBMap contains forward lower bounds.
//   PathScoreMap contains sum of backward rec weights; main() clears it
//   between runs.
mapVec2Short LBMap, PathScoreMap;


// vector<unsigned long> powerTable;  // stores powers of 2

// used for debugging
// unsigned int numberOfRuns=0;

int main(int argc, char *argv[]){
 
  char *datafname;
  char *graphfile;
  getoptions(argc,argv,&graphfile, &datafname);

  float HKtime=0.0;  
  float Cn2time=0.0;
  float uppertime=0.0;
  time_t start_time, finish_time;

  unsigned int upperBound=10000;
  unsigned int currentTotal=0;
  vector<unsigned short> curPath, optimalPath;

  BinMatrix inputData;
  USMatrix localBounds;
  //  BinMatrix subData;
  vector<unsigned short> initialRowMap, reducedRowMap;

  inputData=readData(datafname);  // Read Sequence Data


  printf("Input data has %u rows and %u columns\n", inputData.rowsize(),inputData.colsize());


  if(inputData.rowsize()==0){
    printf("\n*** Final Upper Bound is %u\n",0);
    exit (1);
  }

  if(displayData > 0){inputData.display(); newline();}

  BinMatrix copyInputData;
  copyInputData = inputData;
  vector<unsigned int> removedSeq;


  initialRowMap=table( 0,(unsigned short)( inputData.rowsize()-1));

  inputData=reduceData(inputData,initialRowMap);
  //  printf("Reduced data has %u rows and %u columns\n", inputData.rowsize(),inputData.colsize());  if(displayData > 0){inputData.display(); newline();}


  // define row numbers for the reduced Data
  reducedRowMap=table( 0,(unsigned short)( inputData.rowsize()-1));
  reducedMatRowSize=inputData.rowsize();
  //  powerTable=powerOf2(inputData.rowsize());

  /*  comment out for analyzing simulated data sets */

  //----------------------  HK Bound  ------------------------------
  time(&start_time);  

  // store incompatibility information into localBounds
  localBounds=buildIncompMatrix(inputData);

  if(displayData > 1){
    printf("Incompatibility Matrix\n");
    localBounds.display();
  }
  printf("\n*** HK lower bound is %u\n", globalBound(localBounds));   
  time(&finish_time);
  HKtime=finish_time-start_time;
  //----------------------------------------------------------------


  //----------------------  C(n,2) Bound   -------------------------
  time(&start_time); 
  dRdC_updateIncMatrix(localBounds,inputData);
  if(displayData > 1){
    printf("\nUpper diagonal: Local bounds.  Lower diagonal: Incompatibility.\n");
    localBounds.display();
  }
  printf("\n*** Approximate haplotype lower bound is %u\n", globalBound(localBounds)); 
  time(&finish_time);
  Cn2time=finish_time-start_time;
  localBounds.clear();
  //----------------------------------------------------------------
  


  //---------------------- Upper Bound -----------------------------
  newline();
  time(&start_time);
  if(inputData.rowsize()>3){
    //compute a fast upper bound
    //  cerr << "max rows to look = " << maxRows2Look << endl;
    ReadSeed();
    WriteSeed();

    if(maxRows2Look==1){
      if(numberOfRuns > 1) printf("Performing %u runs.\n", numberOfRuns);
      else printf("Performing %u run.\n", numberOfRuns);
      cout << "The current best fast upper bound is\n " ;
      for(currentRun=0; currentRun < numberOfRuns; currentRun++){
	fastupperbound(inputData,currentTotal,&upperBound,curPath,optimalPath,reducedRowMap);
	PathScoreMap.clear();  // need to clear this for multiple runs
      }
      printf("\n*** Fast upper bound is %u\n\n", upperBound);
    }
    else{
      if(numberOfRuns > 1) printf("'-r num' option specification will be ignored.\n");
      printf("Performing 1 run.\n");
      //    cout << "Tracking fast upper bound computation:\n " ;
      cout << "The current best fast upper bound is\n " ;
      fastupperbound(inputData,currentTotal,&upperBound,curPath,optimalPath,reducedRowMap);
      printf("\n*** Fast upper bound is %u\n\n", upperBound);
    }
    
    if(approximate==false){
      //    cout << "Tracking full branch and bound computation:\n " ;
      PathScoreMap.clear();
      cout << "The current best upper bound is\n" ;
      cerr << upperBound << " ";
      upperbound(inputData,currentTotal,&upperBound,curPath,optimalPath,reducedRowMap);
      //    newline();
      printf("\n*** The full upper bound is %u\n",upperBound);
    }
  }
  else{
    printf("*** The full upper bound is %u\n",0);
  }
  time(&finish_time);
  uppertime=finish_time-start_time;
  //---------------------------------------------------------------

  printf("\nRunning time of HK lower bound computation                    : %.2f\n", HKtime );
  printf("Running time of approximate haplotype lower bound computation : %.2f\n", Cn2time );
  printf("Running time of upper bound computation                       : %.2f\n", uppertime );
 
  //  printf("Total number of runs is : %u\n",numberOfRuns);

  if(displayARG > 0){  
    cout << "\nAn optimal sequence of row removals in the reduced data: ";
    printVecPlusOne(optimalPath);
    //    printVec(optimalPath);
    cout << "In the original data, this corresponds to:";
    for(unsigned short iN=0; iN < optimalPath.size(); iN++){
      cout << " " << initialRowMap[optimalPath[iN]]+1;
    };
    cout << endl;
  }


  //  need to implement relabelling due to the first coalescence
  if(displayARG > 0 ){
    generateARG(copyInputData,optimalPath,graphfile);
  }

  //  powerTable.clear();
  inputData.clear();
  return 0;

}

