/* Created on 26 Oct, 2004 by Ying Wang.
 * Uses an adjacency matrix to represent the graph, to make the program faster and more efficient. */

#include "Main.h"

#include <string>

/*
 * Program entry point: reads a parameter file, runs simuRep accepted genealogy
 * simulations through Graph, and writes the sampled data as XML.
 *
 * Usage: exactly one command-line argument, the path of the parameter file.
 *
 * The parameter file is a whitespace-separated token stream read in this order:
 *   dtype                        'h' selects HAPLOTYPE, anything else GENOTYPE
 *   f_DD f_DN f_NN               only when the data type is GENOTYPE
 *   numDis numNormal N0 grate mutage sco lowbound highbound homorec
 *   useIceland                   only when homorec is 0
 *   whichchrom leftend rightend  only when useIceland is non-zero
 *   init_gbm drift_gbm var_gbm   only when homorec is 0 and useIceland is 0
 *   intervsize                   only when useIceland is 0 (otherwise derived
 *                                from rightend-leftend, in Mb)
 *   mutloc
 *   mtype                        's' selects SNP, anything else STRP
 *   mu
 *   strpden                      only when the marker type is STRP
 *   cutofflevel nmarker_limit simuRep
 *   outputfile                   base name; ".xml" is appended
 *   printRecs computeLDs
 *   ldsfile                      only when computeLDs is 1; ".xml" is appended
 *
 * Output: per-replicate data goes to <outputfile>.xml, optional LD values to
 * <ldsfile>.xml, and a human-readable report to standard output.
 *
 * Returns 0 on success. Calls exit(-1) on a wrong argument count, an unreadable
 * or malformed parameter file, an output file that cannot be opened, or when
 * at least 100 simulations were rejected and the rejection proportion exceeds 95%.
 */
int main(int argc, char* argv[])
{
   if(argc!=2)
   { cerr<<"wrong number of arguments"<<endl; exit(-1); }

   time_t seed = time(NULL);
   srand((unsigned)seed);
   // Per-replicate seed handed to Graph through a pointer. A stack variable is
   // enough here: it outlives every Graph object created in the loop below.
   long seedval = 0;
   long* seedptr = &seedval;

   clock_t start,end;
   start = clock();

   //Load parameters
   // Everything is initialised so that optional values that are not present in
   // the parameter file are never read as indeterminate values.
   int datatype;
   int numDis = 0, numNormal = 0;  //sample size
   long N0 = 0; double grate = 0; //current population size, population growth rate
   double sco = 0; int mutage = 0;
   double lowbound = 0, highbound = 0;
   double intervsize = 0; //in Mb
   int markertype = SNP;
   double mu = 0, mutloc = 0, strpden = 0;
   int homorec = 0; int useIceland=0;
   double cutofflevel = 0;
   int nmarker_limit = 0;
   int simuRep = 0;
   double f_DD = 0, f_DN = 0, f_NN = 0;
   int whichchrom = 0; long leftend = 0, rightend = 0;
   double init_gbm = 0, drift_gbm = 0, var_gbm = 0;
   int printRecs = 0;
   int computeLDs = 0;

   //Input parameters from file
   ifstream paramf;
   paramf.open(argv[1], ios::in);
   if(!paramf)
   { cerr<<"The parameters file can not be opened!"<<endl; exit(-1); }
   paramf.seekg(0L, ios::beg);

   char dtype = 0;  paramf>>dtype;  //data type
   if(dtype == 'h') datatype = HAPLOTYPE; else datatype = GENOTYPE;
   if(datatype==GENOTYPE)
   {
      //Penetrance parameters
      paramf>>f_DD;  paramf>>f_DN; paramf>>f_NN;
   }
   paramf>>numDis;
   paramf>>numNormal;
   paramf>>N0;
   paramf>>grate;
   paramf>>mutage;
   paramf>>sco;
   paramf>>lowbound; paramf>>highbound;
   paramf>>homorec;
   if(!homorec)
   {
      paramf>>useIceland;
      if(useIceland)
      {
         paramf>>whichchrom;
         paramf>>leftend;
         paramf>>rightend;
         intervsize =double( rightend-leftend)/1000000; //in Mb
      }
      else
      {
         paramf>>init_gbm;
         paramf>>drift_gbm; paramf>>var_gbm;
      }
   }
   if(!useIceland) {paramf>>intervsize; }
   paramf>>mutloc;
   char mtype = 0; paramf>>mtype;
   if(mtype == 's') markertype = SNP; else markertype = STRP;
   paramf>>mu;
   if(markertype == STRP){paramf>>strpden;}
   paramf>>cutofflevel;
   paramf>>nmarker_limit;
   paramf>>simuRep;
   // File names are held in std::string: the tokens in the parameter file have
   // no length limit, and ".xml" is appended afterwards.
   std::string outputfile;
   paramf>>outputfile;
   paramf>>printRecs;
   paramf>>computeLDs;
   std::string ldsfile;
   if(computeLDs==1)  paramf>>ldsfile;
   // One check covers every extraction above: once any of them fails the
   // stream stays in a failed state. Reaching end-of-file right after the last
   // token only sets eofbit and is not treated as an error.
   if(!paramf)
   { cerr<<"The parameters file is incomplete or malformed!"<<endl; exit(-1); }
   paramf.close();

   ofstream ofs;
   outputfile += ".xml";
   ofs.open(outputfile.c_str());
   if (!ofs){
      cerr<<outputfile<<" can't be opened."<<endl;
      exit(-1);
   }
   ofs<<"<?xml version=\"1.0\"?>"<<endl;
   ofs<<"<DATA>"<<endl;

   ofstream oflds;
   if(computeLDs==1)
   {
      ldsfile += ".xml";
      oflds.open(ldsfile.c_str());
      if (!oflds){
         cerr<<ldsfile<<" can't be opened for writing."<<endl;
         exit(-1);
      }
      oflds<<"<?xml version=\"1.0\"?>"<<endl;
      oflds<<"<DATA>"<<endl;
   }

   //output parameters to file
   ofs<<"<Parameters>"<<endl;
   ofs<<"<datatype>"<<datatype<<"</datatype>"<<endl;
   if(datatype == GENOTYPE)
   {
      ofs<<"<f_DD>"<<f_DD<<"</f_DD>"<<endl; ofs<<"<f_DN>"<<f_DN<<"</f_DN>"<<endl;
      ofs<<"<f_NN>"<<f_NN<<"</f_NN>"<<endl;
   }
   ofs<<"<num_disease>"<<numDis<<"</num_disease>"<<endl;
   ofs<<"<num_normal>"<<numNormal<<"</num_normal>"<<endl;
   ofs<<"<population_size>"<<N0<<"</population_size>"<<endl;
   ofs<<"<growth_rate>"<<grate<<"</growth_rate>"<<endl;
   ofs<<"<disease_age>"<<mutage<<"</disease_age>"<<endl;
   ofs<<"<selection_coeff>"<<sco<<"</selection_coeff>"<<endl;
   ofs<<"<lowbound_freq>"<<lowbound<<"</lowbound_freq>"<<endl;
   ofs<<"<highbound_freq>"<<highbound<<"</highbound_freq>"<<endl;
   ofs<<"<interval_size>"<<intervsize<<"</interval_size>"<<endl;
   ofs<<"<mutation_location>"<<mutloc<<"</mutation_location>"<<endl;
   ofs<<"<homorecmodel>"<<homorec<<"</homorecmodel>"<<endl;
   if(!homorec)
   {
      if(useIceland){
         ofs<<"<chromosome>"<<whichchrom<<"</chromosome>"<<endl;
         ofs<<"<leftend_region>"<<leftend<<"</leftend_region>"<<endl;
         ofs<<"<rightend_region>"<<rightend<<"</rightend_region>"<<endl;
      }
      else{
         ofs<<"<initial_rate>"<<init_gbm<<"</initial_rate>"<<endl;
         ofs<<"<drift_gbm>"<<drift_gbm<<"</drift_gbm>"<<endl; ofs<<"<var_gbm>"<<var_gbm<<"</var_gbm>"<<endl;
      }
   }
   ofs<<"<marker_type>"<<markertype<<"</marker_type>"<<endl;
   ofs<<"<mutation_rate>"<<mu<<"</mutation_rate>"<<endl;
   if(markertype== STRP) ofs<<"<strp_density>"<<strpden<<"</strp_density>"<<endl;
   ofs<<"<cutoff_level>"<<cutofflevel<<"</cutoff_level>"<<endl;
   ofs<<"<limit_minimumMarkers>"<<nmarker_limit<<"</limit_minimumMarkers>"<<endl;
   ofs<<"<num_replicates>"<<simuRep<<"</num_replicates>"<<endl;
   ofs<<"</Parameters>"<<endl;


   //checking average
   double aveTMRCA=0;  double aveDisTMRCA=0; double totTime=0; int totMuts=0; int totMarkers=0;
   int nrec=0; int nrecDis=0; double aveDisFreq=0;
   int miniNumMarkers=-1;  //the minimum # markers among all simulated replicates
   // count is the 1-based index of the replicate currently being attempted;
   // it only advances when simuGenealogy() accepts the simulation.
   int count=1;  int numRejects =0;
   while(count<=simuRep)
   {
      *seedptr = -1*rand();
      Graph G(datatype, numDis, numNormal, N0, grate, sco, mutage, intervsize, markertype, mu, strpden, mutloc, homorec, useIceland, cutofflevel, nmarker_limit);

      if(datatype==GENOTYPE) G.setPenetrParams(f_DD, f_DN, f_NN);
      if(!homorec)
      {
         if(useIceland) G.setIcelandParams(whichchrom, leftend, rightend);
         else   G.setGBMParams(init_gbm, drift_gbm, var_gbm);
      }
      bool want = G.simuGenealogy(lowbound, highbound, seedptr);
      if(!want)
      {
         numRejects++;
         // Give up once at least 100 simulations were rejected and they make up
         // more than 95% of the attempts. The denominator uses count, so the
         // attempt in progress is counted as well.
         if(numRejects>=100 && (double)numRejects/(double)(numRejects+count)>0.95)
         {
            cout<<"\n\n<font color=\"red\">The proportion of rejected simulations is "<<(double)numRejects/(double)(numRejects+count);
            cout<<" (greater than 95%). "<<endl;
            cout<<"<font color=\"red\">\nThe program was terminated.</font>"<<endl;
            exit(-1);
         }
         continue;
      }

      aveTMRCA += G.getTMRCA();
      aveDisTMRCA += G.getTMRCAdis();
      totTime += G.getTotalTime();
      nrec += G.getnumRecs();
      nrecDis += G.getnumRecsDis();
      totMuts += G.getnumMutations();
      totMarkers += G.getnumMarkers();
      aveDisFreq += G.currentDisFreq();

      int** realseqmatr = G.getRealSampleSeqMatr();
      vector<double> realmapdis = G.getRealMarkerPosit();
      int nmarkers = realmapdis.size();
      if(miniNumMarkers==-1){ miniNumMarkers = nmarkers;}
      else if(miniNumMarkers > nmarkers) { miniNumMarkers = nmarkers;}

      //compute LDs between the mutation and markers
      if(computeLDs==1 && G.getnumDisChr()!=0 )
      {
         double* D_prime = new double[nmarkers];
         G.linkagedis(realseqmatr, nmarkers, G.getnumNormChr(), G.getnumDisChr(), D_prime);
         oflds<<"<No"<<count<<">"<<endl;
         for(int i=0; i<nmarkers; i++) oflds<<D_prime[i]<<"\t";
         oflds<<"</No"<<count<<">"<<endl;
         delete [] D_prime;
      }

      //output to screen
      cout<<"\n\nNo. "<<count<<endl;
      if(datatype == GENOTYPE)
      {
         cout<<"The actual number of disease and normal chromosomes in the sample:\t";
         cout<<G.getnumDisChr()<<", "<<G.getnumNormChr()<<endl;
      }
      cout<<"The time to the MRCA of the sample:\t"<<G.getTMRCA()<<endl;
      cout<<"The time to the MRCA of the sample of disease chromosomes:\t"<<G.getTMRCAdis()<<endl;
      cout<<"Number of recombinations occurred in the history of the sample:\t"<<G.getnumRecs()<<endl;
      cout<<"Number of recombinations occurred in the history of the disease sample:\t"<<G.getnumRecsDis()<<endl;
      cout<<"Number of mutations occurred in the history of the sample:\t"<<G.getnumMutations()<<endl;
      cout<<"Number of markers satisfied the polymorphism cutoff level:\t "<<nmarkers<<endl;
      cout<<"Current frequency of the disease allele:\t"<<G.currentDisFreq()<<endl;
      //Output recombination rate
      if(printRecs==1)
      {
         if(homorec==0)
         {
            double* recarray = G.getnonhomoRecsArray();
            int arraysize = G.getnumDiffRecRates();
            if(useIceland==1)
            {
               int* disarray = G.getvaryRec_position();
               cout<<"Icelandic recombination data: "<<endl;
               cout<<leftend<<" ~ "<<disarray[0]+leftend<<"\t"<<recarray[0]<<endl;
               for(int k=1; k<arraysize; k++)
                  cout<<disarray[k-1]+leftend<<" ~ "<<disarray[k]+leftend<<"\t"<<recarray[k]<<endl;
            }
            else
            {
               cout<<"Recombination rates along the interval (every kb) simulated by GBM"<<endl;
               for(int k=0; k<arraysize-1; k++)
                  cout<<(double)k<<" ~ "<<(double)(k+1)<<"\t"<<recarray[k]<<endl;
            }
         }
      }

      //--------------------------------------------------------
      //---------------------Start to output--------------------
      ofs<<"<Num_"<<count<<">"<<endl;
      ofs<<"<num_loci>"<<nmarkers<<"</num_loci>"<<endl; //# markers
      // The replicate's interval size is reported as the position of the last
      // marker; with no markers there is no last element to read, so 0 is written.
      const double lastMarkerPos = (nmarkers>0) ? realmapdis[nmarkers-1] : 0.0;
      ofs<<"<interval_size>"<<lastMarkerPos<<"</interval_size>"<<endl;
      if(datatype == HAPLOTYPE)
      {
         // Rows [0, numNormal) of the matrix are written as normal haplotypes,
         // rows [numNormal, numNormal+numDis) as disease haplotypes.
         ofs<<"<haplotypes_normal>"<<endl;
         for(int x=0; x<numNormal; x++)
         {
            ofs<<"<n"<<x<<">";  for(int y=0; y<nmarkers; y++) {  ofs<<realseqmatr[x][y]<<" ";}
            ofs<<"</n"<<x<<">"<<endl;
         }
         ofs<<"</haplotypes_normal>"<<endl;
         ofs<<"<haplotypes_disease>"<<endl;
         int m=0;
         for(int x=numNormal; x<numNormal+numDis; x++)
         {
            ofs<<"<n"<<m<<">";
            for(int y=0; y<nmarkers; y++)  {ofs<<realseqmatr[x][y]<<" ";}
            ofs<<"</n"<<m<<">"<<endl;
            m++;
         }
         ofs<<"</haplotypes_disease>"<<endl;
      }
      else
      {
         // genotypeDataNum holds two matrix row indices per individual:
         // entries [0, 2*numNormal) are written as normal individuals, the
         // remaining 2*numDis entries as disease individuals.
         int* genotypeDataNum=new int[(numDis+numNormal)*2];
         G.getGenotypeSample(genotypeDataNum, seedptr);

         //DD_normal, DN_normal, NN_normal
         ofs<<"<genotypes_normal>"<<endl;
         int m=0;
         for(int x=0; x<numNormal*2; )
         {
            ofs<<"<n"<<m<<"a>";
            for(int y=0; y<nmarkers; y++) {  ofs<<realseqmatr[genotypeDataNum[x]][y]<<" ";}
            ofs<<"</n"<<m<<"a>"<<endl;  x++;
            ofs<<"<n"<<m<<"b>";
            for(int y=0; y<nmarkers; y++) {  ofs<<realseqmatr[genotypeDataNum[x]][y]<<" ";}
            ofs<<"</n"<<m<<"b>"<<endl;  x++;
            m++;
         }
         ofs<<"</genotypes_normal>"<<endl;
         //DD_disease, DN_disease, NN_disease
         ofs<<"<genotypes_disease>"<<endl;
         m=0;
         for(int x=numNormal*2; x<(numNormal+numDis)*2;)
         {
            ofs<<"<n"<<m<<"a>";
            for(int y=0; y<nmarkers; y++) {  ofs<<realseqmatr[genotypeDataNum[x]][y]<<" ";}
            ofs<<"</n"<<m<<"a>"<<endl; x++;
            ofs<<"<n"<<m<<"b>";
            for(int y=0; y<nmarkers; y++) {  ofs<<realseqmatr[genotypeDataNum[x]][y]<<" ";}
            ofs<<"</n"<<m<<"b>"<<endl; x++;
            m++;
         }
         ofs<<"</genotypes_disease>"<<endl;
         delete [] genotypeDataNum;
      }

      ofs<<"<phy_dis>";
      vector<double>::iterator ti;
      for(ti=realmapdis.begin(); ti!=realmapdis.end(); ti++)
      {    ofs<<(*ti)<<"\t"; }
      ofs<<"</phy_dis>"<<endl;
      ofs<<"<dis_frequency>"<<G.currentDisFreq()<<"</dis_frequency>"<<endl; //frequency of disease
      //population size at which the disease allele arose
      double foundingPop=N0*exp(-grate*mutage);
      ofs<<"<founding_popsize>"<<foundingPop<<"</founding_popsize>"<<endl;
      //ofs<<"<random_seed>"<<rand()<<"</random_seed>"<<endl;
      ofs<<"<TMRCA_diseaseTree>"<<G.getTMRCAdis()<<"</TMRCA_diseaseTree>"<<endl;
      ofs<<"<TMRCA_all>"<<G.getTMRCA()<<"</TMRCA_all>"<<endl;
      ofs<<"</Num_"<<count<<">"<<endl;
      //------------------finish output -----------------
      //-------------------------------------------------
      count++;
   }
   // The loop leaves count one past the last accepted replicate; step back so
   // that it holds the number of accepted replicates.
   count--;

   end=clock();
   double elapsed = double(end-start);
   double secs = elapsed/CLOCKS_PER_SEC;

   cout<<endl<<"Summary (based on "<<count<<" replicates):"<<endl;
   cout<<"Average TMRCA:\t"<<aveTMRCA/((double)simuRep)<<endl;
   cout<<"Average TMRCA in disease tree:\t"<<aveDisTMRCA/(double)simuRep<<endl;
   cout<<"Average total branch lengths per genealogy:\t"<<totTime/(double)simuRep<<endl;
   cout<<"Average # mutations occurred per genealogy:\t"<<totMuts/(double)simuRep<<endl;
   cout<<"Average # recombinations per genealogy:\t"<<nrec/(double)simuRep<<endl;
   cout<<"Average # recombinations in disease tree:\t"<<(double)nrecDis/(double)simuRep<<endl;
   cout<<"Average # markers per genealogy:\t"<<totMarkers/(double)simuRep<<endl;
   cout<<"The proportion of rejected simulations is:\t"<<(double)numRejects/(double)(numRejects+count)<<endl;
   cout<<"Program running time (in seconds):\t"<<secs<<endl;
   cout<<endl<<"The minimum # markers among all simulation replicates:\t"<<miniNumMarkers<<endl;

   if(computeLDs==1)
   {
      oflds<<"</DATA>"<<endl;
      oflds.close();
   }

   ofs<<"<Note>"<<endl;
   ofs<<"<total_num_genealogies>"<<count<<"</total_num_genealogies>"<<endl;
   ofs<<"<mean_recombinations>"<<(double)nrec/(double)count<<"</mean_recombinations>"<<endl;
   ofs<<"<running_time>"<<secs<<"</running_time>"<<endl;
   ofs<<"</Note>"<<endl;
   ofs<<"</DATA>"<<endl;
   ofs.close();

   return 0;
}
