#include "DStruct.h"

int comp_qsort(const void* a , const void* b)
{
  double tmp = (*(PositNmuts**)a)->Posit - (*(PositNmuts**)b)->Posit;
  if(tmp<0) return -1;
  else if(tmp>0) return 1;
  else return 0;
}

// Ordering predicate for sort(): true when arg1's key is smaller than arg2's key.
bool compare_hash(hash* arg1, hash* arg2)
{
   return arg1->key < arg2->key;
}

// Ordering predicate for sort(): true when arg1's id is smaller than arg2's id.
bool compare_hash2(hash2* arg1, hash2* arg2)
{
   return arg1->id < arg2->id;
}


// Builds a node from its identifier, waiting time, event code and
// case/control label. A new node holds no sequence (Sequ is NULL) and
// has a recombination breakpoint of 0.
Node::Node(int Id, double time, int event, int caORco)
	: ID(Id),
	  WTime(time),
	  Event(event),
	  caseORcontrol(caORco),
	  Sequ(NULL),
	  recBPoint(0)
{
}
// Frees the sequence array owned by this node, if there is one.
Node::~Node()
{
  delete [] Sequ;   // delete [] on a null pointer does nothing
  Sequ = NULL;
}
// Replaces the sequence stored in this node.
//   sequ   - array to copy from, or NULL to discard the stored sequence.
//   length - number of elements of `sequ` to copy (unused when sequ is NULL).
// The node keeps its own copy; the caller remains the owner of `sequ`.
// Allocation failure is reported by operator new itself (std::bad_alloc).
void Node::setSequ(int* sequ, int length)
{
   if(sequ==NULL) //clear this sequence
   {
      delete [] Sequ;
      Sequ=NULL;
      return;
   }

   //fill the new storage before touching the old one, so that passing the
   //node's own array as `sequ` stays valid
   int* fresh = new int[length];
   for(int i=0; i<length; i++){ fresh[i] = sequ[i]; }

   //release the previously stored sequence; it used to be leaked here
   delete [] Sequ;
   Sequ = fresh;
}

/*---------------Graph----------------*/
// Stores the simulation settings and puts the graph into an empty state.
//   datatype              - when GENOTYPE, disSize/normalSize are stored as
//                           numDisIndiv/numNormIndiv; otherwise as numDis/numNormal.
//   disSize, normalSize   - sample sizes (see datatype).
//   mutage                - stored as disAge; must be positive when disSize is non-zero.
//   remaining parameters  - copied unchanged into the members named in the
//                           initialiser list.
// Exits the program with status 1 when disSize is non-zero and mutage is not positive.
// The marker list markMasac starts with a single entry whose key is the
// interval size and whose value is 1.
Graph::Graph(int datatype, int disSize, int normalSize, long popsize, double gr, double sco, int mutage, double intervalsize, int markerty, double mu, double strpden, double mutloc, int homorec, int useIceland, double cutofflevel, int nmarker_limit):
	dataType(datatype), N0(popsize), grate(gr), seleCo(sco), disAge(mutage), intervSize(intervalsize), markerType(markerty), Mu(mu), strpDens(strpden), mutLoc(mutloc), homoRec(homorec), usingIcelandRecs(useIceland), cutoffLevel(cutofflevel), numMarkerLimit(nmarker_limit), TMRCAdis(0), numRecs(0), numRecsDis(0), numMutLoci(0), SampleSeqMatrix(NULL), RealSampleSeqMatr(NULL)
{
    //input error checkings
    if(disSize!=0 && mutage<=0)
    { cout<<"ERROR: disease sample size and mutation age should both be positive or zero."<<endl;
      exit(1);}

    //if Genotype, then numDis and numNormal need to be specified using penetrance model
    //the pair that is not supplied starts at 0 instead of being left indeterminate
    if(datatype == GENOTYPE) 
    {	  numDisIndiv = disSize; numNormIndiv = normalSize; numDis = 0; numNormal = 0; }
    else { numDis = disSize; numNormal = normalSize; numDisIndiv = 0; numNormIndiv = 0; }

    //every array released by the destructor starts as NULL, so destroying a
    //graph whose simulation never ran (or stopped early) does not free an
    //indeterminate pointer
    DisSamplePath=NULL;
    MapDis=NULL;
    MarkerPosit=NULL;
    nonhomoRecsArray=NULL;
    recRatesCDF=NULL;
    varyRec_position=NULL;
    num_geno_givenDis=NULL;
    num_geno_givenNorm=NULL;
    RealNumMarkers=0;

    hash* subr= new hash;
    subr->key = intervSize;
    subr->value = 1;
    markMasac.push_back(subr);
}

// Releases the heap storage owned by the graph: the disease sample path,
// all nodes, waiting-time events, adjacency rows, mutation loci, marker
// positions, recombination tables, genotype counters, the sample sequence
// matrices and the markMasac entries.
Graph::~Graph()
{
   if(disAge>0) delete [] DisSamplePath;
   DisSamplePath=NULL;
   
   vector<Node*>::iterator vi;
   for(vi=NodesVector.begin(); vi!=NodesVector.end(); vi++)
   { delete (*vi); (*vi)=NULL;}
   NodesVector.clear();
   
   vector<WTimeEvent*>::iterator ti;
   for(ti=WTimeEventVector.begin(); ti!=WTimeEventVector.end(); ti++)
   { delete (*ti); (*ti)=NULL;}
   WTimeEventVector.clear();
  
   //each adjacency row is created with new int[5], so it has to be released
   //with the array form of delete
   vector<int*>::iterator gi;
   for(gi=adjList.begin(); gi!=adjList.end(); gi++){ delete [] (*gi); (*gi)=NULL;} 
   adjList.clear();

   for(int i=0; i<numMutLoci; i++) {  delete MapDis[i];}
   delete [] MapDis; MapDis=NULL;

   //MarkerPosit is only allocated when addMutations() found enough loci
   if(numMutLoci>=numMarkerLimit){ delete [] MarkerPosit; MarkerPosit=NULL;}
   if(!homoRec) {delete [] nonhomoRecsArray; delete [] recRatesCDF;}
   if(usingIcelandRecs) {delete [] varyRec_position;}
   if(dataType == GENOTYPE) {delete [] num_geno_givenDis; delete [] num_geno_givenNorm;}	
   
   if(numMutLoci>=numMarkerLimit)
   {
      for(int i=0; i<numDis+numNormal; i++) 
      { delete [] SampleSeqMatrix[i]; if(RealNumMarkers>0) delete [] RealSampleSeqMatr[i];}
      delete [] SampleSeqMatrix; SampleSeqMatrix=NULL;
      if(RealNumMarkers>0) delete [] RealSampleSeqMatr;
      RealSampleSeqMatr=NULL;
   }

   vector<hash*>::iterator ci;
   for(ci=markMasac.begin(); ci!=markMasac.end(); ci++)
   {
	delete (*ci); (*ci)=NULL;
   }
   markMasac.clear();
}

// Records the chromosome number and the two interval ends, and derives
// intervSize as (rightend - leftend) / 1,000,000.
void Graph::setIcelandParams(int nchrom, long leftend, long rightend)
{
   const long span = rightend-leftend;
   whichChrom = nchrom;
   leftEnd = leftend;
   rightEnd = rightend;
   intervSize = (double)span/1000000;
}
  
// Stores the three parameters later handed to simuRecGBM():
// initial rate, drift and variance.
void Graph::setGBMParams(double initrate, double drift, double var)
{
  initRate = initrate;
  driftGBM = drift;
  varGBM = var;
}

// Stores the three penetrance values in fDD, fDN and fNN.
void Graph::setPenetrParams(double f_DD, double f_DN, double f_NN)
{
  fDD = f_DD;
  fDN = f_DN;
  fNN = f_NN;
}

// Runs one complete simulation: disease-allele sample path (when disAge>0),
// penetrance model (genotype data only), recombination model, tip creation,
// waiting times, mutations, sequence propagation, sample coding and marker
// cut-off.
// Returns false when addMutations() rejects the genealogy or when fewer than
// numMarkerLimit markers survive MarkersCutoff(); true otherwise.
bool Graph::simuGenealogy(double lowboundfre, double highboundfre, long* seedptr)
{
      //sample path of the disease allele frequency, one slot per generation 0..disAge
      if(disAge>0)
      {
	 DisSamplePath = new double[disAge+1];
	 if(DisSamplePath==NULL) { cerr<<"Out of memory!"<<endl; exit(-1);}
	 simuSamplePath(lowboundfre, highboundfre, seedptr);
      }

      //penetrance model
      if(dataType==GENOTYPE) penetrance(seedptr);

      //recombination model: constant rate, Icelandic map, or simulated GBM rates
      if(homoRec) aveRecRate = intervSize/100;
      else if(usingIcelandRecs) getIcelandicRecs(whichChrom, leftEnd, rightEnd);
      else simuRecGBM(intervSize, initRate, driftGBM, varGBM, seedptr);

      //genealogy
      createTipNodes();
      simuWaitTime(seedptr);

      //mutations; stop as soon as the genealogy is rejected
      if(!addMutations(seedptr)) return false;

      forwardTraversal(seedptr);
      codingSampleSeqs();
      RealNumMarkers = MarkersCutoff();
      return !(RealNumMarkers<numMarkerLimit);
}
// Adds up the second entry of every (child, flag) pair in the adjacency
// rows of all nodes and returns true exactly when that total is zero.
bool Graph::checkValid()
{
   int total=0;
   for(int node=0; node<numNodes; node++)
   {
	const int* row = adjList[node];
	const int lastSlot = row[0]*2;   //row[0] holds the number of pairs
	for(int slot=1; slot<=lastSlot; slot+=2) total += row[slot+1];
   }
   return total==0;
}

void Graph::createTipNodes()
{
    //generate nodes for tips, assign ID for each node. ID starts
    //from 0. store all pointers of nodes in a vector: NodesVector
    int i;
    for(i=0; i<numDis; i++)
    {
          Node* anode;
          anode = new Node(i, 0, TIP, CASE); 
	  if(anode==NULL) { cerr<<"Out of memory!"<<endl; exit(-1);}
          NodesVector.push_back(anode);
    }
    for(; i<numDis+numNormal; i++)
    {
           Node* anode;
           anode = new Node(i, 0, TIP, CONTROL);  
	   if(anode==NULL) { cerr<<"Out of memory!"<<endl; exit(-1);}
           NodesVector.push_back(anode);
    }
    //add to adjlist matrix
    for(int i=0; i<numDis+numNormal; i++)
    {
	   int* adlist = new int[5];
	   adlist[0] =0;
	   adjList.push_back(adlist);
    }
    numNodes= numDis+numNormal;
}
/*Simulate the sample path of the frequency of the disease allele,
conditional on non-extinction and non-fixation.
The array of mutant allele frequencies is stored backward: index 0 is the present day
and the last index is the founding time of the disease.
The simulation itself runs forward, starting in the past with a single copy of the disease
allele and ending at the present day (while the number of copies is below 4 the next count is
drawn from a Poisson distribution; otherwise the frequency changes by a uniformly distributed step).*/
void Graph::simuSamplePath(double lowbound, double highbound, long* seedptr)
{
  int ftime =disAge;
  DisSamplePath[0]=0;
  while(DisSamplePath[0]<=lowbound || DisSamplePath[0]>=highbound)
  {
      double num= 1;   //initially 1 copy of mutant allele
      double Nt= N0*exp(-ftime*grate);
      double fre= 1/(2*Nt);
      DisSamplePath[ftime]= fre;

      for(int gth=ftime-1; gth>=0; gth--)
      {
           Nt = N0*exp(-grate*gth);
           if(num<4)
           {
              double lamda;
              lamda = (fre + seleCo*fre*(1-fre))*2*Nt;
              num= poisson(lamda, seedptr);
              if(num<=0) {DisSamplePath[0]=0; break;}
              fre= num/(2*Nt);
              if(fre>=1) {DisSamplePath[0]=1; break;}
              DisSamplePath[gth] = fre;
           }
           else
           {
               double min = seleCo*fre*(1-fre)-sqrt(3*fre*(1-fre)/(2*Nt));
               double max = seleCo*fre*(1-fre)+sqrt(3*fre*(1-fre)/(2*Nt));
               double psv =  uniform(min, max, seedptr);
               fre = fre+psv;
               if(fre<=0) {DisSamplePath[0]=0; break;}
               if(fre>=1) {DisSamplePath[0]=1; break;}
               num= fre* 2* Nt;
               DisSamplePath[gth] = fre;
           }
      }
   }
}


// Builds the internal nodes of the genealogy, going back in time from the tips.
// While more than one lineage remains, each pass of the loop:
//   1. draws candidate times for up to four event types (coalescence and
//      recombination among disease lineages, coalescence and recombination
//      among normal lineages) and keeps the smallest;
//   2. refines a recombination event into RECDD/RECDN/RECND/RECNN;
//   3. creates the new Node, picks a breakpoint for recombinations and records
//      it in markMasac (kept sorted by key);
//   4. lets chooseDescen() pick the child lineage(s) and write the adjacency row;
//   5. adds to TotalTime and updates the lineage counts nDis/nNorm.
// On return TMRCA holds the time of the last event; numRecs, numRecsDis,
// numRecsDwN and TMRCAdis are updated along the way.
void Graph::simuWaitTime(long* seedptr)
{
   int nDis = numDis; int nNorm = numNormal;
   double wtime=0;  double tc_d, tc_n, tr_d, tr_n, smallest; int event=-1;
   TotalTime=0;
   numRecs=0; numRecsDis=0; numRecsDwN=0; 
   double boundary =0;
   if(grate > 0) boundary= -log(2/(double)N0)/grate; //the time at which the population size is 2

   int id = numDis+numNormal;
   //two vectors containing IDs available to be chosen for case and control
   vector<int> disIDs;
   vector<int> normIDs;
   for(int i=0; i<numDis; i++) disIDs.push_back(i);
   for(int i=numDis; i<numDis+numNormal; i++) normIDs.push_back(i);
   // NOTE(review): nothing in this function adds to mrcaIDs, so the block
   // below that copies it into normIDs never runs as written.
   vector<int> mrcaIDs; //without MRCA lineages
   
   int ncoal=0; //# of coalescent events, if >= sample size-1, checking if there is any part of the chromosome found the MRCA
   
   while( nDis+nNorm >1 )
   {
	double wtime_preG = wtime;    
	//simulate waiting times for four types of events
	//smallest==0 means "no candidate yet"
	smallest=0;
	if(nDis>1) //coalescence in disease tree
	{ tc_d =coaltimeDiscrete(wtime, nDis, CASE, seedptr);
	   smallest=tc_d; event=COALDIS; }
	if(nDis>0 && aveRecRate!=0) //recom in disease tree 
	{  
	   tr_d = -2*log(1-ran1(seedptr))/(nDis*aveRecRate);  tr_d +=wtime;
	   if(tr_d<smallest || smallest==0)  {smallest=tr_d; event=RECDIS; }
	}	   
	if(nNorm>1) //coalescence in normal tree
	{
	   //past the disease founding time the continuous-time formulas are used
	   if(wtime>=disAge && grate>0)
	    { 
	       double partA = (4*N0*grate)/(nNorm*(nNorm-1))*log(1-ran1(seedptr));		    
	       tc_n = (1/grate)*log(exp(grate*wtime)-partA);
	    } 
	   else if(wtime>=disAge && grate ==0)
            { tc_n = -4*N0*log(1-ran1(seedptr))/(nNorm*(nNorm-1)); tc_n += wtime; }
	   else tc_n =coaltimeDiscrete(wtime, nNorm, CONTROL, seedptr);
	    if(tc_n<smallest || smallest==0) {smallest=tc_n; event=COALNORM;}
	}
	if(nNorm>0 && aveRecRate!=0) //rec in normal tree
	{ 
	    tr_n  = -2*log(1-ran1(seedptr))/(nNorm*aveRecRate); 
	    tr_n +=wtime; 
	    if(tr_n<smallest || smallest==0) {smallest=tr_n; event=RECNORM;}
	}
	if(grate>0 && smallest>boundary) {smallest = boundary;}

	//------------------added on May 16-----------------
	//---ignore the recombinations whose break points are in the intervals that have found the MRCAs
	int signal=1;
	if(event==RECNORM && mrcaNodes.size()!=0)//workingon at 11:00 May 17 2005
	{
	   double bkp= ran1(seedptr)*intervSize;
	   if(inSamllInterv(bkp)) signal=0;
	}
	//the event is dropped before wtime is advanced, so the next pass redraws from the same time
	if(signal==0) continue;
	//--------------------------------------------------
	
	
	wtime=smallest; //choose the smallest one
	
	if(event == RECDIS)
	{
	   if(wtime > disAge) event = RECNORM;	
	   else{ 
	      if(ran1(seedptr)<DisSamplePath[(int)wtime]) event = RECDD;
	      else event=RECDN;
	   }
	   if(event == RECDN) numRecsDwN++;
	   numRecs++; numRecsDis++;
	}	
	//also reached when a RECDIS event was turned into RECNORM just above
	if(event == RECNORM)
	{
	   if(wtime<disAge && ran1(seedptr)<DisSamplePath[(int)wtime]) event = RECND;
	   else event = RECNN;
	   numRecs++;
	}
	
	//create a new internal node
	int type; 
	if(event==RECDD || event ==RECDN || event == COALDIS) type=CASE;
	else type = CONTROL;
	Node* anode = new Node(id++, wtime, event, type); anode->setpassPart(0);
	if(anode==NULL) { cerr<<"Out of memory!"<<endl; exit(-1);}
	numNodes++;
	NodesVector.push_back(anode);

	//choose breakpoint for the recombination node
	if(event==RECDD ||  event==RECDN ||  event==RECND || event==RECNN)
	{
	   double breakpoint;	
	   if(homoRec) breakpoint = ran1(seedptr)*intervSize;
	   else breakpoint = chooseBreakpoint(seedptr);

	   //check if the breakpoint is in the small intervals where all the chroms have coalesced
	   while(mrcaNodes.size()!=0 && inSamllInterv(breakpoint))
	   { 
	      if(homoRec) breakpoint = ran1(seedptr)*intervSize; 
	      else breakpoint = chooseBreakpoint(seedptr);
	   }
	   
	   anode->setRecBPoint(breakpoint);

	   //record the breakpoint as a new interval boundary and keep the list ordered by key
	   hash* subr= new hash;
	   subr->key = breakpoint;
	   subr->value = 1;
	   markMasac.push_back(subr);
	   sort(markMasac.begin(), markMasac.end(), compare_hash);
	}

	//randomly find 1 or 2 lineage/s to recombine or coalesce
	chooseDescen(anode, disIDs, normIDs, seedptr);
	if(!mrcaIDs.empty())
	{
	   vector<int>::iterator vi;
	   for(vi=mrcaIDs.begin(); vi!=mrcaIDs.end(); vi++)
	   { normIDs.push_back(*vi);}
	}

	if(event==COALNORM || event==COALDIS) //if coalescence event, doing the forward traversal
        {
           ncoal++;
           if(ncoal>=numDis+numNormal-1 && wtime>=disAge)
           {
	      bool getamrca =  checkMasacMRCA(anode); 
	      if(getamrca){mrcaNodes.push_back(anode);}
           }
        }
	
	//once the disease founding time is reached, all disease lineages join the normal pool
	if(wtime>=disAge && disAge>0)
	{
           while(disIDs.size()!=0)
          { normIDs.push_back(disIDs[disIDs.size()-1]); disIDs.pop_back();}
        }

	//calculate TotalTime
	TotalTime += (nDis+nNorm)*(wtime - wtime_preG);

	//modify nDis or nNorm
	switch(event){
	  case RECDD:   nDis++; break;
	  case RECDN:   nNorm++; break; 
	  case COALDIS: nDis--; break; 
	  case RECND:   nDis++; break;
 	  case RECNN:   nNorm++; break; 
	  case COALNORM: nNorm--; break;
	}
		
	//TMRCAdis records the first time only one disease lineage is left
	if(nDis==1 && TMRCAdis==0){ TMRCAdis=wtime; }
	if(wtime>=disAge && nDis==1)
	{ nDis--; nNorm++; }
   }
   TMRCA=wtime;
   //numNodes=NodesVector.size(); 
}

// Tells whether bkp falls into an interval of markMasac that is flagged
// with value 0. Interval k covers [key of entry k-1, key of entry k); the
// first interval has no lower limit.
bool Graph::inSamllInterv(double bkp)
{
     for(int k=0; k<markMasac.size(); k++)
     {
	if(!(markMasac[k]->value==0)) continue;       //interval is not flagged
	if(!(bkp<markMasac[k]->key)) continue;        //bkp is not below the upper end
	if(k==0 || bkp>=markMasac[k-1]->key) return true;
     }
    return false;
}

// Follows the genealogy downward from coalnode to find which part of the
// interval [0, intervSize] each reachable tip inherits from it.
// Each hash2 entry is (node id, start, end). Starting from the single entry
// (coalnode, 0, intervSize), every pass replaces each entry by entries for
// its children (clipped at the child's recombination breakpoint when the
// adjacency flag is LEFT or RIGHT), merges entries with the same id, and
// stops once checkAllTips() reports that only tip ids remain.
// If the final list has exactly numDis+numNormal entries, getOverlapRegion()
// is called and its result is returned; otherwise false is returned.
bool Graph::checkMasacMRCA(Node* coalnode)
{
   vector<hash2*> chroms;	
   hash2* ahash = new hash2;
   ahash->id=coalnode->getID();
   ahash->start = 0; ahash->end=intervSize;
   chroms.push_back(ahash); 

   vector<hash2*>::iterator vi;
   while(1)
   {
      vector<hash2*> chroms_new;	   
      for(vi=chroms.begin(); vi!=chroms.end(); vi++)
      {
	//a node without children (child count 0) is carried over unchanged
	if(adjList[(*vi)->id][0]==0)
	{ 
	  ahash = new hash2;
	  ahash->id = (*vi)->id; ahash->start = (*vi)->start; ahash->end = (*vi)->end;
	  chroms_new.push_back(ahash);
	}
	else
	{
	    //adjacency row layout: [count, child id, flag, child id, flag]
	    for(int i=1; i<=adjList[(*vi)->id][0]*2; i+=2)
	    {
		ahash = new hash2;
		ahash->id = adjList[(*vi)->id][i];
		//the breakpoint is read from the child node
		double bkp = NodesVector[adjList[(*vi)->id][i]]->getRecBPoint();
		if(adjList[(*vi)->id][i+1]==LEFT) //recom, pass left
		{ 
			if(bkp<(*vi)->start){ delete ahash; ahash=NULL;} //nothing to pass
			else if(bkp>(*vi)->start && bkp<(*vi)->end) {ahash->start=(*vi)->start; ahash->end=bkp;}
			else {ahash->start=(*vi)->start; ahash->end=(*vi)->end;}//pass all
		}
		else if(adjList[(*vi)->id][i+1]==RIGHT)//recom, pass right
		{ 
			if(bkp>(*vi)->end) { delete ahash; ahash=NULL;} //nothing to pass
			else if(bkp>(*vi)->start && bkp<(*vi)->end){ ahash->start=bkp; ahash->end=(*vi)->end;}
			else { ahash->start=(*vi)->start; ahash->end=(*vi)->end;}
		}
		else{ ahash->start=(*vi)->start; ahash->end=(*vi)->end;}//coal, pass all
		if(ahash!=NULL) chroms_new.push_back(ahash);
	    }
	}
      }
     /*cout<<"before combing: ";
     for(int i=0; i<chroms_new.size(); i++)
     { cout<<chroms_new[i]->id<<": "<<chroms_new[i]->start<<", "<<chroms_new[i]->end<<" || ";}cout<<endl;*/
      
      //merge entries that refer to the same node
      combineAdjCells(chroms_new);

      //copy chroms_new to chroms, and clear chroms_new
      copyVector(chroms, chroms_new);
   
      /*cout<<"after combing: ";
      for(int i=0; i<chroms.size(); i++)
      { cout<<chroms[i]->id<<": "<<chroms[i]->start<<", "<<chroms[i]->end<<" || ";}cout<<endl;*/

      //check if all have reached tips
      if(checkAllTips(chroms)){break;}
   }
   //check if all the tips have been visited (found the MRCA of all chroms) 
   bool overlap = false;
   if((signed)chroms.size() == numDis+numNormal) {overlap=getOverlapRegion(chroms);}
   clearVector(chroms);
   return overlap;
}

// Sorts vec by id and collapses runs of entries with the same id into one
// entry. When an entry is folded into the previous one, either the merged
// start is lowered to the entry's start, or (if the merged start is not
// larger) the merged end is replaced by the entry's end.
// vec is rebuilt from the merged list through copyVector().
void Graph::combineAdjCells(vector<hash2*>& vec)
{
   sort(vec.begin(), vec.end(), compare_hash2);

   vector<hash2*> merged;
   for(vector<hash2*>::iterator it=vec.begin(); it!=vec.end(); it++)
   {
      hash2* cur = *it;
      const bool sameAsPrevious = (it!=vec.begin() && cur->id == (*(it-1))->id);
      if(sameAsPrevious)
      {
         hash2* last = merged.back();
         if(last->start > cur->start) last->start = cur->start;
         else last->end = cur->end;
         continue;
      }
      hash2* cell = new hash2;
      cell->id = cur->id;
      cell->start = cur->start;
      cell->end = cur->end;
      merged.push_back(cell);
   }
   copyVector(vec, merged);
}

// Intersects the [start, end] ranges of all entries of vec (largest start,
// smallest end) and sets value=0 on every markMasac entry whose key lies in
// (start, end] of that intersection.
// Returns true when at least one markMasac entry was flagged.
// vec must not be empty.
bool Graph::getOverlapRegion(vector<hash2*>& vec)
{
   double lo = vec.front()->start;
   double hi = vec.front()->end;
   for(unsigned int k=1; k<vec.size(); k++)
   {
     if(vec[k]->start > lo) lo = vec[k]->start;
     if(vec[k]->end < hi) hi = vec[k]->end;
   }

   //update the vector of markMasac
   bool flaggedAny = false;
   for(unsigned int m=0; m<markMasac.size(); m++)
   {
       hash* mark = markMasac[m];
       if(mark->key>lo && mark->key<=hi){ mark->value=0; flaggedAny=true;}
   }
   return flaggedAny;
}

// Returns true when every entry of vec has an id below numDis+numNormal,
// i.e. refers to one of the nodes created by createTipNodes().
bool Graph::checkAllTips(vector<hash2*> vec)
{
  for(unsigned int k=0; k<vec.size(); k++)
  {
	if(vec[k]->id>=numDis+numNormal) return false;
  }
  return true;
}

// Deletes every hash2 object referenced by vec and leaves vec empty.
void Graph::clearVector(vector<hash2*>& vec)
{
    while(!vec.empty())
    {
	delete vec.back();
	vec.pop_back();
    }
}

// Moves the contents of vec2 into vec.
// Whatever vec held before is deleted; afterwards vec holds the entries
// that were in vec2 (same ids, starts and ends, same order) and vec2 is empty.
// The entries are handed over instead of being duplicated and then deleted,
// which yields the same result without any allocation.
void Graph::copyVector(vector<hash2*>& vec, vector<hash2*>& vec2)
{
    if(&vec == &vec2) { clearVector(vec); return; }   //same end state as before: an empty vector

    clearVector(vec);   //vec is now empty and owns nothing
    vec.swap(vec2);     //vec takes over the entries, vec2 receives the empty vector
}

//Simulate the waiting time to the next coalescent event under the discrete-time model.
//A recursion is used to speed up the computation.
//--> has been checked against the continuous-time model and against a non-recursive method
// Draws the generation of the next coalescence among nlin lineages, starting
// from time T_old.
//   T_old   - current time; its integer part is the starting generation.
//   nlin    - number of lineages that can coalesce.
//   caORco  - CASE or CONTROL; selects which share of the population
//             (DisSamplePath or 1-DisSamplePath) is used up to disAge.
//   seedptr - state passed to ran1().
// A uniform number U is drawn and the probability p is accumulated generation
// by generation until it reaches U.
// Returns the generation reached, with two special cases: T_old+0.0001 when
// the event falls in the starting generation, and disAge-nlin*0.00001 for
// CASE lineages once the generation counter reaches disAge.
double Graph::coaltimeDiscrete(double T_old, int nlin, int caORco, long* seedptr)
{
    double U= ran1(seedptr);
    int gth=(int)T_old;
    double p=0; double A; double B; double C; double tmp;
    int foundingtime= disAge;

    //probability for the starting generation
    A= (double)nlin*((double)nlin-1)/(4*(double)N0);
    if(gth<=foundingtime)
    {  if(caORco==CASE) p= A*exp(grate*(double)gth)/DisSamplePath[gth]; 
        else p= A*exp(grate*(double)gth)/(1-DisSamplePath[gth]); }
    else { p= A*exp(grate*gth);}

    //tmp holds the latest term; each new term is the previous one times B*C
    tmp=p;
    while(p<U)
    {
         gth++;
	 if(caORco==CASE)
	 {
	     if(gth<foundingtime)
	     { B= exp(-A* exp(grate*(gth-1))/DisSamplePath[gth-1]); 
	       C= exp(grate)*DisSamplePath[gth-1]/DisSamplePath[gth];}
	     else{ return foundingtime-nlin*0.00001;  }//disease lineages coalesce before foundingtime 
	 }
	 else
	 {
	      if(gth<=foundingtime)
	      { B= exp(-A* exp(grate*(gth-1))/(1-DisSamplePath[gth-1]));
		C= exp(grate)*(1-DisSamplePath[gth-1])/(1-DisSamplePath[gth]);}
	      //first generation past the founding time: only the previous generation still uses DisSamplePath
	      else if(gth==foundingtime+1)
	      { B= exp(-A* exp(grate*(gth-1))/(1-DisSamplePath[gth-1])); 
		C= exp(grate)*(1-DisSamplePath[gth-1]);}
	      else  { B= exp(-A* exp(grate*(gth-1)));  C= exp(grate);}
	 }
	 tmp= tmp*B*C;   p += tmp;
    }
    //keep the returned time strictly later than T_old
    if(gth==(int)T_old) return T_old+0.0001;
    return (double)gth;
}
//only for recombination node
// Decides which side of a recombination node's breakpoint (LEFT or RIGHT)
// is handed to `parent`, and returns it.
// If the child already gave one side away, the opposite side is returned and
// the child is left untouched. Otherwise the side is chosen as follows and
// stored in the child with setpassPart():
//   RECNN / RECDD - LEFT or RIGHT with probability 1/2 each;
//   RECDN         - LEFT when (parent is CASE and mutLoc<=breakpoint) or
//                   (parent is CONTROL and mutLoc>breakpoint), else RIGHT;
//   other events  - the same rule with CASE and CONTROL exchanged.
int Graph::setuppassPart(Node* child, Node* parent, long* seedptr)
{
   const int given = child->getpassPart();
   if(given==LEFT) return RIGHT;
   if(given==RIGHT) return LEFT;

   const double breakpoint = child->getRecBPoint();
   const int ev = child->getEvent();
   int side;
   if(ev==RECNN || ev==RECDD)
   {
      side = (ran1(seedptr)<0.5) ? LEFT : RIGHT;
   }
   else
   {
      //which parent label receives the part at or right of mutLoc's side
      const int lowLabel  = (ev==RECDN) ? CASE : CONTROL;
      const int highLabel = (ev==RECDN) ? CONTROL : CASE;
      const int label = parent->isCaseORControl();
      const bool takeLeft = (label==lowLabel && mutLoc<=breakpoint)
                         || (label==highLabel && mutLoc>breakpoint);
      side = takeLeft ? LEFT : RIGHT;
   }
   child->setpassPart(side);
   return side;
}

// Picks the child lineage(s) of the new node `anode` and appends its
// adjacency row to adjList.
//   anode    - the node just created; its event decides how many children
//              are drawn and from which pool.
//   disIDs   - IDs of the currently available disease lineages (modified).
//   normIDs  - IDs of the currently available normal lineages (modified).
//   seedptr  - state passed to random4() / ran1().
// Row layout: [0] = number of children (1 for recombination, 2 for
// coalescence), followed by (child id, pass part) pairs. The pass part comes
// from setuppassPart() when the child is itself a recombination node and is
// 0 otherwise. For recombination rows only slots 0..2 are written.
// Each chosen child is removed from its pool; the new node's id is pushed
// once for a coalescence and twice (one per parent lineage) for a recombination.
// NOTE(review): index is drawn with random4(0, size, ...); presumably the
// upper bound is exclusive - confirm against random4's definition.
void Graph::chooseDescen(Node* anode, vector<int>& disIDs, vector<int>& normIDs, long* seedptr)
{ 
    int* adlist	= new int[5];
    int index;
    int found;
    int id = anode->getID(); int event = anode->getEvent();
	    
    //recombination, both resulting lineages stay in the disease pool
    if(event== RECDD){ index = random4(0, disIDs.size(), seedptr);
	       found = NodesVector[disIDs[index]]->getID(); disIDs.erase(disIDs.begin()+index);
	       disIDs.push_back(id);disIDs.push_back(id);
	       adlist[0]=1; adlist[1] = found;
	       Node* fd = NodesVector[found];
	       if(fd->getEvent()!=COALDIS && fd->getEvent()!=COALNORM && fd->getEvent()!=TIP)
	       { adlist[2] = setuppassPart(fd, anode, seedptr); }
	       else adlist[2]=0;}

    //recombination of a disease lineage, one resulting lineage per pool
    else if(event==RECDN){ index = random4(0, disIDs.size(), seedptr);
	       found = NodesVector[disIDs[index]]->getID(); disIDs.erase(disIDs.begin()+index);
	       disIDs.push_back(id); normIDs.push_back(id);
	       adlist[0]=1; adlist[1] = found;
	       Node* fd = NodesVector[found];
               if(fd->getEvent()!=COALDIS && fd->getEvent()!=COALNORM && fd->getEvent()!=TIP)
               { adlist[2] = setuppassPart(fd, anode, seedptr);}
	       else adlist[2]=0;}

    //coalescence of two disease lineages; i advances by two per child (id, pass part)
    else if(event== COALDIS){ adlist[0]=2; 
  	       for(int i=1;i<5;){
		 index = random4(0, disIDs.size(), seedptr);
		 found = NodesVector[disIDs[index]]->getID();disIDs.erase(disIDs.begin()+index); 
		 adlist[i++] = found;
		 Node* fd = NodesVector[found];
                 if(fd->getEvent()!=COALDIS && fd->getEvent()!=COALNORM && fd->getEvent()!=TIP)
                 { adlist[i++] = setuppassPart(fd, anode, seedptr);}
		 else adlist[i++] = 0;
		 }
		 disIDs.push_back(id);}

    //recombination of a normal lineage, one resulting lineage per pool
    else if(event==RECND){ index = random4(0, normIDs.size(), seedptr);
	       found = NodesVector[normIDs[index]]->getID();normIDs.erase(normIDs.begin()+index);
	       normIDs.push_back(id); disIDs.push_back(id);
	       adlist[0]=1; adlist[1] = found;
	       Node* fd = NodesVector[found];
               if(fd->getEvent()!=COALDIS && fd->getEvent()!=COALNORM && fd->getEvent()!=TIP )
               { adlist[2] = setuppassPart(fd, anode, seedptr);}
	       else adlist[2]=0;}

    //recombination, both resulting lineages stay in the normal pool
    else if(event==RECNN){ index = random4(0, normIDs.size(), seedptr);
	       found = NodesVector[normIDs[index]]->getID(); normIDs.erase(normIDs.begin()+index);
	       normIDs.push_back(id); normIDs.push_back(id); 
	       adlist[0]=1; adlist[1] = found;
	       Node* fd = NodesVector[found];
               if(fd->getEvent()!=COALDIS && fd->getEvent()!=COALNORM && fd->getEvent()!=TIP)
               { adlist[2] = setuppassPart(fd, anode, seedptr);}
	       else adlist[2]=0;}
    

    //any other event is handled as a coalescence of two normal lineages
    else{ adlist[0]=2;
       	  for(int i=1;i<5;){ 
		 index = random4(0, normIDs.size(), seedptr);
		 found = NodesVector[normIDs[index]]->getID();normIDs.erase(normIDs.begin()+index);
		 adlist[i++] = found;
		 Node* fd = NodesVector[found];
                 if(fd->getEvent()!=COALDIS && fd->getEvent()!=COALNORM && fd->getEvent()!=TIP)
                 { adlist[i++] = setuppassPart(fd, anode, seedptr);}
                 else adlist[i++] = 0;
                 }
		 normIDs.push_back(id); }

    adjList.push_back(adlist);
}


// Places mutations on the genealogy and builds the locus table MapDis.
// The number of mutations is drawn with poisson_normal() from a mean
// proportional to TotalTime and Mu. Each mutation gets a position; mutations
// that share a position are collected in one PositNmuts record whose Nmuts
// vector holds the mutation count in slot 0 followed by one ran1() draw per
// mutation (these draws are compared with branch fractions in mutSequency()).
//   SNP markers  - positions are drawn over nsites sites and scaled by 0.000001.
//   other types  - a set of marker locations is drawn first, and every
//                  mutation is assigned to one of them.
// Returns false, leaving MarkerPosit unallocated, when fewer than
// numMarkerLimit distinct loci were produced. Otherwise MapDis is sorted by
// position, MarkerPosit is filled with the positions, and true is returned.
bool Graph::addMutations(long* seedptr)
{
    int nmuts;
    int nsites = (int)(intervSize*1000000+1);
    numMutLoci=0;

    if(markerType==SNP)
    {
        //expected number of mutations for SNP markers, given the genealogy
        double lamda = TotalTime * Mu *nsites;
        //actual number of mutations for SNP markers, given the genealogy
        nmuts= poisson_normal(lamda, seedptr); //cout<<"num of mutatins: "<<nmuts<<endl;
	numMutations = nmuts;
	//sized for the worst case of one locus per mutation; numMutLoci counts the slots in use
        MapDis=new PositNmuts*[nmuts];
	if(MapDis==NULL) { cerr<<"Out of memory!"<<endl; exit(-1);}
        for(int i=0; i<nmuts; i++)  //choose positions for markers 
	{
           double pos = (int)(ran1(seedptr)*nsites)*0.000001; //in Mb		
	   int signal=0;
	   //linear search for a locus already at this position
	   for(int j=0; j<numMutLoci; j++)
	   { 
	      if(MapDis[j]->Posit == pos) 
	      {  signal=1; MapDis[j]->Nmuts[0]++; double U = ran1(seedptr);
		 MapDis[j]->Nmuts.push_back(U); break;} 
	   }
	   if(signal==0) //no duplicate position found
	   {
	       PositNmuts* newlocus = new PositNmuts;
	       if(newlocus==NULL) { cerr<<"Out of memory!"<<endl; exit(-1);}
	       newlocus->Posit = pos; newlocus->Nmuts.push_back(1); double U = ran1(seedptr);
	       newlocus->Nmuts.push_back(U); MapDis[numMutLoci++]=newlocus; 
	    }
	}
    }
    else{
	int nstrpmks = poisson_normal(strpDens*intervSize, seedptr);
	//choose positions for strp markers
	 double* nstrploc=new double[nstrpmks];
	 if(nstrploc==NULL) { cerr<<"Out of memory!"<<endl; exit(-1);}
	 for(int i=0; i<nstrpmks; i++)
	 { nstrploc[i] = ran1(seedptr)*nsites*0.000001;}
	 //expected number of mutations for strp markers, given the genealogy 
	 double lamda = TotalTime * Mu *nstrpmks;
	 //actual number of mutations, given the genealogy
	 nmuts= poisson_normal(lamda, seedptr); //cout<<"# mutations: "<<nmuts<<endl;
	 numMutations = nmuts;
	 MapDis=new PositNmuts*[nmuts];
	 if(MapDis==NULL) { cerr<<"Out of memory!"<<endl; exit(-1);}
	 //allocate positions for these markers
	 for(int i=0; i<nmuts; i++)
	 {
	    //each mutation lands on one of the pre-drawn marker locations
            double pos= nstrploc[(int)(ran1(seedptr)*nstrpmks)];
            int signal=0;
	    for(int j=0; j<numMutLoci; j++)
	    {
	        if(MapDis[j]->Posit == pos)
	        {  signal=1; MapDis[j]->Nmuts[0]++; double U = ran1(seedptr);
	           MapDis[j]->Nmuts.push_back(U); break;}
	    }
	    if(signal==0) //no duplicate position found
	    {
		PositNmuts* newlocus = new PositNmuts;
	  	if(newlocus==NULL) { cerr<<"Out of memory!"<<endl; exit(-1);}
		newlocus->Posit = pos; newlocus->Nmuts.push_back(1);double U = ran1(seedptr);
		newlocus->Nmuts.push_back(U); MapDis[numMutLoci++]=newlocus;
	    }
	 }
	 if(nstrploc!=NULL) delete [] nstrploc;
   }
   //cout<<"# mutant loci: "<<numMutLoci<<endl; 
   //too few distinct loci: reject
   if(numMutLoci< numMarkerLimit)
	 return false;

   //sort according to marker positions 
   qsort(MapDis, numMutLoci, sizeof(PositNmuts*), comp_qsort);

   //array only stores the position of markers 
    MarkerPosit=new double[numMutLoci];
    if(MarkerPosit==NULL) { cerr<<"Out of memory!"<<endl; exit(-1);}	
    for(int i=0; i<numMutLoci; i++) MarkerPosit[i]= MapDis[i]->Posit;
   return true;
}

// Propagates sequences from the root (the last node) down to the tips.
// The root gets a starting sequence: a ranchar() draw per locus for SNP
// markers, the value 20 per locus otherwise. Internal nodes are then visited
// from the highest index to the lowest; for every child the parent's sequence
// is copied, mutated by mutSequency() over the branch, and
//   - stored as the child's sequence when the child has none yet, or
//   - for a recombination child that already has one, written only on the
//     side of its breakpoint that this parent passes (LEFT: loci before the
//     breakpoint, otherwise loci at or after it).
// Children that are coalescent nodes or tips and already hold a sequence are
// left unchanged.
void Graph::forwardTraversal(long* seedptr)
{ 
    //sequence of the root node
    int* rootSeq = new int[numMutLoci];
    if(rootSeq==NULL) { cerr<<"Out of memory!"<<endl; exit(-1);}	
    for(int locus=0; locus<numMutLoci; locus++)
    {
	if(markerType == SNP) rootSeq[locus] = ranchar(seedptr);
	else rootSeq[locus] = 20;
    }
    NodesVector[numNodes-1]->setSequ(rootSeq, numMutLoci);
    delete [] rootSeq;

    //accuTime is the running sum of branch lengths visited so far
    double accuTime=0; 
    int* scratch = new int[numMutLoci];
    if(scratch==NULL) { cerr<<"Out of memory!"<<endl; exit(-1);}  	

    for(int parentIdx=numNodes-1; parentIdx>=numDis+numNormal; parentIdx--)
    {
	Node* current = NodesVector[parentIdx];
	int* links = adjList[parentIdx];

	for(int j=1; j<=links[0]*2; j+=2)
	{
	    Node* child = NodesVector[links[j]];
	    const double accuTimePre = accuTime;
	    accuTime += (current->getWTime()) - (child->getWTime());

	    //start from the parent's sequence and apply the mutations of this branch
	    int* parentSeq = current->getSequ();
	    for(int k=0; k<numMutLoci; k++) scratch[k] = parentSeq[k];
	    mutSequency(scratch, accuTime, accuTimePre, seedptr);

	    int* childSeq = child->getSequ();
	    if(childSeq==NULL)
	    {
		child->setSequ(scratch, numMutLoci);
		continue;
	    }

	    //the child already has a sequence: only recombination nodes are patched
	    const int ev = child->getEvent();
	    if(ev==COALDIS || ev==COALNORM || ev==TIP) continue;

	    const double breakpoint = child->getRecBPoint();
	    const bool passLeft = (links[j+1]==LEFT);
	    for(int m=0; m<numMutLoci; m++)
	    {
		const bool atOrAfterBreak = (MapDis[m]->Posit >= breakpoint);
		if(passLeft)
		{
		    if(atOrAfterBreak) break;
		    childSeq[m] = scratch[m];
		}
		else if(atOrAfterBreak)
		{
		    childSeq[m] = scratch[m];
		}
	    }
	}
    }
    
    delete [] scratch;
}


void Graph::mutSequency(int* newSequ, double accuTime, double accuTimePre, long* seedptr)
{
    double branchW = accuTime/TotalTime, branchPreW= accuTimePre/TotalTime;
    for(int mth =0; mth<numMutLoci; mth++)
    {
	for(int z=1; z<=MapDis[mth]->Nmuts[0]; z++)
	{
	   if(branchW>=MapDis[mth]->Nmuts[z] && branchPreW < MapDis[mth]->Nmuts[z])
	   {
		//cout<<":\t"<<mth<<"\t"<<(char)newSequ[mth];
		newSequ[mth] =ranchar2(newSequ[mth], seedptr);  
	   	//cout<<"-->"<<(char)newSequ[mth]<<endl;
	   }
	}
    }
}

//Choose one of the four nucleotides with equal probability. Only used to decide the haplotype of the root.
// Draws one ran1() number and maps its quarter of [0,1) to a nucleotide
// code: 'A', 'T', 'G' or 'C', in that order.
int Graph::ranchar(long* seedptr)
{
   const double p = ran1(seedptr);
   if(p<0.25) return 'A';
   if(p<0.5) return 'T';
   if(p<0.75) return 'G';
   return 'C';
}

//For SNPs, a nucleotide can mutate to each of the other three nucleotides with equal probability.
//For STRP markers, a stepwise mutation model is used: P(n+1 | n) = P(n-1 | n) = 0.5.
// Returns the state a marker mutates to.
//   oldType - current state: a nucleotide code for SNP markers, a repeat
//             count for the other marker type.
//   seedptr - state passed to ran1().
// SNP: one of the three other nucleotides, each chosen for one third of the
// ran1() range. Other markers: oldType-1 when the draw is below 0.5,
// oldType+1 otherwise.
int Graph::ranchar2(int oldType, long* seedptr)
{
   const double p = ran1(seedptr);

   if(markerType != SNP)
   { if(p<0.5) return oldType-1; else return oldType+1; }

   //floating-point thirds: the integer expressions 1/3 and 2/3 are both 0,
   //which sent every draw to the last alternative
   const double x=1.0/3; const double y=2.0/3;

   switch (oldType)
   {
       case 'A': if(p<x) return 'T'; else if(p<y) return 'G'; else return 'C';
       case 'T': if(p<x) return 'G'; else if(p<y) return 'C'; else return 'A';
       case 'G': if(p<x) return 'C'; else if(p<y) return 'A'; else return 'T';
       default:  if(p<x) return 'A'; else if(p<y) return 'T'; else return 'G';
   }
}

// Writes a tab-separated dump of the graph to cout: one line per node with
// its waiting time, event code, recombination breakpoint, sequence (as
// characters) and adjacency row, followed by the marker positions.
// A node that has no sequence yet is printed with an empty sequence field.
void Graph::printGraph()
{
    cout<<"Node(\tWTime\tEvent\tBkPoint\tSequ):\t#conn\tconn/s\n";	
    for(int i=0; i<numNodes; i++)
    {
        cout<<"Node"<<i<<"(\t"<<NodesVector[i]->getWTime()<<"\t"<<NodesVector[i]->getEvent()<<"\t";
	cout<<NodesVector[i]->getRecBPoint()<<"\t";
	int* sequ = NodesVector[i]->getSequ();
	//nodes start with a NULL sequence, so guard the dump
	if(sequ!=NULL)
	{
	    for(int j=0; j<numMutLoci; j++){cout<<(char)sequ[j];}
	}
	cout<<"\t";
	//adjacency row: the child count followed by that many (id, flag) pairs
        for(int j=0; j<=adjList[i][0]*2; j++) cout<<adjList[i][j]<<"\t";
	cout<<endl;
     }
    cout<<"Marker positions: "<<endl;
    for(int i=0; i<numMutLoci;i++)
    {
	cout<<MapDis[i]->Posit<<"\t";
    }
    cout<<endl;
}

//randomly choose recombination break point
// Draws a recombination breakpoint according to recRatesCDF.
// The first bin whose cumulative value exceeds a ran1() draw is selected and
// a position inside that bin is drawn with random4() and divided by
// 1,000,000. Bin limits come from varyRec_position when the Icelandic map is
// used, and are [bin*1000, (bin+1)*1000] otherwise.
// Returns 0 when no bin exceeds the draw.
double Graph::chooseBreakpoint(long* seedptr)
{
   const double U = ran1(seedptr);

   //locate the first bin whose cumulative probability is above U
   int bin = 0;
   while(bin<numDiffRecRates && !(recRatesCDF[bin]>U)) bin++;
   if(!(bin<numDiffRecRates)) return 0;

   //return position in Mb
   double breakp;
   if(!usingIcelandRecs)
   {
	breakp = double(random4(bin*1000, (bin+1)*1000, seedptr))/1000000;
   }
   else if(bin==0)
   {
	breakp = double(random4(0, varyRec_position[0], seedptr))/1000000;
   }
   else
   {
	breakp = double(random4(varyRec_position[bin-1], varyRec_position[bin], seedptr))/1000000;
   }
   return breakp;
}

//Simulate a non-homogeneous recombination rate along the chromosomal interval
//with a Geometric Brownian Motion (GBM) model.
//startrate is given in cM/Mb, intervalsize is given in Mb; drift and var are the
//GBM drift and diffusion coefficients; seedptr is passed to stdnormal().
//Fills nonhomoRecsArray (one rate per bin), aveRecRate and recRatesCDF, and sets
//numDiffRecRates. A path is rejected as soon as one of its rates exceeds 5; after
//1000 rejected paths the program terminates.
//NOTE(review): the number of bins is derived from the member intervSize, not from
//the intervalsize argument - confirm that the two always agree.
void Graph::simuRecGBM(double intervalsize, double startrate, double drift, double var, long* seedptr)
{
   const double firstRate = startrate*intervalsize;
   double rateTotal = firstRate;

   //one rate per kb
   numDiffRecRates = (int)(intervSize*1000)+1;
   nonhomoRecsArray = new double[numDiffRecRates];
   if(nonhomoRecsArray==NULL) { cerr<<"Out of memory!"<<endl; exit(-1);}
   nonhomoRecsArray[0] = firstRate;

   //draw paths until one stays at or below the limit everywhere
   int rejected = 0;
   bool accepted = false;
   while(!accepted)
   {
      nonhomoRecsArray[0] = firstRate;
      rateTotal = firstRate;

      int k = 1;
      while(k<numDiffRecRates)
      {
         nonhomoRecsArray[k] = nonhomoRecsArray[k-1]*exp(drift*1000 + sqrt(var*1000)*stdnormal(seedptr));
         if(nonhomoRecsArray[k]>5) break;
         rateTotal += nonhomoRecsArray[k];
         k++;
      }

      if(k==numDiffRecRates)
      {
         accepted = true;
      }
      else
      {
         rejected++;
         if(rejected>=1000)
         {
            cout<<"The proportion of rejected simulations of recombination rates using GBM is too high (1000/1000)."<<endl;
            cout<<"The simulation program was terminated, please modify the GBM parameters."<<endl;
            exit(-1);
         }
      }
   }

   //average recombination rate along the interval
   aveRecRate = rateTotal/(double)(numDiffRecRates)*0.01;

   //cumulative distribution used when choosing a recombination break point
   recRatesCDF = new double[numDiffRecRates];
   if(recRatesCDF==NULL) { cerr<<"Out of memory!"<<endl; exit(-1);}
   double running = 0;
   for(int k=0; k<numDiffRecRates; k++)
   {
      running += nonhomoRecsArray[k];
      recRatesCDF[k] = running/rateTotal;
   }
}

//non-homogeneous recombination rates along chromosomal interval, using Icelandic data
//
//Reads "deCODEMap/MapChr<whichchrom>": two leading numbers (begin and end of the
//chromosome) followed by (physical position, map distance) pairs. The pairs that
//bracket [leftend_interval, rightend_interval] are turned into per-segment
//recombination rates (nonhomoRecsArray), segment end positions relative to the
//left end (varyRec_position), their sum (aveRecRate) and the cumulative
//distribution used for drawing break points (recRatesCDF); numDiffRecRates is the
//number of segments. The program exits with a message if the file cannot be
//opened or does not cover the interval.
void Graph::getIcelandicRecs(int whichchrom, long leftend_interval, long rightend_interval)
{
    //get file name. The buffer has to hold the 16-character prefix, up to two
    //chromosome digits and the terminating '\0'; a 17-byte buffer is already full
    //with the prefix alone, so appending to it wrote past its end.
    char fname[32] = "deCODEMap/MapChr";
    char append[3];
    if(whichchrom<10){ append[0] = '0'+whichchrom; append[1]='\0';}
    else {append[0] = '0'+(whichchrom/10); append[1] = '0'+(whichchrom%10); append[2]='\0';}
    strcat(fname, append);

    ifstream ifs;
    ifs.open(fname, ios::in);
    if(!ifs){ cout<<"The icelandic data: "<<fname<<" can not be opened!"<<endl;
              exit(1);}
    ifs.seekg(0L,ios::beg);

    //read phydis and mapdis of the region between the leftend and the rightend
    //(local names chosen so that the member MapDis is not shadowed)
    vector<long> phyPos; vector<double> mapPos;
    long phydis=0; double mapdis=0; long phydisNext=0; double mapdisNext=0;
    ifs>>phydis; ifs>>phydis;    //begin and end of the chromosome
    ifs>>phydis; ifs>>mapdis; ifs>>phydisNext; ifs>>mapdisNext;
    while(ifs)
    {
        if(phydisNext>leftend_interval)
        {
            //last point at or before the left end, then every point inside the interval
            phyPos.push_back(phydis); mapPos.push_back(mapdis);
            phydis=phydisNext; mapdis=mapdisNext;
            while(phydis>leftend_interval && phydis<rightend_interval)
            {
                phyPos.push_back(phydis); mapPos.push_back(mapdis);
                //a failed read would otherwise repeat or fabricate a point
                if(!(ifs>>phydis>>mapdis))
                {
                    cout<<"The icelandic data: "<<fname<<" ends before the right end of the interval!"<<endl;
                    exit(1);
                }
            }
            //first point at or beyond the right end
            phyPos.push_back(phydis); mapPos.push_back(mapdis);
            break;
        }
        phydis=phydisNext; mapdis=mapdisNext;
        //stop on any read failure, not only on end of file
        if(!(ifs>>phydisNext>>mapdisNext)) break;
    }
    int size = phyPos.size();
    if(size<2)
    {
        cout<<"The icelandic data: "<<fname<<" does not cover the requested interval!"<<endl;
        exit(1);
    }

    varyRec_position = new int[size];
    if(varyRec_position==NULL) { cerr<<"Out of memory!"<<endl; exit(-1);}
    nonhomoRecsArray = new double[size];
    if(nonhomoRecsArray==NULL) { cerr<<"Out of memory!"<<endl; exit(-1);}

    //specify the recombination rate for each sub-region
    int p=0;
    if(size==2)
    {
        //the whole interval lies inside one map segment: a single sub-region that
        //gets the share of the segment covered by the interval
        nonhomoRecsArray[p] = 0.01*(mapPos[1]-mapPos[0])*
            (double)(rightend_interval-leftend_interval)/(double)(phyPos[1]-phyPos[0]);
        varyRec_position[p++] = rightend_interval-leftend_interval;
    }
    else
    {
        //first sub-region: from the left end to the first point inside the interval
        nonhomoRecsArray[p] = 0.01*(mapPos[1]-mapPos[0])*
            (double)(phyPos[1]-leftend_interval)/(double)(phyPos[1]-phyPos[0]);
        varyRec_position[p++] = phyPos[1]-leftend_interval;
        //complete map segments inside the interval
        for(int i=2; i<=size-2; i++)
        {
            nonhomoRecsArray[p] = 0.01*(mapPos[i]-mapPos[i-1]);
            varyRec_position[p++] = phyPos[i]-leftend_interval;
        }
        //last sub-region: from the last point inside the interval to the right end
        nonhomoRecsArray[p] = 0.01*(mapPos[size-1]-mapPos[size-2])*
            (double)(rightend_interval-phyPos[size-2])/(double)(phyPos[size-1]-phyPos[size-2]);
        varyRec_position[p++] = rightend_interval-leftend_interval;
    }
    numDiffRecRates = p;

    //get the total recombination rates for the interval
    aveRecRate=0;
    for(int i=0; i<numDiffRecRates; i++){ aveRecRate += nonhomoRecsArray[i];}

    //CDF for choosing recombination break point
    recRatesCDF = new double[numDiffRecRates];
    if(recRatesCDF==NULL) { cerr<<"Out of memory!"<<endl; exit(-1);}
    double tmp=0;
    for(int i=0; i<numDiffRecRates; i++)
    {
        tmp += nonhomoRecsArray[i];
        recRatesCDF[i] = tmp/aveRecRate;
    }
}

//Recode the sampled sequences into per-locus allele indices.
//Allocates SampleSeqMatrix with numDis+numNormal rows and numMutLoci columns.
//Rows hold the sequences of nodes numDis .. numDis+numNormal-1 first, followed by
//those of nodes 0 .. numDis-1. At every locus the distinct allele values are
//collected in order of first appearance and each entry is replaced by the value
//transform() returns for it (the 1-based position in that list).
//The program exits if a locus carries more than MAXALLE distinct alleles.
void Graph::codingSampleSeqs()
{
   const int chroms = numDis+numNormal;

   SampleSeqMatrix = new int*[chroms];
   if(SampleSeqMatrix==NULL) { cerr<<"Out of memory!"<<endl; exit(-1);}
   for(int k=0; k<chroms; k++)
   {
      SampleSeqMatrix[k] = new int[numMutLoci];
      if(SampleSeqMatrix[k]==NULL) { cerr<<"Out of memory!"<<endl; exit(-1);}
   }

   //row pointers in output order; a vector replaces the former variable-length
   //array, which is not standard C++
   vector<int*> mat(chroms);
   int p=0;
   for(int i=numDis; i<chroms; i++) { mat[p++] = NodesVector[i]->getSequ(); }
   for(int i=0; i<numDis; i++) { mat[p++] = NodesVector[i]->getSequ(); }

   //SNPs, the maximum number of alleles is 4. But for STRPs, it can be more.
   int stand[MAXALLE];
   for(int i=0; i<numMutLoci; i++)
   {
      //unused slots keep the value -1, which terminates the search in transform()
      for(int z=0; z<MAXALLE; z++) stand[z]=-1;

      //collect the distinct alleles of this locus in order of first appearance
      int k=0;
      for(int j=0; j<chroms; j++)
      {
         const int allele = mat[j][i];
         bool seen = false;
         for(int t=0; t<k; t++)
         {
            if(stand[t]==allele) { seen = true; break; }
         }
         if(!seen)
         {
            //refuse to write past the end of the allele table
            if(k>=MAXALLE)
            {
               cerr<<"ERROR: more than "<<MAXALLE<<" alleles at one marker locus."<<endl;
               exit(1);
            }
            stand[k++] = allele;
         }
      }

      for(int j=0; j<chroms; j++) { SampleSeqMatrix[j][i] = transform(stand, mat[j][i]); }
   }
}

int Graph::transform(int tmp[], int another)
{
   int i=0;
   while(tmp[i]>0)
   {
      if(another==tmp[i]) return (i+1);
       i++;
   }
   return 0;
}

//Select the marker loci that pass the allele-frequency filter.
//A locus is dropped when its second allele code never occurs (non-polymorphic) or
//when any allele that does occur has a frequency below cutoffLevel.
//For every kept locus its position (MarkerPosit) is appended to RealMarkerPosit
//and its column of SampleSeqMatrix is copied, in order, into the newly allocated
//RealSampleSeqMatr (numDis+numNormal rows, numMutLoci columns).
//Returns the number of kept loci; when it is 0, RealSampleSeqMatr is not allocated.
int Graph::MarkersCutoff()
{
   int nmarkers=0;
   const int ntips = numDis+numNormal;

   //allele counts for each locus; a vector frees itself on every return path
   //(the former heap array was leaked when no marker passed the filter)
   vector< vector<double> > fre(numMutLoci, vector<double>(MAXALLE, 0.0));
   for(int i=0; i<numMutLoci; i++)
   {
      for(int j=0; j<ntips; j++)
      {
         const int code = SampleSeqMatrix[j][i]-1;
         //skip codes outside the table (e.g. 0, which would index entry -1)
         if(code>=0 && code<MAXALLE) fre[i][code] += 1;
      }
   }

   vector<int> kept;   //indices of the loci that pass the filter
   for(int i=0; i<numMutLoci; i++)
   {
      //non-polymorphic: the second allele code never occurs
      bool rejected = (fre[i][1]==0);
      for(int j=0; j<MAXALLE; j++)
      {
         fre[i][j] = fre[i][j]/(double)ntips;
         if(fre[i][j]!=0 && fre[i][j]<cutoffLevel) rejected = true;
      }
      if(!rejected)
      {
         nmarkers++;
         RealMarkerPosit.push_back(MarkerPosit[i]);
         kept.push_back(i);
      }
   }
   if(nmarkers==0) return 0;

   RealSampleSeqMatr = new int*[ntips];
   if(RealSampleSeqMatr==NULL) { cerr<<"Out of memory!"<<endl; exit(-1);}
   for(int j=0; j<ntips; j++)
   {
      RealSampleSeqMatr[j] = new int[numMutLoci];
      if(RealSampleSeqMatr[j]==NULL) { cerr<<"Out of memory!"<<endl; exit(-1);}
   }

   //copy the kept columns, packed to the left
   for(int p=0; p<nmarkers; p++)
   {
      for(int j=0; j<ntips; j++)
      { RealSampleSeqMatr[j][p] = SampleSeqMatrix[j][kept[p]]; }
   }
   return nmarkers;
}

//Sample genotype counts under the penetrance model and derive the chromosome counts.
//fDD, fDN and fNN are used as the probabilities of the disease phenotype for the
//genotypes DD, DN and NN; currentDisFreq() supplies the frequency of the disease
//allele. The genotype probabilities conditional on the disease phenotype and on
//the normal phenotype are computed, multinomial() fills num_geno_givenDis (for
//numDisIndiv individuals) and num_geno_givenNorm (for numNormIndiv individuals),
//and numDis / numNormal are set to the resulting numbers of disease-carrying and
//normal chromosomes.
void Graph::penetrance(long* seedptr)
{
   const double f_DD=fDD, f_DN=fDN, f_NN=fNN;
   const double fdis = currentDisFreq();
   const double fnorm = 1-fdis;

   //joint probabilities of (genotype, disease phenotype)
   const double disDD = f_DD*fdis*fdis;
   const double disDN = f_DN*2*fdis*fnorm;
   const double disNN = f_NN*fnorm*fnorm;
   const double prob_dis_3genotypes = disDD + disDN + disNN;

   //joint probabilities of (genotype, normal phenotype)
   const double normDD = (1-f_DD)*fdis*fdis;
   const double normDN = (1-f_DN)*2*fdis*fnorm;
   const double normNN = (1-f_NN)*fnorm*fnorm;
   const double prob_norm_3genotypes = normDD + normDN + normNN;

   //genotype probabilities given the disease phenotype, order DD, DN, NN
   double prob_geno_givenDis[3];
   prob_geno_givenDis[0] = disDD/prob_dis_3genotypes;
   prob_geno_givenDis[1] = disDN/prob_dis_3genotypes;
   prob_geno_givenDis[2] = disNN/prob_dis_3genotypes;
   num_geno_givenDis = new int[3];
   if(num_geno_givenDis==NULL) { cerr<<"Out of memory!"<<endl; exit(-1);}

   //genotype probabilities given the normal phenotype, order DD, DN, NN
   double prob_geno_givenNorm[3];
   prob_geno_givenNorm[0] = normDD/prob_norm_3genotypes;
   prob_geno_givenNorm[1] = normDN/prob_norm_3genotypes;
   prob_geno_givenNorm[2] = normNN/prob_norm_3genotypes;
   num_geno_givenNorm = new int[3];
   if(num_geno_givenNorm==NULL) { cerr<<"Out of memory!"<<endl; exit(-1);}

   //number of individuals with each genotype: disease phenotype first, then normal
   multinomial(numDisIndiv, prob_geno_givenDis, 3, seedptr, num_geno_givenDis);
   multinomial(numNormIndiv, prob_geno_givenNorm, 3, seedptr, num_geno_givenNorm);

   //a DD individual carries two disease chromosomes, a DN individual one of each,
   //an NN individual two normal chromosomes
   numDis = 2*num_geno_givenDis[0] + num_geno_givenDis[1]
            + 2*num_geno_givenNorm[0] + num_geno_givenNorm[1];
   numNormal = num_geno_givenDis[1] + 2*num_geno_givenDis[2]
            + num_geno_givenNorm[1] + 2*num_geno_givenNorm[2];
}

//Pair haplotypes into genotypes; only the haplotype numbers are stored.
//ranOrder() supplies the haplotype numbers: normal haplotypes occupy positions
//0 .. numNormal-1 and disease haplotypes positions numNormal .. numNormal+numDis-1.
//genotypeDataNum receives them in the order
//DD_normal, DN_normal, NN_normal, DD_disease, DN_disease, NN_disease;
//a DN pair is written as disease haplotype followed by normal haplotype.
void Graph::getGenotypeSample(int* genotypeDataNum, long* seedptr)
{
   int* order = new int[numNormal+numDis];
   if(order==NULL) { cerr<<"Out of memory!"<<endl; exit(-1);}
   ranOrder(order, seedptr);

   //read cursors into order[] and write cursor into genotypeDataNum[]
   int nextNorm = 0;
   int nextDis = numNormal;
   int out = 0;

   //running haplotype count at which each group ends
   const int endDDnorm = num_geno_givenNorm[0]*2;
   const int endDNnorm = num_geno_givenNorm[0]*2+num_geno_givenNorm[1]*2;
   const int endNorm   = numNormIndiv*2;
   const int endDDdis  = numNormIndiv*2+num_geno_givenDis[0]*2;
   const int endDNdis  = numNormIndiv*2+num_geno_givenDis[0]*2+num_geno_givenDis[1]*2;
   const int endAll    = numNormIndiv*2 + numDisIndiv*2;

   int hap = 0;
   for(; hap<endDDnorm; hap++)     //DD_normal
   {
      genotypeDataNum[out++] = order[nextDis++];
   }
   for(; hap<endDNnorm; hap+=2)    //DN_normal
   {
      genotypeDataNum[out++] = order[nextDis++];
      genotypeDataNum[out++] = order[nextNorm++];
   }
   for(; hap<endNorm; hap++)       //NN_normal
   {
      genotypeDataNum[out++] = order[nextNorm++];
   }
   for(; hap<endDDdis; hap++)      //DD_disease
   {
      genotypeDataNum[out++] = order[nextDis++];
   }
   for(; hap<endDNdis; hap+=2)     //DN_disease
   {
      genotypeDataNum[out++] = order[nextDis++];
      genotypeDataNum[out++] = order[nextNorm++];
   }
   for(; hap<endAll; hap++)        //NN_disease
   {
      genotypeDataNum[out++] = order[nextNorm++];
   }

   delete [] order;
}

//Randomly arrange the order of haplotypes within the normal group and within the
//disease group. order must have room for numDis+numNormal entries; on return it
//holds a permutation in which positions 0 .. numNormal-1 contain the normal
//haplotype numbers and the remaining positions the disease haplotype numbers.
void Graph::ranOrder(int* order, long* seedptr)
{
    const int total = numDis+numNormal;

    //start from the identity arrangement
    int k = 0;
    while(k<total) { order[k] = k; k++; }

    //reshuffle with 100 rounds of random swaps per haplotype
    const int rounds = total*100;
    for(int r=0; r<rounds; r++)
    {
        //swap two positions of the normal group
        const int a = random2(numNormal, seedptr);
        const int b = random2(numNormal, seedptr);
        const int heldNorm = order[a];
        order[a] = order[b];
        order[b] = heldNorm;

        //swap two positions of the disease group, if it has more than one member
        if(numDis>1)
        {
            const int c = random4(numNormal, numNormal+numDis, seedptr);
            const int d = random4(numNormal, numNormal+numDis, seedptr);
            const int heldDis = order[c];
            order[c] = order[d];
            order[d] = heldDis;
        }
    }
}

//Contribution of one allele in one group (disease or normal) to a marker's D' sum.
//joint   - frequency of chromosomes that carry the allele and belong to the group
//allelef - frequency of the allele in the whole sample
//groupf  - frequency of the group in the whole sample
//Returns groupf*allelef*|D|/D_max with D = joint - allelef*groupf, or 0 when
//D_max is 0 (allele absent or fixed), where the quotient would be undefined.
static double dprimeTerm(double joint, double allelef, double groupf)
{
   const double D = joint - allelef*groupf;
   double D_max;
   if(D<0)
   {
      if(groupf*allelef < (1-groupf)*(1-allelef)) D_max = groupf*allelef;
      else D_max = (1-groupf)*(1-allelef);
   }
   else
   {
      if(groupf*(1-allelef) < (1-groupf)*allelef) D_max = groupf*(1-allelef);
      else D_max = (1-groupf)*allelef;
   }
   if(D_max<=0) return 0;
   return groupf*allelef*fabs(D)/D_max;
}

//calculate absolute value of D', also suitable for multiple alleles
//
//realseqmatr - allele codes (1-based), one row per chromosome: rows 0 .. nnorm-1
//              are the normal sample, rows nnorm .. nnorm+ndis-1 the disease sample
//nmarkers    - number of marker columns in realseqmatr
//nnorm, ndis - number of normal and disease chromosomes
//D_prime     - output, one value per marker (needs nmarkers entries)
void Graph::linkagedis(int** realseqmatr, int nmarkers, int nnorm, int ndis, double* D_prime)
{
   const int ntips = ndis+nnorm;

   //allele counts for each marker: whole sample, normal sample, disease sample.
   //Vectors release their storage on return; the former heap arrays were never freed.
   vector< vector<double> > fre(nmarkers, vector<double>(MAXALLE, 0.0));
   vector< vector<double> > fre_norm(nmarkers, vector<double>(MAXALLE, 0.0));
   vector< vector<double> > fre_dis(nmarkers, vector<double>(MAXALLE, 0.0));

   //number of alleles at each locus, mostly it should be 2 for SNPs.
   vector<int> maxallele(nmarkers, 2);

   for(int i=0; i<nmarkers; i++)
   {
      for(int j=0; j<ntips; j++)
      {
         const int tmp = realseqmatr[j][i]-1;
         //ignore codes that do not fit the frequency tables
         if(tmp<0 || tmp>=MAXALLE) continue;
         if(j<nnorm) fre_norm[i][tmp] += 1;
         else        fre_dis[i][tmp] += 1;
         fre[i][tmp] += 1;
         if(tmp+1>maxallele[i]) maxallele[i] = tmp+1;
      }
   }

   //turn the counts into frequencies relative to the whole sample
   for(int i=0; i<nmarkers; i++)
   {
      for(int j=0; j<maxallele[i]; j++)
      {
         fre_norm[i][j] = fre_norm[i][j]/ntips;
         fre_dis[i][j] = fre_dis[i][j]/ntips;
         fre[i][j] = fre[i][j]/ntips;
      }
   }

   const double disf = double(ndis)/(double(ndis)+double(nnorm));
   const double normf = 1-disf;
   //calculate LD for each marker
   for(int i=0; i<nmarkers; i++)
   {
      double Dp_sum=0;
      for(int j=0; j<maxallele[i]; j++)
      {
         Dp_sum += dprimeTerm(fre_dis[i][j], fre[i][j], disf);     //disease
         Dp_sum += dprimeTerm(fre_norm[i][j], fre[i][j], normf);   //normal
      }
      D_prime[i] = Dp_sum;
   }
}




