/* This program calculates the multilocus gene diversity by maximum
likelihood method, \hat D, and by UMVUE method, \tilde D.
It also generates B number of bootstrap samples and estimates the
variances of these estimators along with 100(1-\alpha)% bootstrap
confidence interval.

The exucutable file is called mult and it needs a seed number
to be typed at the screen.

The input file should have number of loci, NL, the number of
alleles at the first locus, m_1, and the counts of different genotypes
at the first loucs, the number of alleles at the second locus, m_2, the
counts of different genotypes at the second loucs and so on. 
Also it should have the number of bootstrap samples desired B, and
the \alpha for 100(1-\alpha)% bootstrap confidence interval.

The input file would look like, and is called genotype.file
--------------
NL
m_1
N[11] 
N[12] N[22]
N[13] N[23] N[33]
..................

......................

N[1m_1] N[2m_1] N[3m_1] .........N[m_1m_1]
m_2
N[11] 
N[12] N[22]
N[13] N[23] N[33]
..................

......................
N[1m_2] N[2m_2] N[3m_2] .........N[m_2m_2]
................
................
................
m_NL
N[11] 
N[12] N[22]
N[13] N[23] N[33]
..................

......................
N[1m_NL] N[2m_NL] N[3m_NL] .........N[m_NLm_NL]
B
alpha
---------------
This program is written to obtain the gene diversity using
estimation methods proposed in the paper Shete S. (2002),
Submitted for publication.
---------------
The output file is called diversity.out and would look like:
(except for the commas)

\hat D               \tilde D             << For Locus 1
\hat D               \tilde D             << For Locus 2
......               .......              .......
......               .......              .......
......               .......              .......
\hat D               \tilde D             << For Locus NL

\hat D               \tilde D             << Final D value (Averaging over all loci)
\hat[S.D.(\hat D)]   \hat[S.D.(\tilde D]  << S. D. of final D
L(\hat D)            L(\tilde D)          << Lower CI for D
U(\hat D)            U(\tilde D)          << Upper CI for D
----------------------
OUTPUT: The first column of the output is for MLE and
the second column of the output is for UMVUE.
First column:

MLE of D, 
estimated standard deviation of MLE of D,
lower limit for 100(1-\alpha)% bootstrap CI for D based on MLE, 
upper limit for 100(1-\alpha)% bootstrap CI for D based on MLE.

Second column:

UMVUE of D, 
estimated standard deviation of UMVUE of D,
lower limit for 100(1-\alpha)% bootstrap CI for D based on UMVUE, 
upper limit for 100(1-\alpha)% bootstrap CI for D based on UMVUE.

---------------
07/17/01
Sanjay Shete 
Last update 07/20/01 5:00 pm.
Last update 03/07/02 4:00 pm.
*/

#include <stdlib.h>
#include <stdio.h>
#include <iostream.h>
#include <fstream.h>  
#include <math.h>
#include <malloc.h>
#include <limits.h>
#include "matrix.h"
#include <vector>
#include <list>
using namespace std;

int Binomial(int, long double);
void Multinomial(int, int, matrix<long double>*);
void Estimate(int, int, matrix<int>*, long double*, long double*);

extern "C" double drand48(void);
extern "C" void srand48(long int seed);

  int NL;

  matrix<int>* O;
  matrix<long double>* Q;

  vector<matrix<int>*> mvO;
  vector<matrix<long double>*> mvQ;

main()
{ 
 ifstream getit;
 getit.open ("genotype.file", ios::in);

 ofstream putit;
 putit.open("diversity.out", ios::out);

 long int seed;
 int i,j,k,B;

 cout <<"Enter seed value = "<<"\n";
 cin >> seed;
 srand48 (seed);

  getit >> NL;
  int m[NL+1];
  int N[NL+1];
  long double Dhat[NL+1], Dtilde[NL+1], alpha;

  for (i=1; i<=NL; i++) {
    getit >> m[i];
    N[i] = 0;
    O = new matrix<int>(m[i]+1);
    for (j=1; j<=m[i]; j++) {
      for (k=1; k<=j; k++) {
	getit >> (*O)[k][j];
        N[i] += (*O)[k][j];
	(*O)[j][k] = (*O)[k][j];
      }
    }
    mvO.push_back(O);
  }
  Dhat[0] = 0.0;
  Dtilde[0] = 0.0;
  i=1;
  for (vector<matrix<int>*>::iterator vi=mvO.begin(); vi != mvO.end(); vi++) {

    long double tmp = 0.0;
    Q = new matrix<long double>(m[i]+1);
    for (j=1; j<=m[i]-1; j++) {
      for (k=j; k<=m[i]; k++) {
	long double const1 = (**vi)[j][k]/(long double) N[i];
        (*Q)[j][k] = const1/(1.0-tmp);
       	tmp += const1;
      }
    }
    mvQ.push_back(Q);
    Estimate(N[i],m[i],*vi,&Dhat[i],&Dtilde[i]);
    putit <<Dhat[i]<<"       "<<Dtilde[i]<<"\n";
    Dhat[0] += Dhat[i];
    Dtilde[0] += Dtilde[i];

    i++;
    delete *vi;
  }
    Dhat[0] /= (long double) NL;
    Dtilde[0] /= (long double) NL;
    if (NL>1) {
    putit <<Dhat[0]<<"       "<<Dtilde[0]<<"\n";}

  mvO.resize(0);
  getit >> B;
  getit >> alpha;
  list<long double> DBhat;
  list<long double> DBtilde;

 /* Bootstrapping starts next */

 for (int ii=1; ii<=B; ii++) {
   long double tmp1 = 0.0;
   long double tmp2 = 0.0;
 i=1;
  for (vector<matrix<long double>*>::iterator viQ=mvQ.begin(); viQ != mvQ.end(); viQ++) {
    O = new matrix<int>(m[i]+1);
    Multinomial(N[i],m[i],*viQ);
    mvO.push_back(O);
    Estimate(N[i],m[i],O,&Dhat[i],&Dtilde[i]);
    tmp1 += Dhat[i];
    tmp2 += Dtilde[i];
    i++;
  }
    DBhat.insert(DBhat.end(), tmp1/NL);
    DBtilde.insert(DBtilde.end(), tmp2/NL);
      }

 DBhat.sort();
 DBtilde.sort();
 
 long double DBVhat = 0.0;
 long double DBVtilde = 0.0;
for (list<long double>::iterator li=DBhat.begin(); li != DBhat.end(); li++)
  { 
  DBVhat += ((*li)-Dhat[0])*((*li)-Dhat[0]);}
  DBVhat /= (long double) (B-1);
  putit <<pow(DBVhat,0.5) <<"     ";
for (list<long double>::iterator li=DBtilde.begin(); li != DBtilde.end(); li++)
   { 
  DBVtilde += ((*li)-Dtilde[0])*((*li)-Dtilde[0]);}
  DBVtilde /= (long double) (B-1);
  putit <<pow(DBVtilde,0.5)<<"\n";

 list<long double>::iterator li=DBhat.begin();
   for (i=0; i<(int) B*alpha-1; i++) {li++;}
   putit <<*li<<"      ";

   li=DBtilde.begin();
   for (i=0; i<(int) B*alpha-1; i++) {li++;}
   putit <<*li<<"\n";

 list<long double>::reverse_iterator ri=DBhat.rbegin();
   for (i=0; i<(int) B*alpha-1; i++) {ri++;}
   putit <<*ri<<"      ";

   ri=DBtilde.rbegin();
   for (i=0; i<(int) B*alpha-1; i++) {ri++;}
   putit <<*ri<<"\n";
}

int Binomial(int L, long double q)
{/*generates sample from Bionomial distribution with parameters L and q. */

  int i, j;
  j=0;
  for (i=1; i<=L; i++)
   { if ( (drand48()) < q) j++;}
  return(j);
}

void Multinomial(int N, int m, matrix<long double>* cmvQ)
{
/*next we generate sample from multinomial distribution
 with parameters N[i] and P[11]..P[1m] P[22]..P[2m] .. P[mm]
 Hear these P[i][j] parameters are estimated above. The mvQ
 matrix is obtained earlier so that the counts are just Binomial. */

 int i,j;
 int T, K; 

 long double tmp;
 tmp = 0.0;

 T = N;
 K = 0;
 for (i=1; i<= m-1; i++)
    { for (j=i; j<= m; j++)
  { (*O)[i][j] = Binomial(T,(*cmvQ)[i][j]);
    (*O)[j][i] = (*O)[i][j];
    T -= (*O)[i][j];
    K += (*O)[i][j];
  }
    }
 (*O)[m][m] = N-K;
}

void Estimate(int N, int m, matrix<int>* vi, long double *Dhat, long double *Dtilde)
{
int sumNii, sumNij;
long double sumpi2;
int i,j;

long double p[m+1];

sumNii = 0;
sumpi2 = 0.0;

for (i=1; i<=m; i++)
   {sumNij = 0;
    for (j=1; j<=m; j++)
       {if(j != i) {sumNij += (*vi)[i][j];}
       }
    p[i] = ((long double) (*vi)[i][i] + sumNij/2.0)/(long double) N;
    sumNii += (*vi)[i][i];
    sumpi2 += p[i]*p[i];
   }

 (*Dhat) = 1.0 - sumpi2;
 (*Dtilde) = (N*(*Dhat)-0.5+ (long double) sumNii/(2*N))/(N-1);
}
