#include <stdlib.h>
#include <stdio.h>
#include <time.h>
#include <math.h>

/*
   Program "hwmetc2.c"          (15/05/97)
   Updated for ANSI C; 18/07/00   - to compile type  cc hwmetc2.c -lm

   Karen L. Ayres               
   University of Reading 
   e-mail: k.l.ayres@reading.ac.uk 
   
   Metropolis-Hastings algorithm for obtaining a sample from the
   posterior distribution of the allele frequencies and the inbreeding
   coefficient f, for the Hardy-Weinberg inbreeding model
   (e.g. Malecot, 1969):

   p_ii=p_i(f+(1-f)p_i)
   p_ij=2p_ip_j(1-f)

   The method is detailed in Ayres & Balding, Heredity 1998
	
   Assumes constancy of f across loci, and so combines information
   from more than one locus (joint likelihood equals multiple of
   individual likelihoods).

   Only allows for positive values of f, which may be reasonable if
   consanguinity is thought to be the main cause of departure from HW,
   since consanguinity should lead to similar values of f across loci,
   and result in positive f.

   Reads from input file "hwmet.inp", which is a data file
   whose first line contains the integer 1, to represent that the
   resulting data is that for the first locus, and whose second line
   CONTAINS ONLY AN INTEGER WHICH IS THE NUMBER OF ALLELES IN THE
   RESULTING DATA SET - this is stored in "nall[loc]".
   The following lines in the input file should consist of a "nall[loc]"
   by "nall[loc]" matrix of observed genotype counts - counts for
   homozygous genotypes are on the diagonal, heterozygous genotypes
   are off the diagonal. The next line should then contain a 2, followed
   by the same format as for locus 1 etc. e.g.
   1 2
   12 34
   0 7
   2 2
   21 17
   0 19

   Outputs "f" parameter values to file "hwmet.out", and information
   about the implementation to "hwmet.stats".

   The observed genotype counts are stored in "n[loc][dim1]".

   The p[loc][i] each have a Dirichlet prior (independent for each
   locus) with parameters "pprior[loc][i]" (setting all pprior=1 
   gives a multivariate uniform prior) and f has a standard Beta(bpa,bpb) 
   prior, independent of all p's. All prior parameters should be 
   defined at the beginning of the program.

   A Metropolis-Hastings algorithm is run for "numit" iterations,
   the first "discard" of which are discarded as the burn-in.
   Subsequently, every "kp"th iteration is output.
   Note that "numit" is determined by the values of "kp", "discard"
   and "nout", where "nout" is the number of values desired to be
   output.

   The limiting bounds "epsln" and "epsln2" (>0) can be changed as 
   necessary in order to ensure reasonable acceptance rates.
   
   Density curves can be produced within a suitable statistical computer
   package by applying density estimation to the output from this
   program.
*/

#define epsln 0.01  /* limit on amount to update p's by (>0) */
#define epsln2 0.02  /* limit on amount to update f by (>0) */
#define bpa 1.0 /* parameter "alpha" for beta prior of f */
#define bpb 1.0 /* parameter "beta" for beta prior of f */

#define maxloc 6  /* maximum number of loci allowed */
#define maxall 15  /* maximum number of alleles allowed at a locus */
#define kp 50     /* interval between successive output values */
#define discard 5000L    /* "burn-in" value of the chain */
#define nout 10000L     /* number of output values required */
#define dpp 1.0 /* parameters for Dirichlet prior of each {p[loc][i]} */
#define numit (nout*kp+discard) /* number of iterations of alg. */
#define acrt (numit/50)  /* interval between output of acceptance rates */
#define dim1 ((maxall*(maxall+1))/2)

typedef double pvec[maxloc][maxall];

int     i, j, nn, loc, k[maxloc], nsam[maxloc], nall[maxloc], ngen[maxloc], nloc;
int     x[maxloc][maxall][maxall], n[maxloc][dim1], incp[maxloc], decp[maxloc];
long    l1, jmp, ix, iy, iz;
double  y, a1[maxloc], a2[maxloc], b1[maxloc], b2[maxloc], b3, b4, a3, a4;
double  r, f, newf, z, newz, genf[dim1];
pvec    p, newp, pprior;
FILE    *inp, *out1, *out2;

double WichHill()
{
/* Wichmann & Hill random number generator (Algorithm AS183 in Applied
   Statistics 31 (1982)) 
*/

ix=(171*ix)%30269;
iy=(172*iy)%30307;
iz=(170*iz)%30323;
return fmod((ix/30269.0)+(iy/30307.0)+(iz/30323.0),1.0);
}

void inpdat()
{
out1=fopen("hwmet.stats","w");
fprintf(out1,"Number of iterations %8ld\n",numit);
inp=fopen("hwmet.inp","r");

while ((nn=fscanf(inp,"%d",&nloc))!=EOF){
	fscanf(inp,"%d",&nall[nloc-1]);

/* Check for invalid input file */

	if (nloc>maxloc){
		fprintf(out1,"ERROR - TOO MANY LOCI - CHANGE MAXLOC\n");
		exit(1);
	}
	if (nall[nloc-1]>maxall){
		fprintf(out1,"ERROR - TOO MANY ALLELES - CHANGE MAXALL\n");
		exit(1);
	}

	ngen[nloc-1]=(nall[nloc-1]*(nall[nloc-1]+1))/2;
	for (i=0;i<nall[nloc-1];++i){
		for (j=0;j<nall[nloc-1];++j){
			fscanf(inp,"%d",&x[nloc-1][i][j]);
			nsam[nloc-1]+= x[nloc-1][i][j];
			k[nloc-1]+= x[nloc-1][i][j]+x[nloc-1][j][i];
		}
	}
}

/* observed genotype counts, using genotype identifier for ij: i+(j*(j+1))/2
   where i<=j (modified for i=0,...,k-1 from that given for i=1,...,k in
   Zaykin, Zhivotovsky & Weir (1995) - Genetica 96)
*/

for (loc=0;loc<nloc;++loc){
	for (i=0;i<nall[loc];++i){
		for (j=i;j<nall[loc];++j){
			n[loc][i+(j*(j+1))/2]=x[loc][i][j];
			if (i!=j){
				n[loc][i+(j*(j+1))/2]+= x[loc][j][i];
			}
		}
	}
}

/* set Dirichlet prior parameters for the p[loc][i] (>0) */

for (loc=0;loc<nloc;++loc){
	for (i=0;i<nall[loc];++i){
		pprior[loc][i]=dpp;
	}
}

/* output statistics to file */

fprintf(out1,"Beta prior parameters for f - alpha= %f  beta= %f\n",bpa,bpb);
fprintf(out1,"Dirichlet prior parameters for all alleles, at all loci - %8.6f\n",dpp);
fprintf(out1,"\nSample sizes:\n");
for (loc=0;loc<nloc;++loc){
	fprintf(out1,"Locus %3d, nsam= %6d\n",loc+1,nsam[loc]);
}
fprintf(out1,"\nAcceptance rates\n");

fclose(inp);
fflush(out1);
}

double lbetad(double fs)
{
/* evaluate log of the prior density for "fs" (excluding normalizing
   constant)
*/

return ((bpa-1.0)*log(fs)+(bpb-1.0)*log(1.0-fs));
}

double ldiricd(int l,pvec ps)
{
/* evaluate log of prior density for the allele frequency vector "ps" at
   locus "l" (excluding normalizing constant)
*/

double ldd=0.0;

for (i=0;i<nall[l];++i){
	ldd+= (pprior[l][i]-1.0)*log(ps[l][i]);
}
return ldd;
}

double rstgam(double alpha1)
{
/* Cheng & Feast algorithm (algorithm 3.20 in Ripley 1988) for
   generating a standard Gamma random variable with shape
   parameter alpha>1 
*/

int     acpt;
double  c1, c2, c3, c4, c5, u1, u2, w;

c1=alpha1-1.0;
c2=(alpha1-(1.0/(6.0*alpha1)))/c1;
c3=2.0/c1;
c4=c3+2.0;
c5=1.0/sqrt(alpha1);
do{
	do{
		u1=WichHill();
		u2=WichHill();
		if (alpha1>2.5){
			u1=u2+c5*(1.0-(1.86*u1));
		}
	}
	while (u1>=1.0 || u1<=0);

	w=(c2*u2)/u1;
	if ((c3*u1+w+(1.0/w))<=c4){
		acpt=1;
	}
	else if ((c3*log(u1)-log(w)+w)>=1.0){ /* repeat outer "do" loop */
		acpt=0;
	}
	else{
		acpt=1;
	}
	}
while (acpt!=1);

return c1*w;
}

void rdiric(pvec pars)
{
/* Generates "nloc" vectors of Dirichlet random variables with parameters in
   "pars", and stores in "pars" 
*/

int     np;
double  gsum;

for (loc=0;loc<nloc;++loc){
	gsum=0.0;
	for (np=0;np<nall[loc];++np){
		if (pars[loc][np]>1.0){
			pars[loc][np]=rstgam(pars[loc][np]);
		}
		else if (pars[loc][np]<1.0){    /* Ex. 3.22 in Ripley(1988) for alpha<1 */
			pars[loc][np]=rstgam(pars[loc][np]+1.0)*pow(WichHill(),1.0/pars[loc][np]);
		}
		else {    /* Exponential */
			pars[loc][np]= (-log(WichHill()));
		}
		gsum+= pars[loc][np];
	}
	for (np=0;np<nall[loc];++np){
		pars[loc][np]=pars[loc][np]/gsum;
	}
}
}

double rgam(double pp)
{
/* returns a standard Gamma random variable, with shape parameter "pp" */

double rgm;

if (pp>1.0){
	rgm=rstgam(pp);
}
else if (pp<1.0){    /* Ex. 3.22 in Ripley(1988) for alpha<1 */
	rgm=rstgam(pp+1.0)*pow(WichHill(),1.0/pp);
}
else {    /* Exponential */
	rgm= (-log(WichHill()));
}
return rgm;
}

double llkhd(int l,double fs,pvec ps)
{
/* evaluates the log of the joint likelihood of "fs" and "ps" for
   locus "l" (exlcuding multinomial constant)
*/

double  lld=0.0;
for (i=0;i<nall[l];++i){
	for (j=i+1;j<nall[l];++j){
		genf[i+(j*(j+1))/2]=2.0*(1-fs)*ps[l][i]*ps[l][j];
	}
}
for (i=0;i<nall[l];++i){
	genf[(i*(i+3))/2]=ps[l][i]*(fs+(1-fs)*ps[l][i]);
}

/* log likelihood */

for (i=0;i<ngen[l];++i){
	if (n[l][i]!=0){
		lld+= n[l][i]*log(genf[i]);
	}
}
return lld;
}

main()
{
srand((unsigned) time(NULL));
out2=fopen("hwmet.out","w");

/* initialize seeds for random number generator */

ix=(long)(rand()%30000)+1;
iy=(long)(rand()%30000)+1;
iz=(long)(rand()%30000)+1;

inpdat();

/* initialize "p" and "f" values */

for (loc=0;loc<nloc;++loc){
	for (i=0;i<nall[loc];++i){
		p[loc][i]=1.0;
	}
}
rdiric(p);
f=rgam((double)bpa);
f=f/(f+rgam((double)bpb));

/* initialize density value "z" */

z=lbetad(f);
for (loc=0;loc<nloc;++loc){
	z+= ldiricd(loc,p)+llkhd(loc,f,p);
}

/* begin Metropolis-Hastings algorithm */

for (l1=1;l1<=numit;++l1){

/* choose new candidate values "newp" and "newf" */

	for (loc=0;loc<nloc;++loc){
		incp[loc]=(int)(WichHill()*nall[loc]);
		do {
			decp[loc]=(int)(WichHill()*nall[loc]);
		}
		while (decp[loc]==incp[loc]);

		b1[loc]=(p[loc][incp[loc]]+epsln<p[loc][incp[loc]]+p[loc][decp[loc]])?p[loc][incp[loc]]+epsln:p[loc][incp[loc]]+p[loc][decp[loc]];
		if (b1[loc]>1.0){  /* can only occur through rounding error */
			b1[loc]=1.0;
		}
		a1[loc]=(p[loc][incp[loc]]-epsln>0)?p[loc][incp[loc]]-epsln:0;
		for (i=0;i<nall[loc];++i){
			newp[loc][i]=p[loc][i];
		}
		newp[loc][incp[loc]]= (b1[loc]-a1[loc])*WichHill()+a1[loc];
		newp[loc][decp[loc]]-= newp[loc][incp[loc]]-p[loc][incp[loc]];
	}
	a3=(0>f-epsln2)?0:f-epsln2;
	b3=(1<f+epsln2)?1:f+epsln2;
	newf=a3+WichHill()*(b3-a3);

/* evaluate density "newz" at candidate values */

	newz=lbetad(newf);
	for (loc=0;loc<nloc;++loc){
		newz+= ldiricd(loc,newp)+llkhd(loc,newf,newp);
	}

/* decide whether or not to accept candidate values */

	for (loc=0;loc<nloc;++loc){
		b2[loc]=(newp[loc][incp[loc]]+epsln<newp[loc][incp[loc]]+newp[loc][decp[loc]])?newp[loc][incp[loc]]+epsln:newp[loc][incp[loc]]+newp[loc][decp[loc]];
		if (b2[loc]>1.0){  /* can only occur through rounding error */
			b2[loc]=1.0;
		}
		a2[loc]=(newp[loc][incp[loc]]-epsln>0)?newp[loc][incp[loc]]-epsln:0;
	}

	a4=(0>newf-epsln2)?0:newf-epsln2;
	b4=(1<newf+epsln2)?1:newf+epsln2;

	r=newz-z+log(b3-a3)-log(b4-a4);
	for (loc=0;loc<nloc;++loc){
		r+= log(b1[loc]-a1[loc])-log(b2[loc]-a2[loc]);
	}

	if (log(WichHill())<r){ /* accept candidate values */
		z=newz;
		for (loc=0;loc<nloc;++loc){
			for (i=0;i<nall[loc];++i){
				p[loc][i]=newp[loc][i];
			}
		}
		f=newf;
		jmp++;
	}

/* output */

	if (((l1/kp)*kp==l1) && (l1>discard)){
		fprintf(out2,"%10.6f\n",f);
		fflush(out2);
	}

	if ((l1/acrt)*acrt==l1){     /* output acceptance rate */
		fprintf(out1,"%12.6f\n",((double)(jmp))/l1);
		fflush(out1);
	}
}

fclose(out1);
fclose(out2);
return 0;
}
