program diseqsim(input,output);

const
   maxsnp = 1000; (* Maximum number of SNPs in the whole region *)
   maxhap = 30000;  (* maximum number of chromosomes in the population at one time *)
   maxrare = 500;  (* Maximum number of rare SNP variants on one haplotype *)
   maxexon = 1;   (* Maximum number of exons per gene *)
   maxreg = 1;    (* Maximum number of regulatory regions per gene *)
type
   real = double;
var
   subdivision,hotspot:boolean;
   startsub,endsub,minsize:integer;
   expgen,mhsmean:integer; 
   mhsrate,hsprob,hspct,mixpar,x,y:real;
   hsstart,hsmean,seed1,seed2,seed3:integer;
   snpold:array[1..maxhap,0..maxrare] of integer;
   snpnew:array[1..maxhap,0..maxrare] of integer;
   numsnps,kkkkk,kkkk,genelength:integer;
   minpopsize,maxpopsize:integer;
   exon:array[1..maxexon,1..2] of integer;
   regulatory:array[1..maxreg,1..2] of integer;
   mr,mutrate: double;
tmpt,snpfreq:real;
   mhs:array[0..maxsnp] of integer;
   gcrate:real;
   gclength:integer;
   recrate: real;
   selection:array[1..maxsnp] of real;
   orighap,numhap, numhapnew:integer;
   numgen:integer;
   outfl,seed:text;
   model:text;
function randomnum:real;
{
   This function generates independent uniform deviates on the interval
(0,1).  See the reference:  Wichman BA, Hill ID (1982).  Algorithm 183:
an efficient and portable pseudo-random number generator.  Applied
Statistics 31,188-190.
   Global variables required:  seed1 seed2 seed3  (integer)
Seed1, seed2 and seed3 should be set to integer values before the first
entry.  Each call to randomnum updates the 3 seeds.  Integer arithmetic
up to 30323 is needed on your computer.
}
var
 r:real;
begin {randomnum}
 seed1:=171*(seed1 mod 177)-2*(seed1 div 177);
 seed2:=172*(seed2 mod 176)-35*(seed2 div 176);
 seed3:=170*(seed3 mod 178)-63*(seed3 div 178);
 if (seed1<0) then seed1:=seed1+30269;
 if (seed2<0) then seed2:=seed2+30307;
 if (seed3<0) then seed3:=seed3+30323;
 r:=seed1/30269.0+seed2/30307.0+seed3/30323.0;
 randomnum:=r-trunc(r);
  {  RANDOM:=MOD(R,1.000)  Note: a mod b = a - ((a div b)*b  }
end;  {randomnum}
function simexp(m:real):real;
var
  tt:real;
       
begin
   tt:=randomnum;
   if tt < 0.0000000001 then tt:=0.0000000001;
   simexp:= -(m)*ln(tt);
end;

function simerlang(mean:real;df:integer):real;
   
var
  ttt:real;
  i:integer;
begin
  ttt:=0;
  for i:= 1 to df do
     ttt:=ttt+simexp((mean/df));
  simerlang:=ttt;
end;


procedure default;
var
xx:char;
pos,i,j:integer;

(*
function simselect(pos,numpos:integer);

begin
   tmp:=randomnum;
   if tmp < neutralmut then tmpp:=0 else
   if tmp < (neutralmut + stopmut) then tmp:=-0.5 else
   if tmp < (neutralmut + stopmut + mildnegmut) then tmp:= -0.05 else tmp:=0.05;
   simselect:=tmp;
end;
*)

begin
  open(model,'model.summary',history:=new);
  rewrite(model);
   reset(seed,'seed.dat');
   readln(seed,seed1,seed2,seed3);
   writeln('What is the length of the region to be simulated? ');
   readln(genelength);
   if genelength < 100 then genelength:=100;
numhap:= maxhap+1;
while numhap > maxhap do begin
   writeln('What is the mean population size? (<= ',(maxhap/2):1:0,' ) ');
   readln(numhap);
   if numhap < 10 then numhap:=10;
   numhap:=numhap*2;
   orighap:=numhap;
end;
   writeln('What is the minimum population size tolerable in any generation? ');
   readln(minpopsize);
   writeln('What is the maximum population size tolerable in any generation?  (<= ',(maxhap/2):1:0,') ');
   readln(maxpopsize);
   minpopsize:=minpopsize*2;
   maxpopsize:=maxpopsize*2;
writeln(model,'Gene Length = ',genelength:1,'bp');
writeln(model,'Population size in haplotypes: ');
writeln(model,'Mean = ',numhap:1);
writeln(model,'Minimum = ',minpopsize:1);
   if maxpopsize > maxhap then maxpopsize:=maxhap;
writeln(model,'Maximum = ',maxpopsize:1);
   for i:= 1 to maxpopsize do
     for j:= 0 to maxrare do
       snpold[i,j]:=0;
   writeln('What is the variance of the population size? ');
   readln(mixpar);
   mixpar:=mixpar/maxhap;
   mixpar:=sqrt(mixpar);
writeln(model,'Size in generation N = ',(1-mixpar):4:2,'(popsize) + ',mixpar:4:2,'(exp[mean=popsize]).');
   writeln('What is the probability of a recombination somewhere within this region per meiosis? ');
   readln(recrate);
 
   writeln('What is the probability that a hotspot of recombination exists within this region? ');
   readln(hsprob);
   if hsprob > 0 then begin
     writeln('What is the length of such a hotspot in bp? ');
     readln(hsmean);
     writeln('What percentage of all recombination events in this region happen in this hotspot?');
     readln(hspct);
   end;
   
if randomnum < hsprob then hotspot:=true else hotspot:=false;
if hotspot then hsstart:=round(randomnum*(genelength-hsmean));

if recrate < 0.000000001 then recrate:=0.000000001;

writeln(model,'Recombination rate per Mb per meiosis = ',((recrate*1000000/genelength)):10:6);
if hotspot then begin
    writeln(model,'This region has a hotspot of recombination between positions ',hsstart:1,' and ',(hsstart+hsmean):1,'bp.');
    writeln(model,(hspct*100):10:6,'% of all recombinations on the region occur in this hotspot.');
end else writeln(model,'There is NO hotspot of recombination in this region.');
writeln(model);
   writeln('What is the mutation rate per nucleotide per meiosis? ');
   readln(mutrate);
if mutrate < 0.0000000000001 then mutrate:=0.00000000000001;
writeln(model,'Mutation rate per nucleotide per meiosis = ',mutrate:10:20);
   writeln('What is the mean number of mutation hotspots in this region? ');
   readln(mhsmean);
   if mhsmean > 0 then begin
       writeln('What is the mutation rate at the hotspot positions? ');
       readln(mhsrate);
        pos:=0;
        mhs[0]:=0;
        while pos < genelength do begin
           pos:=pos + round(simexp(genelength/mhsmean));
           if pos < genelength then begin
              mhs[0]:=mhs[0]+1;
              mhs[mhs[0]]:=pos;
           end;
        end;
        if mhs[0] > 0 then begin
         writeln(model,' There are ',mhs[0]:1,' hotspots of mutation in this region, at positions ');
         for i:= 1 to mhs[0] do
            writeln(model,mhs[i]:10,'bp');
         writeln(model,'The mutation rate at the hotspots is ',(mhsrate/mutrate):10:2,' times higher than elsewhere. ');
        end else writeln(model,' There are no hotspots of mutation in this region. ');
   end else begin
      mhs[0]:=0;
      mhs[1]:=0;
      writeln(model,' There are no hotspots of mutation in this region. ');
   end;
   writeln(model);   



   writeln('What is the probability of a gene conversion event per meiosis? ');

   readln(gcrate);
   writeln('What is the average length (in bp) of gene conversion tracts? ');
if   gcrate < 0.0000000000001 then gcrate:=0.0000000000001;

   readln(gclength);
writeln(model,'Probability of Gene conversion per meiosis per Mb = ',((gcrate*1000000/genelength)):10:6);
writeln(model,'Distribution of g.c. tract lengths = exp(',gclength:1,')');
   writeln('How many generations of evolution do you desire?');
   readln(numgen);
writeln(model,numgen:1,' generations of random mating assumed.');
writeln('Would you like to have the population subdivide at some point in history?');
readln(xx);
if (xx='y') or (xx='Y') then subdivision:=true else subdivision:=false;
if subdivision then begin
   writeln('In what generation should the subdivision start?');
   readln(startsub);
   writeln('(In what generation should the subdivision end?');
   readln(endsub);
   writeln('How many individuals in the smaller of the two subdivided populations? ');
   readln(minsize);
   writeln(model,'Population subdivides from generation ',startsub:1,' to generation ',endsub:1,' with smaller population size = ',minsize:1,'.');
end;
   writeln('How many generations of exponential growth at the end of this period? ');
   readln(expgen);


   writeln('How many pairwise unassociated SNPs at beginning of simulation?');
   readln(numsnps);
  if numsnps > 0 then begin
   writeln('What is the average heterozygosity of the unassociated SNPs in first generation?');
   readln(snpfreq);
  end else snpfreq:=0;
if numsnps > 0 then begin
writeln(model,numsnps:1,' SNPs are segregating from the first generation.');
writeln(model,'The mean starting frequency of the rare SNP allele is ',snpfreq:6:4);
end else writeln(model,'The starting population was monomorphic at all sites!');


(* Gene structure and selection 


writeln('What is the mean number of exons per gene? ');
readln(exonmnum);
writeln('What is the mean exon length? ');
readln(exonmean);
writeln('What is the mean intron length? ');
readln(intronmean);
writeln('What is the mean number of regulatory regions? ');
readln(regumnum);
writeln('What is the mean length of regulatory regions? ');
readln(regumean);
neutralmut:=(576-438)/576;
stopmut:=24/576;
writeln('What percentage of amino acid substitutions are mildly advantageous? ');
readln(advpct);
writeln('What percentage of amino acid substitutions are mildly deleterious? ');
readln(delpct);
writeln('What percentage of amino acid substitutions are severely deleterious in single copy? ');
readln(sevpct);
neutralmut:=neutralmut+( (1-advpct-delpct-sevpct)*(1-neutralmut-stopmut));
stopmut:=stopmut+(1-neutralmut-stopmut)*sevpct;
mildposmut:=advpct*(1-neutralmut-stopmut);
mildnegmut:=delpct*(1-neutralmut-stopmut);

exonnum:=1;
pos:=0;
if exonmnum > 1 then    *********Number of Exons
 while pos < 1 do begin
   pos:=pos + simexp(1/(exonmnum -1 ));
   if pos < 1 then exonnum:=exonnum+1;
 end;
exontot:=0;
for i:= 1 to exonnum do begin   *********Selection for or against mutations in each exon
   exon[i,0]:=round(simexp(exonmean));
   for j:= 1 to exon[i,0] do exon[i,j]:=simselect(j,exonnum);
   exontot:=exontot+exon[i,0];
end;
remain:=genelength - exontot;  ***********How many bp are left in the region?
*)
close(model);
end;


procedure meiosis(num,cat,kkk:integer;expogrow:boolean);
var
   nnumhap,maxd,ll,mmm,recpt,i,j,pos,aa,m,min,kk,a,b,k,chr,len:integer;
   aaa:array[0..maxrare] of integer;

begin
if expogrow = true then begin
    a:=num;
    b:=round(randomnum*(nnumhap) + 0.499999999999999);
end else
   if cat <> 2 then begin
     nnumhap:=kkk;
     a:=round(randomnum*(nnumhap) + 0.499999999999999);                  (* Parental genotype choice *)
     b:=round(randomnum*(nnumhap) + 0.4999999999999999);
   end else begin
     nnumhap:= (orighap-kkk);
     a:=round(randomnum*(nnumhap) + 0.499999999999999);                  (* Parental genotype choice *)
     b:=round(randomnum*(nnumhap) + 0.4999999999999999);
     a:=a+kkk;
     b:=b+kkk;
   end;
if expogrow then chr:=a else
   if randomnum < 0.50 then chr:=a else chr:=b;     (* Segregation *)



   if randomnum < recrate then                      (* Recombination and Hot Spots *)
      if (not hotspot) or (randomnum > hspct) then  recpt:=round(randomnum*(genelength) + 0.49999999999999999) else 
         recpt:=hsstart + round(randomnum*(hsmean) + 0.49999999999999999999) else
      recpt:=genelength+10;

   i:=1;
   while (snpold[chr,i] > 0) and (snpold[chr,i] < recpt) do begin
      snpnew[num,i]:=snpold[chr,i];
      i:=i+1;
   end;
if recpt < genelength then begin
   if chr = a then chr:=b else chr:=a;
   j:=1;
   while (snpold[chr,j] > 0) do begin
      if snpold[chr,j] > recpt then begin
         snpnew[num,i]:=snpold[chr,j];
         i:=i+1;
      end;
      j:=j+1;
      snpnew[num,i]:=0;
   end;         
end else snpnew[num,i]:=0;   

   pos:=0;                                          (* Mutation *)

   while pos <= genelength do begin
         tmpt:=simexp(1/mutrate);
         if tmpt > genelength then tmpt:=genelength;
         pos:=pos + round(tmpt);
      if pos <= genelength then  begin
         snpnew[num,i]:=pos;
         snpnew[num,(i+1)]:=0;
         i:=i+1;
      end;
   end; 
  
   if mhs[0] > 0 then               (*Hot spots of mutation*)
     for mmm:= 1 to mhs[0] do    
      if randomnum < mhsrate then begin
         snpnew[num,i]:=mhs[mmm];
         snpnew[num,(i+1)]:=0;
         i:=i+1;
      end;

         





   if randomnum < gcrate then begin                (* gene conversion *)
      aa:=round(randomnum*genelength);
      len:=round (simexp((gclength)));
      if aa < recpt then if chr=a then chr:=b else chr:=a;
      j:=1;
      while (snpold[chr,j] > 0) do begin
         if (snpold[chr,j] > aa) and  (snpold[chr,j] < (aa+len)) then
            for k:= 1 to i do if snpnew[num,k] = snpold[chr,j] then begin
              kk:=k;
              while snpnew[num,(kk)] > 0 do  begin
                snpnew[num,kk]:=snpnew[num,(kk+1)];
                kk:=kk+1;
              end;
              
         end;
         j:=j+1;
      end;

      if chr=a then chr:=b else chr:=a;
      j:=1;  
      while (snpold[chr,j] > 0) do begin
         
         if (snpold[chr,j] > aa) and  (snpold[chr,j] < (aa+len)) then begin
            snpnew[num,i]:=snpold[chr,j];
            snpnew[num,(i+1)]:=0;
         end;
         j:=j+1;
      end;
   end; 

   for m:= 1 to i do begin                           (* Sort *)
    min:= genelength+1;
    kk:=0;
    for k:= 1 to i do
      if (snpnew[num,k] < min) and (snpnew[num,k] > 0) then begin
         min:=snpnew[num,k];
         kk:=k;
      end;
    if (min > 0) and (min < (genelength))then aaa[m]:=min else aaa[m]:=0;
    snpnew[num,kk]:=0;
   end;
   for k:= 1 to i do
      snpnew[num,k]:=aaa[k];
   maxd:=i;
   for k:= 2 to i do
      if (snpnew[num,k] > 0) and (snpnew[num,k] = snpnew[num,(k-1)]) then begin
         snpnew[num,(maxd + 1)]:=0;
         for ll:= (k) to maxd do
           snpnew[num,k] := snpnew[num,(k+1)];
         maxd:=maxd-1;
      end;

end;


procedure generation(gennum:integer);
var
  kk,ii,jj:integer;
  expos:boolean;
tt:real;
begin
if gennum > numgen then expos:=true else expos:=false;
if subdivision and ( gennum <= endsub) and (gennum >= startsub) then begin
   kk:=minsize;
   for ii:= 1 to kk do
      meiosis(ii,1,kk,expos);
   for ii:=(kk+1) to orighap do
      meiosis(ii,2,kk,expos);
   kk:=orighap;
end else begin
   if gennum > (numgen-1000) then kk:=orighap else
   kk:=maxhap+10;
   tt:=0.05 + 0.95*(numgen-gennum)/numgen;
   
   while ( kk > (orighap +(maxpopsize-orighap)*tt)) or (kk < (orighap - (orighap-minpopsize)*tt))do
begin
      kk:= round((1-mixpar)*numhap) + (round(simexp(mixpar*numhap)));
  end;
   for ii:= 1 to kk do begin 
       meiosis(ii,0,kk,expos);
   end;
end;
   for ii:= 1 to kk do
      for jj:= 0 to maxrare do
          snpold[ii,jj]:=snpnew[ii,jj];
   numhap:=kk;
end;


procedure simstart;

var
  pos,ff,i:integer;
  gf:real;

begin

  pos:=0;
  if numsnps > 0 then 
   while pos < genelength do begin
      pos:=pos+round(simexp(genelength/numsnps));
      gf:=0;
      while (gf < 0.0) or (gf > 0.5) do
         gf:= simexp(snpfreq);
      
      for i:= 1 to (numhap) do
         if randomnum < gf then begin
             ff:=1;
             while snpold[i,ff] > 0 do ff:=ff+1;
             snpold[i,ff]:=pos;
         end;
    end;

end;

procedure outhaps;
var
i,j:integer;
begin
   if kkkk = 5000 then
   open(outfl,'haplotypes.5000',record_length:=30000) else
if kkkk = 10000 then
   open(outfl,'haplotypes.10000',record_length:=30000) else
if kkkk = 15000 then
   open(outfl,'haplotypes.15000',record_length:=30000) else
if kkkk = 20000 then
   open(outfl,'haplotypes.20000',record_length:=30000) else
if kkkk = 25000 then
   open(outfl,'haplotypes.25000',record_length:=30000) else
if kkkk = 30000 then
   open(outfl,'haplotypes.30000',record_length:=30000) else
if kkkk = 35000 then
   open(outfl,'haplotypes.35000',record_length:=30000) else
if kkkk = 40000 then
   open(outfl,'haplotypes.40000',record_length:=30000) else
if kkkk = 45000 then
   open(outfl,'haplotypes.45000',record_length:=30000) else
if kkkk = 50000 then
   open(outfl,'haplotypes.50000',record_length:=30000) else
if kkkk = 55000 then
   open(outfl,'haplotypes.55000',record_length:=30000) else
if kkkk = 60000 then
   open(outfl,'haplotypes.60000',record_length:=30000) else
if kkkk = 65000 then    open(outfl,'haplotypes.65000',record_length:=30000) else
if kkkk = 70000 then
   open(outfl,'haplotypes.70000',record_length:=30000) else
if kkkk = 75000 then
   open(outfl,'haplotypes.75000',record_length:=30000) else
if kkkk = 80000 then
   open(outfl,'haplotypes.80000',record_length:=30000) else
if kkkk = 85000 then
   open(outfl,'haplotypes.85000',record_length:=30000) else
if kkkk = 90000 then
   open(outfl,'haplotypes.90000',record_length:=30000) else
if kkkk = 95000 then
   open(outfl,'haplotypes.95000',record_length:=30000) else
if kkkk = 100000 then
   open(outfl,'haplotypes.100000',record_length:=30000) else
   if kkkk = 105000 then
   open(outfl,'haplotypes.105000',record_length:=30000) else
if kkkk = 110000 then
   open(outfl,'haplotypes.110000',record_length:=30000) else
if kkkk = 115000 then
   open(outfl,'haplotypes.115000',record_length:=30000) else
if kkkk = 120000 then
   open(outfl,'haplotypes.120000',record_length:=30000) else
if kkkk = 125000 then
   open(outfl,'haplotypes.125000',record_length:=30000) else
if kkkk = 130000 then
   open(outfl,'haplotypes.130000',record_length:=30000) else
if kkkk = 135000 then
   open(outfl,'haplotypes.135000',record_length:=30000) else
if kkkk = 140000 then
   open(outfl,'haplotypes.140000',record_length:=30000) else
if kkkk = 145000 then
   open(outfl,'haplotypes.145000',record_length:=30000) else
if kkkk = 150000 then
   open(outfl,'haplotypes.150000',record_length:=30000) else
if kkkk = 155000 then
   open(outfl,'haplotypes.155000',record_length:=30000) else
if kkkk = 160000 then
   open(outfl,'haplotypes.160000',record_length:=30000) else
if kkkk = 165000 then
   open(outfl,'haplotypes.165000',record_length:=30000) else
if kkkk = 170000 then
   open(outfl,'haplotypes.170000',record_length:=30000) else
if kkkk = 175000 then
   open(outfl,'haplotypes.175000',record_length:=30000) else
if kkkk = 180000 then
   open(outfl,'haplotypes.180000',record_length:=30000) else
if kkkk = 185000 then
   open(outfl,'haplotypes.185000',record_length:=30000) else
if kkkk = 190000 then
   open(outfl,'haplotypes.190000',record_length:=30000) else
if kkkk = 195000 then
   open(outfl,'haplotypes.195000',record_length:=30000) else
if kkkk = 200000 then
   open(outfl,'haplotypes.200000',record_length:=30000) else
   open(outfl,'haplotypes.out',record_length:=30000);
   rewrite(outfl);
writeln(outfl,numhap:10,' HAPLOTYPES TO FOLLOW: Generation number ',kkkk:1);
   for i:= 1 to numhap do begin
      j:=1;

      write(outfl,i:10, ' :'); 
      while snpnew[i,j] > 0 do begin
         write(outfl,' ',snpnew[i,j]:1);
         j:=j+1;
      end;
      writeln(outfl);
   end;
close(outfl);
end;

begin
   default;
   simstart;
   for kkkk:= 1 to (numgen+expgen) do begin
      writeln(kkkk:10,numhap:10);
      generation(kkkk);
      if (kkkk = 5000) or (kkkk = 10000) or (kkkk = 15000) or (kkkk = 20000) or (kkkk = 25000) or (kkkk = 30000) or (kkkk = 35000) or
       (kkkk = 40000) or (kkkk = 45000) or (kkkk = 50000) or (kkkk = 55000) or (kkkk = 60000) or (kkkk = 65000) or (kkkk = 70000) or
       (kkkk = 75000) or (kkkk = 80000) or (kkkk = 85000) or (kkkk = 90000) or (kkkk = 95000) or (kkkk = 100000) or
       (kkkk = 105000) or (kkkk = 110000) or (kkkk = 115000) or (kkkk = 120000) or (kkkk = 125000) or (kkkk = 130000) or (kkkk = 135000) or
       (kkkk = 140000) or (kkkk = 145000) or (kkkk = 150000) or (kkkk = 155000) or (kkkk = 160000) or (kkkk = 165000) or (kkkk = 170000) or
       (kkkk = 175000) or (kkkk = 180000) or (kkkk = 185000) or (kkkk = 190000) or (kkkk = 195000) or (kkkk = 200000) then outhaps;
   end;
   
outhaps;
   rewrite(seed,'seed.dat');
   writeln(seed,seed1:10,seed2:10,seed3:10);
end.
