program diseqsim(input,output); const maxsnp = 1000; (* Maximum number of SNPs in the whole region *) maxhap = 30000; (* maximum number of chromosomes in the population at one time *) maxrare = 500; (* Maximum number of rare SNP variants on one haplotype *) maxexon = 1; (* Maximum number of exons per gene *) maxreg = 1; (* Maximum number of regulatory regions per gene *) type real = double; var subdivision,hotspot:boolean; startsub,endsub,minsize:integer; expgen,mhsmean:integer; mhsrate,hsprob,hspct,mixpar,x,y:real; hsstart,hsmean,seed1,seed2,seed3:integer; snpold:array[1..maxhap,0..maxrare] of integer; snpnew:array[1..maxhap,0..maxrare] of integer; numsnps,kkkkk,kkkk,genelength:integer; minpopsize,maxpopsize:integer; exon:array[1..maxexon,1..2] of integer; regulatory:array[1..maxreg,1..2] of integer; mr,mutrate: double; tmpt,snpfreq:real; mhs:array[0..maxsnp] of integer; gcrate:real; gclength:integer; recrate: real; selection:array[1..maxsnp] of real; orighap,numhap, numhapnew:integer; numgen:integer; outfl,seed:text; model:text; function randomnum:real; { This function generates independent uniform deviates on the interval (0,1). See the reference: Wichman BA, Hill ID (1982). Algorithm 183: an efficient and portable pseudo-random number generator. Applied Statistics 31,188-190. Global variables required: seed1 seed2 seed3 (integer) Seed1, seed2 and seed3 should be set to integer values before the first entry. Each call to randomnum updates the 3 seeds. Integer arithmetic up to 30323 is needed on your computer. } var r:real; begin {randomnum} seed1:=171*(seed1 mod 177)-2*(seed1 div 177); seed2:=172*(seed2 mod 176)-35*(seed2 div 176); seed3:=170*(seed3 mod 178)-63*(seed3 div 178); if (seed1<0) then seed1:=seed1+30269; if (seed2<0) then seed2:=seed2+30307; if (seed3<0) then seed3:=seed3+30323; r:=seed1/30269.0+seed2/30307.0+seed3/30323.0; randomnum:=r-trunc(r); { RANDOM:=MOD(R,1.000) Note: a mod b = a - ((a div b)*b } end; {randomnum} function simexp(m:real):real; var tt:real; begin tt:=randomnum; if tt < 0.0000000001 then tt:=0.0000000001; simexp:= -(m)*ln(tt); end; function simerlang(mean:real;df:integer):real; var ttt:real; i:integer; begin ttt:=0; for i:= 1 to df do ttt:=ttt+simexp((mean/df)); simerlang:=ttt; end; procedure default; var xx:char; pos,i,j:integer; (* function simselect(pos,numpos:integer); begin tmp:=randomnum; if tmp < neutralmut then tmpp:=0 else if tmp < (neutralmut + stopmut) then tmp:=-0.5 else if tmp < (neutralmut + stopmut + mildnegmut) then tmp:= -0.05 else tmp:=0.05; simselect:=tmp; end; *) begin open(model,'model.summary',history:=new); rewrite(model); reset(seed,'seed.dat'); readln(seed,seed1,seed2,seed3); writeln('What is the length of the region to be simulated? '); readln(genelength); if genelength < 100 then genelength:=100; numhap:= maxhap+1; while numhap > maxhap do begin writeln('What is the mean population size? (<= ',(maxhap/2):1:0,' ) '); readln(numhap); if numhap < 10 then numhap:=10; numhap:=numhap*2; orighap:=numhap; end; writeln('What is the minimum population size tolerable in any generation? '); readln(minpopsize); writeln('What is the maximum population size tolerable in any generation? (<= ',(maxhap/2):1:0,') '); readln(maxpopsize); minpopsize:=minpopsize*2; maxpopsize:=maxpopsize*2; writeln(model,'Gene Length = ',genelength:1,'bp'); writeln(model,'Population size in haplotypes: '); writeln(model,'Mean = ',numhap:1); writeln(model,'Minimum = ',minpopsize:1); if maxpopsize > maxhap then maxpopsize:=maxhap; writeln(model,'Maximum = ',maxpopsize:1); for i:= 1 to maxpopsize do for j:= 0 to maxrare do snpold[i,j]:=0; writeln('What is the variance of the population size? '); readln(mixpar); mixpar:=mixpar/maxhap; mixpar:=sqrt(mixpar); writeln(model,'Size in generation N = ',(1-mixpar):4:2,'(popsize) + ',mixpar:4:2,'(exp[mean=popsize]).'); writeln('What is the probability of a recombination somewhere within this region per meiosis? '); readln(recrate); writeln('What is the probability that a hotspot of recombination exists within this region? '); readln(hsprob); if hsprob > 0 then begin writeln('What is the length of such a hotspot in bp? '); readln(hsmean); writeln('What percentage of all recombination events in this region happen in this hotspot?'); readln(hspct); end; if randomnum < hsprob then hotspot:=true else hotspot:=false; if hotspot then hsstart:=round(randomnum*(genelength-hsmean)); if recrate < 0.000000001 then recrate:=0.000000001; writeln(model,'Recombination rate per Mb per meiosis = ',((recrate*1000000/genelength)):10:6); if hotspot then begin writeln(model,'This region has a hotspot of recombination between positions ',hsstart:1,' and ',(hsstart+hsmean):1,'bp.'); writeln(model,(hspct*100):10:6,'% of all recombinations on the region occur in this hotspot.'); end else writeln(model,'There is NO hotspot of recombination in this region.'); writeln(model); writeln('What is the mutation rate per nucleotide per meiosis? '); readln(mutrate); if mutrate < 0.0000000000001 then mutrate:=0.00000000000001; writeln(model,'Mutation rate per nucleotide per meiosis = ',mutrate:10:20); writeln('What is the mean number of mutation hotspots in this region? '); readln(mhsmean); if mhsmean > 0 then begin writeln('What is the mutation rate at the hotspot positions? '); readln(mhsrate); pos:=0; mhs[0]:=0; while pos < genelength do begin pos:=pos + round(simexp(genelength/mhsmean)); if pos < genelength then begin mhs[0]:=mhs[0]+1; mhs[mhs[0]]:=pos; end; end; if mhs[0] > 0 then begin writeln(model,' There are ',mhs[0]:1,' hotspots of mutation in this region, at positions '); for i:= 1 to mhs[0] do writeln(model,mhs[i]:10,'bp'); writeln(model,'The mutation rate at the hotspots is ',(mhsrate/mutrate):10:2,' times higher than elsewhere. '); end else writeln(model,' There are no hotspots of mutation in this region. '); end else begin mhs[0]:=0; mhs[1]:=0; writeln(model,' There are no hotspots of mutation in this region. '); end; writeln(model); writeln('What is the probability of a gene conversion event per meiosis? '); readln(gcrate); writeln('What is the average length (in bp) of gene conversion tracts? '); if gcrate < 0.0000000000001 then gcrate:=0.0000000000001; readln(gclength); writeln(model,'Probability of Gene conversion per meiosis per Mb = ',((gcrate*1000000/genelength)):10:6); writeln(model,'Distribution of g.c. tract lengths = exp(',gclength:1,')'); writeln('How many generations of evolution do you desire?'); readln(numgen); writeln(model,numgen:1,' generations of random mating assumed.'); writeln('Would you like to have the population subdivide at some point in history?'); readln(xx); if (xx='y') or (xx='Y') then subdivision:=true else subdivision:=false; if subdivision then begin writeln('In what generation should the subdivision start?'); readln(startsub); writeln('(In what generation should the subdivision end?'); readln(endsub); writeln('How many individuals in the smaller of the two subdivided populations? '); readln(minsize); writeln(model,'Population subdivides from generation ',startsub:1,' to generation ',endsub:1,' with smaller population size = ',minsize:1,'.'); end; writeln('How many generations of exponential growth at the end of this period? '); readln(expgen); writeln('How many pairwise unassociated SNPs at beginning of simulation?'); readln(numsnps); if numsnps > 0 then begin writeln('What is the average heterozygosity of the unassociated SNPs in first generation?'); readln(snpfreq); end else snpfreq:=0; if numsnps > 0 then begin writeln(model,numsnps:1,' SNPs are segregating from the first generation.'); writeln(model,'The mean starting frequency of the rare SNP allele is ',snpfreq:6:4); end else writeln(model,'The starting population was monomorphic at all sites!'); (* Gene structure and selection writeln('What is the mean number of exons per gene? '); readln(exonmnum); writeln('What is the mean exon length? '); readln(exonmean); writeln('What is the mean intron length? '); readln(intronmean); writeln('What is the mean number of regulatory regions? '); readln(regumnum); writeln('What is the mean length of regulatory regions? '); readln(regumean); neutralmut:=(576-438)/576; stopmut:=24/576; writeln('What percentage of amino acid substitutions are mildly advantageous? '); readln(advpct); writeln('What percentage of amino acid substitutions are mildly deleterious? '); readln(delpct); writeln('What percentage of amino acid substitutions are severely deleterious in single copy? '); readln(sevpct); neutralmut:=neutralmut+( (1-advpct-delpct-sevpct)*(1-neutralmut-stopmut)); stopmut:=stopmut+(1-neutralmut-stopmut)*sevpct; mildposmut:=advpct*(1-neutralmut-stopmut); mildnegmut:=delpct*(1-neutralmut-stopmut); exonnum:=1; pos:=0; if exonmnum > 1 then *********Number of Exons while pos < 1 do begin pos:=pos + simexp(1/(exonmnum -1 )); if pos < 1 then exonnum:=exonnum+1; end; exontot:=0; for i:= 1 to exonnum do begin *********Selection for or against mutations in each exon exon[i,0]:=round(simexp(exonmean)); for j:= 1 to exon[i,0] do exon[i,j]:=simselect(j,exonnum); exontot:=exontot+exon[i,0]; end; remain:=genelength - exontot; ***********How many bp are left in the region? *) close(model); end; procedure meiosis(num,cat,kkk:integer;expogrow:boolean); var nnumhap,maxd,ll,mmm,recpt,i,j,pos,aa,m,min,kk,a,b,k,chr,len:integer; aaa:array[0..maxrare] of integer; begin if expogrow = true then begin a:=num; b:=round(randomnum*(nnumhap) + 0.499999999999999); end else if cat <> 2 then begin nnumhap:=kkk; a:=round(randomnum*(nnumhap) + 0.499999999999999); (* Parental genotype choice *) b:=round(randomnum*(nnumhap) + 0.4999999999999999); end else begin nnumhap:= (orighap-kkk); a:=round(randomnum*(nnumhap) + 0.499999999999999); (* Parental genotype choice *) b:=round(randomnum*(nnumhap) + 0.4999999999999999); a:=a+kkk; b:=b+kkk; end; if expogrow then chr:=a else if randomnum < 0.50 then chr:=a else chr:=b; (* Segregation *) if randomnum < recrate then (* Recombination and Hot Spots *) if (not hotspot) or (randomnum > hspct) then recpt:=round(randomnum*(genelength) + 0.49999999999999999) else recpt:=hsstart + round(randomnum*(hsmean) + 0.49999999999999999999) else recpt:=genelength+10; i:=1; while (snpold[chr,i] > 0) and (snpold[chr,i] < recpt) do begin snpnew[num,i]:=snpold[chr,i]; i:=i+1; end; if recpt < genelength then begin if chr = a then chr:=b else chr:=a; j:=1; while (snpold[chr,j] > 0) do begin if snpold[chr,j] > recpt then begin snpnew[num,i]:=snpold[chr,j]; i:=i+1; end; j:=j+1; snpnew[num,i]:=0; end; end else snpnew[num,i]:=0; pos:=0; (* Mutation *) while pos <= genelength do begin tmpt:=simexp(1/mutrate); if tmpt > genelength then tmpt:=genelength; pos:=pos + round(tmpt); if pos <= genelength then begin snpnew[num,i]:=pos; snpnew[num,(i+1)]:=0; i:=i+1; end; end; if mhs[0] > 0 then (*Hot spots of mutation*) for mmm:= 1 to mhs[0] do if randomnum < mhsrate then begin snpnew[num,i]:=mhs[mmm]; snpnew[num,(i+1)]:=0; i:=i+1; end; if randomnum < gcrate then begin (* gene conversion *) aa:=round(randomnum*genelength); len:=round (simexp((gclength))); if aa < recpt then if chr=a then chr:=b else chr:=a; j:=1; while (snpold[chr,j] > 0) do begin if (snpold[chr,j] > aa) and (snpold[chr,j] < (aa+len)) then for k:= 1 to i do if snpnew[num,k] = snpold[chr,j] then begin kk:=k; while snpnew[num,(kk)] > 0 do begin snpnew[num,kk]:=snpnew[num,(kk+1)]; kk:=kk+1; end; end; j:=j+1; end; if chr=a then chr:=b else chr:=a; j:=1; while (snpold[chr,j] > 0) do begin if (snpold[chr,j] > aa) and (snpold[chr,j] < (aa+len)) then begin snpnew[num,i]:=snpold[chr,j]; snpnew[num,(i+1)]:=0; end; j:=j+1; end; end; for m:= 1 to i do begin (* Sort *) min:= genelength+1; kk:=0; for k:= 1 to i do if (snpnew[num,k] < min) and (snpnew[num,k] > 0) then begin min:=snpnew[num,k]; kk:=k; end; if (min > 0) and (min < (genelength))then aaa[m]:=min else aaa[m]:=0; snpnew[num,kk]:=0; end; for k:= 1 to i do snpnew[num,k]:=aaa[k]; maxd:=i; for k:= 2 to i do if (snpnew[num,k] > 0) and (snpnew[num,k] = snpnew[num,(k-1)]) then begin snpnew[num,(maxd + 1)]:=0; for ll:= (k) to maxd do snpnew[num,k] := snpnew[num,(k+1)]; maxd:=maxd-1; end; end; procedure generation(gennum:integer); var kk,ii,jj:integer; expos:boolean; tt:real; begin if gennum > numgen then expos:=true else expos:=false; if subdivision and ( gennum <= endsub) and (gennum >= startsub) then begin kk:=minsize; for ii:= 1 to kk do meiosis(ii,1,kk,expos); for ii:=(kk+1) to orighap do meiosis(ii,2,kk,expos); kk:=orighap; end else begin if gennum > (numgen-1000) then kk:=orighap else kk:=maxhap+10; tt:=0.05 + 0.95*(numgen-gennum)/numgen; while ( kk > (orighap +(maxpopsize-orighap)*tt)) or (kk < (orighap - (orighap-minpopsize)*tt))do begin kk:= round((1-mixpar)*numhap) + (round(simexp(mixpar*numhap))); end; for ii:= 1 to kk do begin meiosis(ii,0,kk,expos); end; end; for ii:= 1 to kk do for jj:= 0 to maxrare do snpold[ii,jj]:=snpnew[ii,jj]; numhap:=kk; end; procedure simstart; var pos,ff,i:integer; gf:real; begin pos:=0; if numsnps > 0 then while pos < genelength do begin pos:=pos+round(simexp(genelength/numsnps)); gf:=0; while (gf < 0.0) or (gf > 0.5) do gf:= simexp(snpfreq); for i:= 1 to (numhap) do if randomnum < gf then begin ff:=1; while snpold[i,ff] > 0 do ff:=ff+1; snpold[i,ff]:=pos; end; end; end; procedure outhaps; var i,j:integer; begin if kkkk = 5000 then open(outfl,'haplotypes.5000',record_length:=30000) else if kkkk = 10000 then open(outfl,'haplotypes.10000',record_length:=30000) else if kkkk = 15000 then open(outfl,'haplotypes.15000',record_length:=30000) else if kkkk = 20000 then open(outfl,'haplotypes.20000',record_length:=30000) else if kkkk = 25000 then open(outfl,'haplotypes.25000',record_length:=30000) else if kkkk = 30000 then open(outfl,'haplotypes.30000',record_length:=30000) else if kkkk = 35000 then open(outfl,'haplotypes.35000',record_length:=30000) else if kkkk = 40000 then open(outfl,'haplotypes.40000',record_length:=30000) else if kkkk = 45000 then open(outfl,'haplotypes.45000',record_length:=30000) else if kkkk = 50000 then open(outfl,'haplotypes.50000',record_length:=30000) else if kkkk = 55000 then open(outfl,'haplotypes.55000',record_length:=30000) else if kkkk = 60000 then open(outfl,'haplotypes.60000',record_length:=30000) else if kkkk = 65000 then open(outfl,'haplotypes.65000',record_length:=30000) else if kkkk = 70000 then open(outfl,'haplotypes.70000',record_length:=30000) else if kkkk = 75000 then open(outfl,'haplotypes.75000',record_length:=30000) else if kkkk = 80000 then open(outfl,'haplotypes.80000',record_length:=30000) else if kkkk = 85000 then open(outfl,'haplotypes.85000',record_length:=30000) else if kkkk = 90000 then open(outfl,'haplotypes.90000',record_length:=30000) else if kkkk = 95000 then open(outfl,'haplotypes.95000',record_length:=30000) else if kkkk = 100000 then open(outfl,'haplotypes.100000',record_length:=30000) else if kkkk = 105000 then open(outfl,'haplotypes.105000',record_length:=30000) else if kkkk = 110000 then open(outfl,'haplotypes.110000',record_length:=30000) else if kkkk = 115000 then open(outfl,'haplotypes.115000',record_length:=30000) else if kkkk = 120000 then open(outfl,'haplotypes.120000',record_length:=30000) else if kkkk = 125000 then open(outfl,'haplotypes.125000',record_length:=30000) else if kkkk = 130000 then open(outfl,'haplotypes.130000',record_length:=30000) else if kkkk = 135000 then open(outfl,'haplotypes.135000',record_length:=30000) else if kkkk = 140000 then open(outfl,'haplotypes.140000',record_length:=30000) else if kkkk = 145000 then open(outfl,'haplotypes.145000',record_length:=30000) else if kkkk = 150000 then open(outfl,'haplotypes.150000',record_length:=30000) else if kkkk = 155000 then open(outfl,'haplotypes.155000',record_length:=30000) else if kkkk = 160000 then open(outfl,'haplotypes.160000',record_length:=30000) else if kkkk = 165000 then open(outfl,'haplotypes.165000',record_length:=30000) else if kkkk = 170000 then open(outfl,'haplotypes.170000',record_length:=30000) else if kkkk = 175000 then open(outfl,'haplotypes.175000',record_length:=30000) else if kkkk = 180000 then open(outfl,'haplotypes.180000',record_length:=30000) else if kkkk = 185000 then open(outfl,'haplotypes.185000',record_length:=30000) else if kkkk = 190000 then open(outfl,'haplotypes.190000',record_length:=30000) else if kkkk = 195000 then open(outfl,'haplotypes.195000',record_length:=30000) else if kkkk = 200000 then open(outfl,'haplotypes.200000',record_length:=30000) else open(outfl,'haplotypes.out',record_length:=30000); rewrite(outfl); writeln(outfl,numhap:10,' HAPLOTYPES TO FOLLOW: Generation number ',kkkk:1); for i:= 1 to numhap do begin j:=1; write(outfl,i:10, ' :'); while snpnew[i,j] > 0 do begin write(outfl,' ',snpnew[i,j]:1); j:=j+1; end; writeln(outfl); end; close(outfl); end; begin default; simstart; for kkkk:= 1 to (numgen+expgen) do begin writeln(kkkk:10,numhap:10); generation(kkkk); if (kkkk = 5000) or (kkkk = 10000) or (kkkk = 15000) or (kkkk = 20000) or (kkkk = 25000) or (kkkk = 30000) or (kkkk = 35000) or (kkkk = 40000) or (kkkk = 45000) or (kkkk = 50000) or (kkkk = 55000) or (kkkk = 60000) or (kkkk = 65000) or (kkkk = 70000) or (kkkk = 75000) or (kkkk = 80000) or (kkkk = 85000) or (kkkk = 90000) or (kkkk = 95000) or (kkkk = 100000) or (kkkk = 105000) or (kkkk = 110000) or (kkkk = 115000) or (kkkk = 120000) or (kkkk = 125000) or (kkkk = 130000) or (kkkk = 135000) or (kkkk = 140000) or (kkkk = 145000) or (kkkk = 150000) or (kkkk = 155000) or (kkkk = 160000) or (kkkk = 165000) or (kkkk = 170000) or (kkkk = 175000) or (kkkk = 180000) or (kkkk = 185000) or (kkkk = 190000) or (kkkk = 195000) or (kkkk = 200000) then outhaps; end; outhaps; rewrite(seed,'seed.dat'); writeln(seed,seed1:10,seed2:10,seed3:10); end.