From Genome Analysis Wiki
Jump to navigation · Jump to search
1,596 bytes added
, 10:05, 5 August 2011
Line 154: |
Line 154: |
| == Further Time Savings == | | == Further Time Savings == |
| | | |
− | The recipe above imputes whole chromosomes, one at a time. Further time savings are possible by imputing chromosomes in chunks, a process that can be facilitated using the [[ChunkChromosome]] tool. | + | The recipe above imputes whole chromosomes, one at a time. Further time savings are possible by imputing chromosomes in chunks, a process that can be facilitated using the [[ChunkChromosome]] tool. |
| + | |
| + | Recall that the original analysis script might have looked like this: |
| + | |
| + | <source lang="bash"> |
| + | #!/bin/tcsh |
| + | |
| + | # Build SNP lists for each chromosome |
| + | foreach chr (`seq 1 22`) |
| + | awk '{ if ($1 == "M") print $2; }' < chr${chr}.dat > chr${chr}.snps |
| + | end |
| + | |
| + | # Estimate haplotypes for all individuals, one chromosome at a time |
| + | foreach chr (`seq 1 22`) |
| + | mach -d chr$chr.dat -p chr$chr.ped --rounds 20 --states 200 --phase --sample 5 --prefix chr$chr.haps & |
| + | end |
| + | wait |
| + | |
| + | # Impute into phased haplotypes |
| + | foreach chr (`seq 1 22`) |
| + | minimac --refHaps ref.hap.$chr.gz --refSnps ref.snps.$chr.gz --haps chr$chr.haps.gz --snps chr$chr.snps --rounds 5 --states 200 --prefix chr$chr.imputed & |
| + | end |
| + | wait |
| + | </source> |
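| + | |
| + | Using [[ChunkChromosome]], the same analysis can instead be run on overlapping chunks of each chromosome, with a script like this: |
| + | |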
| + | <source lang="bash"> |
| + | #!/bin/tcsh |
| + | |
| + | @ length = 2500 |
| + | @ overlap = 500 |
| + | |
| + | # Estimate haplotypes for all individuals, in 2500-marker chunks, with a 500-marker overlap |
| + | foreach chr (`seq 1 22`) |
| + | |
| + | ChunkChromosome -d chr$chr.dat -n $length -o $overlap |
| + | |
| + | foreach chunk (chunk*-chr$chr.dat) |
| + | |
| + | mach -d $chunk -p chr$chr.ped --prefix ${chunk:r} \ |
| + | --rounds 20 --states 200 --phase --sample 5 >& ${chunk:r}-mach.log & |
| + | |
| + | end |
| + | |
| + | end |
| + | wait |
| + | |
| + | # Impute into phased haplotypes |
| + | foreach chr (`seq 1 22`) |
| + | |
| + | foreach chunk (chunk*-chr$chr.dat) |
| + | |
| + | set haps = /data/1000g/hap/all/20101123.chr$chr.hap.gz |
| + | set snps = /data/1000g/snps/chr$chr.snps |
| + | |
| + | minimac --refHaps $haps --refSnps $snps --rounds 5 --states 200 \ |
| + | --haps ${chunk:r}.gz --snps ${chunk}.snps --autoClip autoChunk-chr$chr.dat \ |
| + | --prefix ${chunk:r}.imputed >& ${chunk:r}-minimac.log & |
| + | |
| + | end |
| + | |
| + | end |
| + | wait |
| + | |
| + | </source> |
| | | |
| == X Chromosome Imputation == | | == X Chromosome Imputation == |