Changes

From Genome Analysis Wiki
Jump to navigationJump to search
no edit summary
Line 56: Line 56:     
  ${BIN}/qplot --plot ${OUT}/NA12878.exon.sample.deduped.bam.qplot.pdf --stats ${OUT}/NA12878.exon.sample.deduped.bam.qplot.stats --reference ${REF}/human_g1k_v37_chr20.fa --dbsnp ${REF}/dbsnp.b130.ncbi37.chr20.tbl --gccontent  ${REF}/ncbi37.chr20.gc ${OUT}/NA12878.exon.sample.deduped.bam
 
  ${BIN}/qplot --plot ${OUT}/NA12878.exon.sample.deduped.bam.qplot.pdf --stats ${OUT}/NA12878.exon.sample.deduped.bam.qplot.stats --reference ${REF}/human_g1k_v37_chr20.fa --dbsnp ${REF}/dbsnp.b130.ncbi37.chr20.tbl --gccontent  ${REF}/ncbi37.chr20.gc ${OUT}/NA12878.exon.sample.deduped.bam
 +
 +
== Steps (Thursday) ==
 +
 +
0. SETTING UP ENVIRONMENT VARIABLES
 +
 +
setenv BIN /home/hyun/wed/bin
 +
setenv IN /home/hyun/wed/input
 +
setenv REF /home/hyun/wed/ref
 +
 +
setenv OUT ~/seq/wednesday/output
 +
mkdir -p ${OUT}
 +
 +
1. EXON-TARGETED DATA : COMPUTING GENOTYPE LIKELIHOOD FROM BAM FILES
 +
 +
${BIN}/samtools-hybrid pileup -g -f ${REF}/human_g1k_v37_chr20.fa ${OUT}/NA12878.exon.sample.deduped.bam > ${OUT}/NA12878.exon.sample.glf
 +
 +
2. EXON-TARGETED DATA : VIEW THE GENOTYPE LIKELIHOOD FORMAT
 +
 +
${BIN}/samtools-hybrid glfview ${OUT}/NA12878.exon.sample.glf | less
 +
 +
TYPE 'q' to finish
 +
 +
3. EXON-TARGETED DATA : SINGLE-SAMPLE GENOTYPE CALLING using GLFSINGLE
 +
 +
${BIN}/glfSingle --maxDepth 10000 --minMapQuality 20 -p 0.9 -g ${OUT}/NA12878.exon.sample.glf -b ${OUT}/NA12878.exon.sample.vcf
 +
 +
4. EXON-TARGETED DATA : VIEW THE VCF FILES AND COUNT # OF SNPS
 +
 +
less ${OUT}/NA12878.exon.sample.vcf
 +
 +
grep -v ^# ${OUT}/NA12878.exon.sample.vcf | wc -l
 +
 +
5. DEEP-COVERAGE GENOME : COMPUTE THE GENOTYPE LIKELIHOOD
 +
 +
${BIN}/samtools-hybrid pileup -g -f ${REF}/human_g1k_v37_chr20.fa ${IN}/NA12878.highcov.sample.bam > ${OUT}/NA12878.highcov.sample.glf
 +
 +
6. DEEP-COVERAGE GENOME : SINGLE-SAMPLE VARIANT CALLING
 +
 +
${BIN}/glfSingle --maxDepth 10000  --minMapQuality 20 -p 0.9 -g ${OUT}/NA12878.highcov.sample.glf -b ${OUT}/NA12878.highcov.sample.vcf
 +
 +
7. DEEP-COVERAGE GENOME : VIEW THE VCF FILE
 +
 +
less ${OUT}/NA12878.highcov.sample.vcf
 +
 +
8. DEEP-COVERAGE GENOME : COUNT # OF SNPS IN THE VCF FILE
 +
 +
grep -v ^# ${OUT}/NA12878.highcov.sample.vcf | wc -l
 +
 +
9. EVALUATE OVERLAP BETWEEN THE TWO SETS OF VARIANT CALLS
 +
 +
cat ${OUT}/NA12878.exon.sample.vcf ${OUT}/NA12878.highcov.sample.vcf | grep -v ^# | cut -f 1,2 | sort | uniq -d | wc -l
 +
 +
cat ${OUT}/NA12878.exon.sample.vcf ${OUT}/NA12878.highcov.sample.vcf | grep -v ^# | cut -f 1,2 | sort | uniq -d
 +
 +
10. VIEW ACTUAL ALIGNMENT AT SNP POSITIONS
 +
 +
${BIN}/samtools-hybrid tview ${OUT}/NA12878.exon.sample.deduped.bam ${REF}/human_g1k_v37_chr20.fa
 +
 +
TYPE g, and  20:19989392
 +
TYPE g, and  20:20032998
 +
TYPE g, and  20:20139952
 +
 +
${BIN}/samtools-hybrid tview ${IN}/NA12878.highcov.sample.bam ${REF}/human_g1k_v37_chr20.fa
 +
 +
TYPE g, and  20:19989392
 +
TYPE g, and  20:20032998
 +
TYPE g, and  20:20139952
 +
 +
11. SUMMARIZE VCF STATISTICS
 +
 +
perl ${BIN}/vcfSummary.pl --vcf ${OUT}/NA12878.exon.sample.vcf --dbsnp ${REF}/dbsnp_129_b37.rod.chr20.map --bfile ${REF}/hapmap3_r3_b37_fwd.consensus.qc.poly.chr20
 +
 +
perl ${BIN}/vcfSummary.pl --vcf ${OUT}/NA12878.highcov.sample.vcf --dbsnp ${REF}/dbsnp_129_b37.rod.chr20.map --bfile ${REF}/hapmap3_r3_b37_fwd.consensus.qc.poly.chr20
 +
 +
== Filtering Examples in multisample calling ==
 +
* [[Media:Filtering example unfiltered.pdf | (BEFORE FILTERING)]]
 +
* [[Media:Filtering example.pdf | (AFTER FILTERING)]]
 +
 +
 +
== Where can you download this software? ==
 +
samtools-hybrid, glfSingle, superDeDuper, and qplot can be downloaded from:
 +
https://github.com/statgen/statgen

Navigation menu