Line 262: |
Line 262: |
| export HK=/net/seqshop-server/home/hmkang/apigenome/bin | | export HK=/net/seqshop-server/home/hmkang/apigenome/bin |
| export EPACTS=/net/seqshop-server/home/mktrost/seqshop/epacts/ | | export EPACTS=/net/seqshop-server/home/mktrost/seqshop/epacts/ |
| + | export REF=/net/seqshop-server/home/mktrost/seqshop/singleSample/ref/gotcloud.ref |
| + | export GC=~/seqshop/gotcloud |
| + | |
| + | |
| + | export SAMPLE=SampleXX |
| + | export OUT=~/$SAMPLE/output |
| | | |
| ==== Annotation / Lookup against dbSNP ==== | | ==== Annotation / Lookup against dbSNP ==== |
Line 271: |
Line 277: |
| If you want to run this command across all chromosomes in parallel, you can use the special script run-command-wgs | | If you want to run this command across all chromosomes in parallel, you can use the special script run-command-wgs |
| | | |
− | $HK/run-make --repeat-chr --cmd "$HK/vcf-add-rsid -vcf $OUT/vcfs/chr1/chr1.filtered.vcf.gz --db $HK/../data/dbsnp_142.b37.vcf.gz --out $OUT/vcfs/chr1/chr1.filtered.rsid.vcf.gz" --numjobs 6 | + | $HK/run-make --repeat-chr --cmd "$HK/vcf-add-rsid -vcf $OUT/vcfs/chr1/chr1.filtered.vcf.gz --db $HK/../data/dbsnp_142.b37.vcf.gz --out $OUT/vcfs/chr1/chr1.filtered.rsid.vcf.gz" --numjobs 6 --out runmake.rsid |
| | | |
− | Looking up SNPs by rsID is possible by (for example) | + | Looking up SNPs by rsID is possible by (for example, rs17766217) -- How can we find its position? |
− | $HK/vcf-lookup-rsid --vcf $OUT/vcfs/chr1/chr1.filtered.vcf.gz --sepchr --rs rs17766217 | + | $HK/tabix $OUT/vcfs/chr8/chr8.filtered.rsid.vcf.gz 8:128504497 | less |
| * Be sure to look at the QUAL & your sample's PL, and not just the GL field. Check if QUAL is 0 or PL is 0,0,0 - NS is also probably 0; DP is probably 0. That means you probably didn't have any copies, so your GT may not be correct/is unknown. | | * Be sure to look at the QUAL & your sample's PL, and not just the GL field. Check if QUAL is 0 or PL is 0,0,0 - NS is also probably 0; DP is probably 0. That means you probably didn't have any copies, so your GT may not be correct/is unknown. |
| | | |
| If you want to browse the rsIDs of known GWAS SNPs, you can do this by | | If you want to browse the rsIDs of known GWAS SNPs, you can do this by |
− | cut -f 1,8,22 $HK/../data/gwascatalog/gwascatalog.txt | less | + | cut -f 1,8,12,13,22 $HK/../data/gwascatalog/gwascatalog.txt | grep -w rs17766217 |
| | | |
| ==== Annotating your genome ==== | | ==== Annotating your genome ==== |
Line 286: |
Line 292: |
| | | |
| Or you can run multiple chromosomes in parallel in one command | | Or you can run multiple chromosomes in parallel in one command |
− | $HK/run-make --repeat-chr --cmd "$EPACTS/bin/epacts anno --in $OUT/vcfs/chr1/chr1.filtered.rsid.vcf.gz --out $OUT/vcfs/chr1/chr1.filtered.rsid.anno.vcf.gz" --numjobs 6 | + | $HK/run-make --repeat-chr --cmd "$EPACTS/bin/epacts anno --in $OUT/vcfs/chr1/chr1.filtered.rsid.vcf.gz --out $OUT/vcfs/chr1/chr1.filtered.rsid.anno.vcf.gz" --numjobs 6 --out runmake.anno |
| | | |
| ==== Extracting only exonic SNPs ==== | | ==== Extracting only exonic SNPs ==== |
| | | |
| If you want to look at the exonic SNPs, you can extract using the following command | | If you want to look at the exonic SNPs, you can extract using the following command |
− | $HK/run-command-wgs --cmd "($HK/tabix -H $OUT/vcfs/chr1/chr1.filtered.rsid.anno.vcf.gz; zcat $OUT/vcfs/chr1/chr1.filtered.rsid.anno.vcf.gz | grep Exon;)| $HK/bgzip -c > $OUT/vcfs/chr1/chr1.filtered.rsid.anno.exon.vcf.gz" --numjobs 6 | + | $HK/run-make --repeat-chr --cmd "($HK/tabix -H $OUT/vcfs/chr1/chr1.filtered.rsid.anno.vcf.gz; zcat $OUT/vcfs/chr1/chr1.filtered.rsid.anno.vcf.gz | grep Exon;)| $HK/bgzip -c > $OUT/vcfs/chr1/chr1.filtered.rsid.anno.exon.vcf.gz" --numjobs 6 --out runmake.exome |
| | | |
| And they can be combined as follows | | And they can be combined as follows |
| (zcat $OUT/vcfs/chr1/chr1.filtered.rsid.anno.exon.vcf.gz; zcat $OUT/vcfs/chr[2-9]/chr*.filtered.rsid.anno.exon.vcf.gz $OUT/vcfs/chr??/chr*.filtered.rsid.anno.exon.vcf.gz $OUT/vcfs/chrX/chrX.filtered.rsid.anno.exon.vcf.gz | grep -v ^#) | $HK/bgzip -c > $OUT/wgs.filtered.rsid.anno.exon.vcf.gz | | (zcat $OUT/vcfs/chr1/chr1.filtered.rsid.anno.exon.vcf.gz; zcat $OUT/vcfs/chr[2-9]/chr*.filtered.rsid.anno.exon.vcf.gz $OUT/vcfs/chr??/chr*.filtered.rsid.anno.exon.vcf.gz $OUT/vcfs/chrX/chrX.filtered.rsid.anno.exon.vcf.gz | grep -v ^#) | $HK/bgzip -c > $OUT/wgs.filtered.rsid.anno.exon.vcf.gz |
| + | $HK/tabix -pvcf $OUT/wgs.filtered.rsid.anno.exon.vcf.gz |
| | | |
| ==== Exonic Variants NOT found by 1000G ==== | | ==== Exonic Variants NOT found by 1000G ==== |
Line 318: |
Line 325: |
| | | |
| Want to see this from the BAM file? Use samtools tview: | | Want to see this from the BAM file? Use samtools tview: |
− | $GC/bin/samtools tview $SAMPLE/output/bams/$SAMPLE.recal.bam $GC/gotcloud.ref/human.g1k.v37.fa | + | $GC/bin/samtools tview $SAMPLE/output/bams/$SAMPLE.recal.bam $REF/hs37d5.fa |
| Use 'g' & enter the Chr:Pos | | Use 'g' & enter the Chr:Pos |
| * Some patterns may indicate not real variants. | | * Some patterns may indicate not real variants. |
Line 327: |
Line 334: |
| | | |
| The phred score at the last column quantifies the degree of functional significance | | The phred score at the last column quantifies the degree of functional significance |
− |
| |
| | | |
| | | |
| </div> | | </div> |
| </div> | | </div> |
| + | |
| + | == OVERALL COURSE FEEDBACK! == |
| + | Please provide feedback: |
| + | https://docs.google.com/forms/d/1pxfPXKwWfA71ZJM99Sevs3MwAUz2UbHAR8dnRI-kRNM/viewform |