Line 719: |
Line 719: |
| | | |
| <div class=" mw-collapsible mw-collapsed"> | | <div class=" mw-collapsible mw-collapsed"> |
− | #converts in.bcf to tab format with selected INFO fields | + | #converts in.bcf to tab format with selected INFO and FILTER fields |
− | vt info2tab in.bcf -v -t EX_RL,FZ_RL,MDUST,LOBSTR,VNTRSEEK,RMSK,EX_REPEAT_TRACT | + | vt info2tab in.bcf -u PASS -t EX_RL,FZ_RL,MDUST,LOBSTR,VNTRSEEK,RMSK,EX_REPEAT_TRACT |
− | | |
| <div style="height:6em; overflow:auto; border: 2px solid #FFF"> | | <div style="height:6em; overflow:auto; border: 2px solid #FFF"> |
| + | INPUT |
| + | ===== |
| 20 17548608 . A AC . PASS CENTERS=vbi;NCENTERS=1;OLD_MULTIALLELIC=20:17548598:GAAAAAAAAAAAAA/GAAAAAAAAAAAA/GAAAAAAAAAAAAAA/GAAAAAAAAAA/GAAAAAAAAAAA/GAAAAAAAAAACAAA;OLD_VARIANT=20:17548598:GAAAAAAAAAAAAAG/GAAAAAAAAAACAAAG;EX_MOTIF=C;EX_MLEN=1;EX_RU=C;EX_BASIS=C;EX_BLEN=1;EX_REPEAT_TRACT=17548608,17548609;EX_COMP=100,0,0,0;EX_ENTROPY=0;EX_ENTROPY2=0;EX_KL_DIVERGENCE=2;EX_KL_DIVERGENCE2=4;EX_REF=2;EX_RL=2;EX_LL=3;EX_RU_COUNTS=0,2;EX_SCORE=0;EX_TRF_SCORE=-14;FZ_MOTIF=A;FZ_MLEN=1;FZ_RU=A;FZ_BASIS=A;FZ_BLEN=1;FZ_REPEAT_TRACT=17548599,17548611;FZ_COMP=100,0,0,0;FZ_ENTROPY=0;FZ_ENTROPY2=0;FZ_KL_DIVERGENCE=2;FZ_KL_DIVERGENCE2=4;FZ_REF=13;FZ_RL=13;FZ_LL=14;FZ_RU_COUNTS=13,13;FZ_SCORE=1;FZ_TRF_SCORE=26;FLANKSEQ=GAAAAAAAAA[A]AAAGAAGGAA;MDUST;LOBSTR | | 20 17548608 . A AC . PASS CENTERS=vbi;NCENTERS=1;OLD_MULTIALLELIC=20:17548598:GAAAAAAAAAAAAA/GAAAAAAAAAAAA/GAAAAAAAAAAAAAA/GAAAAAAAAAA/GAAAAAAAAAAA/GAAAAAAAAAACAAA;OLD_VARIANT=20:17548598:GAAAAAAAAAAAAAG/GAAAAAAAAAACAAAG;EX_MOTIF=C;EX_MLEN=1;EX_RU=C;EX_BASIS=C;EX_BLEN=1;EX_REPEAT_TRACT=17548608,17548609;EX_COMP=100,0,0,0;EX_ENTROPY=0;EX_ENTROPY2=0;EX_KL_DIVERGENCE=2;EX_KL_DIVERGENCE2=4;EX_REF=2;EX_RL=2;EX_LL=3;EX_RU_COUNTS=0,2;EX_SCORE=0;EX_TRF_SCORE=-14;FZ_MOTIF=A;FZ_MLEN=1;FZ_RU=A;FZ_BASIS=A;FZ_BLEN=1;FZ_REPEAT_TRACT=17548599,17548611;FZ_COMP=100,0,0,0;FZ_ENTROPY=0;FZ_ENTROPY2=0;FZ_KL_DIVERGENCE=2;FZ_KL_DIVERGENCE2=4;FZ_REF=13;FZ_RL=13;FZ_LL=14;FZ_RU_COUNTS=13,13;FZ_SCORE=1;FZ_TRF_SCORE=26;FLANKSEQ=GAAAAAAAAA[A]AAAGAAGGAA;MDUST;LOBSTR |
| 20 17548608 . AAAAG A . PASS CENTERS=ox1;NCENTERS=1;EX_MOTIF=AAAG;EX_MLEN=4;EX_RU=AAAG;EX_BASIS=AG;EX_BLEN=2;EX_REPEAT_TRACT=17548609,17548612;EX_COMP=100,0,0,0;EX_ENTROPY=0;EX_ENTROPY2=0;EX_KL_DIVERGENCE=2;EX_KL_DIVERGENCE2=4;EX_REF=0.75;EX_RL=4;EX_LL=4;EX_RU_COUNTS=0,1;EX_SCORE=0.75;EX_TRF_SCORE=-1;FZ_MOTIF=A;FZ_MLEN=1;FZ_RU=A;FZ_BASIS=A;FZ_BLEN=1;FZ_REPEAT_TRACT=17548599,17548611;FZ_COMP=100,0,0,0;FZ_ENTROPY=0;FZ_ENTROPY2=0;FZ_KL_DIVERGENCE=2;FZ_KL_DIVERGENCE2=4;FZ_REF=13;FZ_RL=13;FZ_LL=13;FZ_RU_COUNTS=13,13;FZ_SCORE=1;FZ_TRF_SCORE=26;FLANKSEQ=GAAAAAAAAA[AAAAG]AAGGAACTAC;MDUST;LOBSTR;OLD_VARIANT=20:17548598:GAAAAAAAAAAAAAG/GAAAAAAAAAA | | 20 17548608 . AAAAG A . PASS CENTERS=ox1;NCENTERS=1;EX_MOTIF=AAAG;EX_MLEN=4;EX_RU=AAAG;EX_BASIS=AG;EX_BLEN=2;EX_REPEAT_TRACT=17548609,17548612;EX_COMP=100,0,0,0;EX_ENTROPY=0;EX_ENTROPY2=0;EX_KL_DIVERGENCE=2;EX_KL_DIVERGENCE2=4;EX_REF=0.75;EX_RL=4;EX_LL=4;EX_RU_COUNTS=0,1;EX_SCORE=0.75;EX_TRF_SCORE=-1;FZ_MOTIF=A;FZ_MLEN=1;FZ_RU=A;FZ_BASIS=A;FZ_BLEN=1;FZ_REPEAT_TRACT=17548599,17548611;FZ_COMP=100,0,0,0;FZ_ENTROPY=0;FZ_ENTROPY2=0;FZ_KL_DIVERGENCE=2;FZ_KL_DIVERGENCE2=4;FZ_REF=13;FZ_RL=13;FZ_LL=13;FZ_RU_COUNTS=13,13;FZ_SCORE=1;FZ_TRF_SCORE=26;FLANKSEQ=GAAAAAAAAA[AAAAG]AAGGAACTAC;MDUST;LOBSTR;OLD_VARIANT=20:17548598:GAAAAAAAAAAAAAG/GAAAAAAAAAA |
− |
| |
| </div> | | </div> |
− | | + | OUTPUT |
− | CHROM POS REF ALT N_ALLELE EX_RL FZ_RL MDUST LOBSTR VNTRSEEK RMSK EX_REPEAT_TRACT_1 EX_REPEAT_TRACT_2 | + | ====== |
− | 20 17548608 A AC 2 2 13 1 1 0 0 17548608 17548608 | + | CHROM POS REF ALT N_ALLELE PASS EX_RL FZ_RL MDUST LOBSTR VNTRSEEK RMSK EX_REPEAT_TRACT_1 EX_REPEAT_TRACT_2 |
− | 20 17548608 AAAAG A 2 4 13 1 1 0 0 17548609 17548609 | + | 20 17548608 A AC 2 1 2 13 1 1 0 0 17548608 17548608 |
| + | 20 17548608 AAAAG A 2 1 4 13 1 1 0 0 17548609 17548609 |
| | | |
| <div class="mw-collapsible-content"> | | <div class="mw-collapsible-content"> |
| usage : vt info2tab [options] <in.vcf> | | usage : vt info2tab [options] <in.vcf> |
| | | |
− | options : -v print variant CHROM,POS,REF,ALT,N_ALLELE [false] | + | options : -d debug [false] |
− | -d debug [false]
| |
| -f filter expression [] | | -f filter expression [] |
− | -t list of info tags to be extracted [] | + | -u list of filter tags to be extracted [] |
| + | -t list of info tags to be extracted [] |
| -o output tab delimited file [-] | | -o output tab delimited file [-] |
| -I file containing list of intervals [] | | -I file containing list of intervals [] |
Line 1,053: |
Line 1,053: |
| -i Intervals | | -i Intervals |
| -? displays help | | -? displays help |
| + | </div> |
| + | </div> |
| + | |
| + | === Profile Mendelian Errors === |
| + | |
| + | Profile Mendelian errors |
| + | |
| + | <div class=" mw-collapsible mw-collapsed"> |
| + | #profile mendelian errors found in vt.genotypes.bcf, generate [[media:mendel.pdf|tables]] in the directory mendel, requires pdflatex. |
| + | vt profile_mendelian vt.genotypes.bcf -p trios.ped -x mendel |
| + | |
| + | The pedigree file format is described [[Vt#Pedigree File|here]]. |
| + | |
| + | #this is a sample output for mendelian error profiling. |
| + | #R and A stand for reference and alternate allele respectively. |
| + | #Error% - mendelian error (confounded with de novo mutation) |
| + | #HomHet - Homozygous-Heterozygous genotype ratios |
| + | #Het% - proportion of hets |
| + | Mendelian Errors <br> |
| + | Father Mother R/R R/A A/A Error(%) HomHet Het(%) |
| + | R/R R/R 14889 210 38 1.64 nan nan |
| + | R/R R/A 3403 3497 74 1.06 0.97 50.68 |
| + | R/R A/A 176 1482 155 18.26 nan nan |
| + | R/A R/R 3665 3652 68 0.92 1.00 49.91 |
| + | R/A R/A 1015 3151 990 0.00 0.64 61.11 |
| + | R/A A/A 43 1300 1401 1.57 1.08 48.13 |
| + | A/A R/R 172 1365 147 18.94 nan nan |
| + | A/A R/A 47 1164 1183 1.96 1.02 49.60 |
| + | A/A A/A 20 78 5637 1.71 nan nan <br> |
| + | Parental R/R R/A A/A Error(%) HomHet Het(%) |
| + | R/R R/R 14889 210 38 1.64 nan nan |
| + | R/R R/A 7068 7149 142 0.99 0.99 50.28 |
| + | R/R A/A 348 2847 302 18.59 nan nan |
| + | R/A R/A 1015 3151 990 0.00 0.64 61.11 |
| + | R/A A/A 90 2464 2584 1.75 1.05 48.81 |
| + | A/A A/A 20 78 5637 1.71 nan nan <br> |
| + | Parental R/R R/A A/A Error(%) HomHet Het(%) |
| + | HOM HOM 14909 288 5675 1.66 nan nan |
| + | HOM HET 7158 9613 2726 1.19 1.00 49.90 |
| + | HET HET 1015 3151 990 0.00 0.64 61.11 |
| + | HOMREF HOMALT 348 2847 302 18.59 nan nan <br> |
| + | total mendelian error : 2.505% |
| + | no. of trios : 2 |
| + | no. of variants : 25346 |
| + | |
| + | <div class="mw-collapsible-content"> |
| + | profile_mendelian v0.5 |
| + | |
| + | usage : vt profile_mendelian [options] <in.vcf> |
| + | |
| + | options : -q minimum genotype quality |
| + | -d minimum depth |
| + | -r reference sequence fasta file [] |
| + | -x output latex directory [] |
| + | -p pedigree file |
| + | -I file containing list of intervals [] |
| + | -i intervals |
| + | -? displays help |
| </div> | | </div> |
| </div> | | </div> |
Line 1,252: |
Line 1,310: |
| </div> | | </div> |
| | | |
− | === Profile Mendelian Errors === | + | === Profile NA12878 === |
| | | |
| Profile Mendelian errors | | Profile Mendelian errors |
| | | |
| <div class=" mw-collapsible mw-collapsed"> | | <div class=" mw-collapsible mw-collapsed"> |
− | #profile mendelian errors found in vt.genotypes.bcf, generate [[media:mendel.pdf|tables]] in the directory mendel, requires pdflatex. | + | #profile NA12878 overlap with broad knowledgebase and illumina platinum genomes for the file vt.genotypes.bcf for chromosome 20. |
− | vt profile_mendelian vt.genotypes.bcf -p trios.ped -x mendel | + | vt profile_na12878 vt.genotypes.bcf -g na12878.reference.txt -r hs37d5.fa -i 20 |
− | | |
− | pedigree file format is described in [http://csg.sph.umich.edu//abecasis/merlin/tour/input_files.html here]
| |
| | | |
| #this is a sample output for mendelian error profiling. | | #this is a sample output for mendelian error profiling. |
Line 1,267: |
Line 1,323: |
| #HomHet - Homozygous-Heterozygous genotype ratios | | #HomHet - Homozygous-Heterozygous genotype ratios |
| #Het% - proportion of hets | | #Het% - proportion of hets |
− | Mendelian Errors <br>
| + | data set |
− | Father Mother R/R R/A A/A Error(%) HomHet Het(%)
| + | No Indels : 27770 [0.94] |
− | R/R R/R 14889 210 38 1.64 nan nan
| + | FS/NFS : 0.26 (8/23) <br> |
− | R/R R/A 3403 3497 74 1.06 0.97 50.68
| + | broad.kb |
− | R/R A/A 176 1482 155 18.26 nan nan
| + | A-B 13071 [1.19] |
− | R/A R/R 3665 3652 68 0.92 1.00 49.91
| + | A&B 14699 [0.76] |
− | R/A R/A 1015 3151 990 0.00 0.64 61.11
| + | B-A 21546 [0.62] |
− | R/A A/A 43 1300 1401 1.57 1.08 48.13
| + | Precision 52.9% |
− | A/A R/R 172 1365 147 18.94 nan nan
| + | Sensitivity 40.6% <br> |
− | A/A R/A 47 1164 1183 1.96 1.02 49.60
| + | illumina.platinum |
− | A/A A/A 20 78 5637 1.71 nan nan <br>
| + | A-B 17952 [0.88] |
− | Parental R/R R/A A/A Error(%) HomHet Het(%)
| + | A&B 9818 [1.07] |
− | R/R R/R 14889 210 38 1.64 nan nan
| + | B-A 2418 [0.88] |
− | R/R R/A 7068 7149 142 0.99 0.99 50.28
| + | Precision 35.4% |
− | R/R A/A 348 2847 302 18.59 nan nan
| + | Sensitivity 80.2% <br> |
− | R/A R/A 1015 3151 990 0.00 0.64 61.11
| + | broad.kb |
− | R/A A/A 90 2464 2584 1.75 1.05 48.81
| + | R/R R/A A/A ./. |
− | A/A A/A 20 78 5637 1.71 nan nan <br> | + | R/R 346 145 3 5473 |
− | Parental R/R R/A A/A Error(%) HomHet Het(%) | + | R/A 3 4133 9 758 |
− | HOM HOM 14909 288 5675 1.66 nan nan | + | A/A 2 136 2186 956 |
− | HOM HET 7158 9613 2726 1.19 1.00 49.90 | + | ./. 2 139 86 322 <br> |
− | HET HET 1015 3151 990 0.00 0.64 61.11 | + | Total genotype pairs : 6963 |
− | HOMREF HOMALT 348 2847 302 18.59 nan nan <br> | + | Concordance : 95.72% (6665) |
− | total mendelian error : 2.505%
| + | Discordance : 4.28% (298) <br> |
− | no. of trios : 2
| + | illumina.platinum |
− | no. of variants : 25346
| + | R/R R/A A/A ./. |
| + | R/R 1768 85 2 0 |
| + | R/A 10 4479 14 0 |
| + | A/A 13 180 3028 0 |
| + | ./. 71 98 70 0 <br> |
| + | Total genotype pairs : 9579 |
| + | Concordance : 96.83% (9275) |
| + | Discordance : 3.17% (304) |
| + | |
| + | # This file contains information on how to process reference data sets. |
| + | # |
| + | # dataset - name of data set, this label will be printed. |
| + | # type - True Positives (TP) and False Positives (FP) |
| + | # overlap percentages labeled as (Precision, Sensitivity) and (False Discovery Rate, Type I Error) respectively |
| + | # - annotation |
| + | # file is used for GENCODE annotation of frame shift and non frame shift Indels |
| + | # filter - filter applied to variants for this particular data set |
| + | # path - path of indexed BCF file |
| + | #dataset type filter path |
| + | broad.kb TP PASS /net/fantasia/home/atks/dev/vt/bundle/public/grch37/broad.kb.241365variants.genotypes.bcf |
| + | illumina.platinum TP PASS /net/fantasia/home/atks/dev/vt/bundle/public/grch37/NA12878.illumina.platinum.5284448variants.genotypes.bcf |
| + | #gencode.v19 annotation . /net/fantasia/home/atks/dev/vt/bundle/public/grch37/gencode.v19.annotation.gtf.gz |
| + | <div class="mw-collapsible-content"> |
| + | profile_na12878 v0.5 |
| + | |
| + | usage : vt profile_na12878 [options] <in.vcf> |
| + | |
| + | options : -g file containing list of reference datasets [] |
| + | -I file containing list of intervals [] |
| + | -i intervals [] |
| + | -r reference sequence fasta file [] |
| + | -? displays help |
| + | </div> |
| + | </div> |
| | | |
| = Variant Calling = | | = Variant Calling = |
Line 1,638: |
Line 1,727: |
| </div> | | </div> |
| </div> | | </div> |
| + | |
| + | = Pedigree File = |
| + | |
| + | vt understands an augmented version of the PED format described by [http://zzz.bwh.harvard.edu/plink/data.shtml#ped plink]; the augmentation was introduced by [mailto:hmkang@umich.edu Hyun]. |
| + | The pedigree file format has the following mandatory fields: |
| + | |
| + | {| class="wikitable" |
| + | |- |
| + | ! scope="col"| Field |
| + | ! scope="col"| Description |
| + | ! scope="col"| Valid Values |
| + | ! scope="col"| Missing Values |
| + | |- |
| + | |Family ID<br> |
| + | Individual ID<br> |
| + | Paternal ID<br> |
| + | Maternal ID<br> |
| + | Sex<br> |
| + | Phenotype |
| + | |ID of this family <br> |
| + | ID(s) of this individual (comma separated) <br> |
| + | ID of the father <br> |
| + | ID of the mother <br> |
| + | Sex of the individual<br> |
| + | Phenotype |
| + | |[A-Za-z0-9_]+<br> |
| + | [A-Za-z0-9_]+(,[A-Za-z0-9_]+)* <br> |
| + | [A-Za-z0-9_]+ <br> |
| + | [A-Za-z0-9_]+<br> |
| + | 1=male, 2=female, other, male, female<br> |
| + | [A-Za-z0-9_]+ |
| + | | 0 <br> |
| + | cannot be missing <br> |
| + | 0 <br> |
| + | 0 <br> |
| + | other<br> |
| + | -9 |
| + | |} |
| + | |
| + | Examples: |
| + | |
| + | ceu NA12878 NA12891 NA12892 female -9 |
| + | yri NA19240 NA19239 NA19238 female -9 |
| + | |
| + | ceu NA12878 NA12891 NA12892 2 -9 |
| + | yri NA19240 NA19239 NA19238 2 -9 |
| + | |
| + | #allows tools like profile_mendelian to detect duplicates and check for concordance |
| + | ceu NA12878,NA12878A NA12891 NA12892 female case |
| + | yri NA19240 NA19239 NA19238 female control |
| + | |
| + | #individuals with unknown parents have their paternal and maternal IDs set to the missing value 0 |
| + | ceu NA12412 0 0 female case |
| + | yri NA19650 0 0 female control |
| | | |
| = Resource Bundle = | | = Resource Bundle = |