Difference between revisions of "BamUtil: stats"
(add that deletions/skips are excluded from depth/q20 bases) |
|||
Line 87: | Line 87: | ||
{|border=1 | {|border=1 | ||
− | ! Field !! Description !!style="width: 80px"| Excludes Duplicates, QC Failures !!style="width: 80px"| Excludes Unmapped !!style="width: 80px"| Excludes MapQual = 255 !!style="width: 80px"| Excludes Below Min MapQual | + | ! Field !! Description !!style="width: 80px"| Excludes Duplicates, QC Failures !!style="width: 80px"| Excludes Unmapped !!style="width: 80px"| Excludes MapQual = 255 !!style="width: 80px"| Excludes Below Min MapQual !!style="width: 80px"| Excludes CIGAR Deletions, Skips |
|- | |- | ||
| chrom || Chromosome/reference name string from the SAM/BAM | | chrom || Chromosome/reference name string from the SAM/BAM | ||
Line 95: | Line 95: | ||
| chromEnd || 0-based end position (always 1 greater than start and not included in this region) | | chromEnd || 0-based end position (always 1 greater than start and not included in this region) | ||
|- | |- | ||
− | | Depth || # of reads that are mapped with acceptable Mapping Quality, and are not duplicates or QC failures || align="center"|X || align="center"|X || || align="center"|X | + | | Depth || # of reads that are mapped with acceptable Mapping Quality, and are not duplicates or QC failures || align="center"|X || align="center"|X || || align="center"|X || align="center"|X |
|- | |- | ||
− | | Q20Bases || # of bases at this position with a base quality (from the read) of Q20 or higher || align="center"|X || align="center"|X || || align="center"|X | + | | Q20Bases || # of bases at this position with a base quality (from the read) of Q20 or higher || align="center"|X || align="center"|X || || align="center"|X || align="center"|X |
|- | |- | ||
− | | Q20BasesPct(%) || Q20Bases / Depth || align="center"|X || align="center"|X || || align="center"|X | + | | Q20BasesPct(%) || Q20Bases / Depth || align="center"|X || align="center"|X || || align="center"|X || align="center"|X |
|- | |- | ||
− | | TotalReads || # of reads that span this position || || || || | + | | TotalReads || # of reads that span this position || || || || || |
|- | |- | ||
− | | MappedBases || # of reads marked mapped in the flag || align="center"|X || align="center"|X || || | + | | MappedBases || # of reads marked mapped in the flag || align="center"|X || align="center"|X || || || |
|- | |- | ||
− | | MappingRate(%) || MappedBases / TotalReads || align="center"|X || align="center"|X || || | + | | MappingRate(%) || MappedBases / TotalReads || align="center"|X || align="center"|X || || || |
|- | |- | ||
− | | MapRate_MQPass(%) || # of reads that have a Mapping Quality >= a minimum Mapping Quality / TotalReads || align="center"|X || align="center"|X || || | + | | MapRate_MQPass(%) || # of reads that have a Mapping Quality >= a minimum Mapping Quality / TotalReads || align="center"|X || align="center"|X || || || |
|- | |- | ||
− | | ZeroMapQual(%) || # of reads that have a Mapping Quality of 0 / TotalReads || align="center"|X || align="center"|X || || | + | | ZeroMapQual(%) || # of reads that have a Mapping Quality of 0 / TotalReads || align="center"|X || align="center"|X || || || |
|- | |- | ||
− | | MapQual<10(%) || # of reads that have a Mapping Quality < 10 / TotalReads || align="center"|X || align="center"|X || || | + | | MapQual<10(%) || # of reads that have a Mapping Quality < 10 / TotalReads || align="center"|X || align="center"|X || || || |
|- | |- | ||
− | | PairedReads(%) || # of reads marked paired in the flag / TotalReads || align="center"|X || align="center"|X || || | + | | PairedReads(%) || # of reads marked paired in the flag / TotalReads || align="center"|X || align="center"|X || || || |
|- | |- | ||
− | | ProperPaired(%) || # of reads marked paired AND proper paired in the flag / TotalReads || align="center"|X || align="center"|X || || | + | | ProperPaired(%) || # of reads marked paired AND proper paired in the flag / TotalReads || align="center"|X || align="center"|X || || || |
|- | |- | ||
− | | DupRate(%) || # of reads marked duplicate in the flag / TotalReads || || || || | + | | DupRate(%) || # of reads marked duplicate in the flag / TotalReads || || || || || |
|- | |- | ||
− | | QCFailRate(%) || # of reads marked QC failure in the flag / TotalReads || || || || | + | | QCFailRate(%) || # of reads marked QC failure in the flag / TotalReads || || || || || |
|- | |- | ||
− | | AverageMapQuality || sum of included mapping qualities / AverageMapQualCount || align="center"|X || align="center"|X || align="center"|X || | + | | AverageMapQuality || sum of included mapping qualities / AverageMapQualCount || align="center"|X || align="center"|X || align="center"|X || || |
|- | |- | ||
− | | AverageMapQualCount || # of mapping qualities in AverageMapQuality || align="center"|X || align="center"|X || align="center"|X || | + | | AverageMapQualCount || # of mapping qualities in AverageMapQuality || align="center"|X || align="center"|X || align="center"|X || || |
|- | |- | ||
|} | |} | ||
Line 148: | Line 148: | ||
Order/Descriptions: | Order/Descriptions: | ||
{|border=1 | {|border=1 | ||
− | ! Field !! Description !!style="width: 80px"| Excludes Duplicates, QC Failures !!style="width: 80px"| Excludes Unmapped !!style="width: 80px"| Excludes MapQual = 255 !!style="width: 80px"| Excludes Below Min MapQual | + | ! Field !! Description !!style="width: 80px"| Excludes Duplicates, QC Failures !!style="width: 80px"| Excludes Unmapped !!style="width: 80px"| Excludes MapQual = 255 !!style="width: 80px"| Excludes Below Min MapQual !!style="width: 80px"| Excludes CIGAR Deletions, Skips |
|- | |- | ||
| chrom || Chromosome/reference name string from the SAM/BAM | | chrom || Chromosome/reference name string from the SAM/BAM | ||
Line 156: | Line 156: | ||
| chromEnd || 0-based end position (always 1 greater than start and not included in this region) | | chromEnd || 0-based end position (always 1 greater than start and not included in this region) | ||
|- | |- | ||
− | | TotalReads || # of reads that span this position || || || || | + | | TotalReads || # of reads that span this position || || || || || |
|- | |- | ||
− | | Dups || # of reads marked duplicate in the flag || || || || | + | | Dups || # of reads marked duplicate in the flag || || || || || |
|- | |- | ||
− | | QCFail || # of reads marked QC failure in the flag || || || || | + | | QCFail || # of reads marked QC failure in the flag || || || || || |
|- | |- | ||
− | | Mapped || # of reads marked mapped in the flag || align="center"|X || align="center"|X || || | + | | Mapped || # of reads marked mapped in the flag || align="center"|X || align="center"|X || || || |
|- | |- | ||
− | | Paired || # of reads marked paired in the flag || align="center"|X || align="center"|X || || | + | | Paired || # of reads marked paired in the flag || align="center"|X || align="center"|X || || || |
|- | |- | ||
− | | ProperPaired || # of reads marked paired AND proper paired in the flag || align="center"|X || align="center"|X || || | + | | ProperPaired || # of reads marked paired AND proper paired in the flag || align="center"|X || align="center"|X || || || |
|- | |- | ||
− | | ZeroMapQual || # of reads that have a Mapping Quality of 0 || align="center"|X || align="center"|X || || | + | | ZeroMapQual || # of reads that have a Mapping Quality of 0 || align="center"|X || align="center"|X || || || |
|- | |- | ||
− | | MapQual<10(%) || # of reads that have a Mapping Quality < 10 || align="center"|X || align="center"|X || || | + | | MapQual<10(%) || # of reads that have a Mapping Quality < 10 || align="center"|X || align="center"|X || || || |
|- | |- | ||
− | | MapQual255 || # of reads that have a Mapping Quality = 255 || align="center"|X || align="center"|X || || | + | | MapQual255 || # of reads that have a Mapping Quality = 255 || align="center"|X || align="center"|X || || || |
|- | |- | ||
− | | PassMapQual || # of reads that have a Mapping Quality >= a minimum Mapping Quality || align="center"|X || align="center"|X || || | + | | PassMapQual || # of reads that have a Mapping Quality >= a minimum Mapping Quality || align="center"|X || align="center"|X || || || |
|- | |- | ||
− | | AverageMapQuality || sum of included mapping qualities / AverageMapQualCount || align="center"|X || align="center"|X || align="center"|X || | + | | AverageMapQuality || sum of included mapping qualities / AverageMapQualCount || align="center"|X || align="center"|X || align="center"|X || || |
|- | |- | ||
| AverageMapQualCount || # of mapping qualities in AverageMapQuality || align="center"|X || align="center"|X || align="center"|X || | | AverageMapQualCount || # of mapping qualities in AverageMapQuality || align="center"|X || align="center"|X || align="center"|X || | ||
+ | |- || | ||
+ | | Depth || # of reads that are mapped with acceptable Mapping Quality, and are not duplicates or QC failures || align="center"|X || align="center"|X || || align="center"|X || align="center"|X | ||
|- | |- | ||
− | + | | Q20Bases || # of bases at this position with a base quality (from the read) of Q20 or higher || align="center"|X || align="center"|X || || align="center"|X || align="center"|X | |
− | |||
− | | Q20Bases || # of bases at this position with a base quality (from the read) of Q20 or higher || align="center"|X || align="center"|X || || align="center"|X | ||
|- | |- | ||
|} | |} |
Revision as of 16:14, 7 October 2011
Overview of the stats function of bamUtil
The stats option on the BamUtil executable generates the specified statistics on a SAM/BAM file.
Parameters
Required Parameters:
    --in : the SAM/BAM file to calculate stats for
Types of Statistics that can be generated:
    --basic : Turn on basic statistic generation
    --qual : Generate a count for each quality (displayed as non-phred quality)
    --phred : Generate a count for each quality (displayed as phred quality)
    --baseQC : Write per base statistics to the specified file.
Optional Parameters:
    --maxNumReads : Maximum number of reads to process
                    Defaults to -1 to indicate all reads.
    --unmapped : Only process unmapped reads (requires a bamIndex file)
    --bamIndex : The path/name of the bam index file
                 (if required and not specified, uses the --in value + ".bai")
    --regionList : File containing the region list chr<tab>start_pos<tab>end_pos.
                   Positions are 0 based and the end_pos is not included in the region.
                   Uses bamIndex.
    --minMapQual : The minimum mapping quality for filtering reads in the baseQC stats.
    --dbsnp : The dbSnp file of positions to exclude from baseQC analysis.
    --noeof : Do not expect an EOF block on a bam file.
    --params : Print the parameter settings
For all types of statistics, the bam file used is specified by --in.
The optional parameters are also used for all types of statistics.
Usage:
./bam stats --in <inputFile> [--basic] [--qual] [--phred] [--baseQC <outputFileName>] [--maxNumReads <maxNum>] [--unmapped] [--bamIndex <bamIndexFile>] [--regionList <regFileName>] [--minMapQual <minMapQ>] [--dbsnp <dbsnpFile>] [--noeof] [--params]
Types of Statistics
Basic
Prints summary statistics for the file:
- TotalReads - # of reads that are in the file
- MappedReads - # of reads marked mapped in the flag
- PairedReads - # of reads marked paired in the flag
- ProperPair - # of reads marked paired AND proper paired in the flag
- DuplicateReads - # of reads marked duplicate in the flag
- QCFailureReads - # of reads marked QC failure in the flag
- MappingRate(%) - # of reads marked mapped in the flag / TotalReads
- PairedReads(%) - # of reads marked paired in the flag / TotalReads
- ProperPair(%) - # of reads marked paired AND proper paired in the flag / TotalReads
- DupRate(%) - # of reads marked duplicate in the flag / TotalReads
- QCFailRate(%) - # of reads marked QC failure in the flag / TotalReads
- TotalBases - # of bases in all reads
- BasesInMappedReads - # of bases in reads marked mapped in the flag
Qual/Phred
Prints a count of the number of times each quality value appears in the file.
phred
Displays Quality as phred integers [0-93]
qual
Displays Quality as non-phred integers (phred + 33) [33-126]
BaseQC
The baseQC option generates the following statistics:
A read spans a position if the read starts at or before the position, ends at or after the position and the position is not a clip. CIGAR operations allowed for the position are M/X/=/D/N. If the CIGAR is '*', only numbers for the specified reference position are incremented.
Currently there is no special logic to exclude positions/reads where the reference base is 'N' or the read base is 'N'.
BaseQC Output
There are two output options for BaseQC.
Percentage-Based Output Format
Order/Descriptions:
| Field | Description | Excludes Duplicates, QC Failures | Excludes Unmapped | Excludes MapQual = 255 | Excludes Below Min MapQual | Excludes CIGAR Deletions, Skips |
|---|---|---|---|---|---|---|
| chrom | Chromosome/reference name string from the SAM/BAM | | | | | |
| chromStart | 0-based start position | | | | | |
| chromEnd | 0-based end position (always 1 greater than start and not included in this region) | | | | | |
| Depth | # of reads that are mapped with acceptable Mapping Quality, and are not duplicates or QC failures | X | X | | X | X |
| Q20Bases | # of bases at this position with a base quality (from the read) of Q20 or higher | X | X | | X | X |
| Q20BasesPct(%) | Q20Bases / Depth | X | X | | X | X |
| TotalReads | # of reads that span this position | | | | | |
| MappedBases | # of reads marked mapped in the flag | X | X | | | |
| MappingRate(%) | MappedBases / TotalReads | X | X | | | |
| MapRate_MQPass(%) | # of reads that have a Mapping Quality >= a minimum Mapping Quality / TotalReads | X | X | | | |
| ZeroMapQual(%) | # of reads that have a Mapping Quality of 0 / TotalReads | X | X | | | |
| MapQual<10(%) | # of reads that have a Mapping Quality < 10 / TotalReads | X | X | | | |
| PairedReads(%) | # of reads marked paired in the flag / TotalReads | X | X | | | |
| ProperPaired(%) | # of reads marked paired AND proper paired in the flag / TotalReads | X | X | | | |
| DupRate(%) | # of reads marked duplicate in the flag / TotalReads | | | | | |
| QCFailRate(%) | # of reads marked QC failure in the flag / TotalReads | | | | | |
| AverageMapQuality | sum of included mapping qualities / AverageMapQualCount | X | X | X | | |
| AverageMapQualCount | # of mapping qualities in AverageMapQuality | X | X | X | | |
This output does not include a MapQual255 count.
Sample Output
chrom chromStart chromEnd Depth Q20Bases Q20BasesPct(%) TotalReads MappedBases MappingRate(%) MapRate_MQPass(%) ZeroMapQual(%) MapQual<10(%) PairedReads(%) ProperPaired(%) DupRate(%) QCFailRate(%) AverageMapQuality AverageMapQualCount
1 100 101 2 2 100.000 3 3 100.000 66.667 33.333 66.667 100.000 0.000 0.000 0.000 11.000 3
1 101 102 2 0 0.000 3 3 100.000 66.667 33.333 66.667 100.000 0.000 0.000 0.000 11.000 3
1 102 103 0 0 0.000 3 3 100.000 66.667 33.333 66.667 100.000 0.000 0.000 0.000 0.000 0
1 103 104 0 0 0.000 3 3 100.000 66.667 33.333 66.667 100.000 0.000 0.000 0.000 0.000 0
1 104 105 2 0 0.000 3 3 100.000 66.667 33.333 66.667 100.000 0.000 0.000 0.000 11.000 3
1 105 106 2 2 100.000 3 3 100.000 66.667 33.333 66.667 100.000 0.000 0.000 0.000 11.000 3
1 110 111 0 0 0.000 3 3 100.000 66.667 33.333 66.667 100.000 0.000 0.000 0.000 0.000 0
1 111 112 2 2 100.000 3 3 100.000 66.667 33.333 66.667 100.000 0.000 0.000 0.000 11.000 3
1 112 113 2 2 100.000 3 3 100.000 66.667 33.333 66.667 100.000 0.000 0.000 0.000 11.000 3
1 10012 10013 14 0 0.000 42 33 78.571 52.381 26.190 52.381 85.714 35.714 14.286 14.286 11.000 21
1 10013 10014 14 10 71.429 39 30 76.923 51.282 25.641 51.282 84.615 38.462 15.385 15.385 11.000 21
1 10023 10024 0 0 0.000 39 30 76.923 51.282 25.641 51.282 84.615 38.462 15.385 15.385 0.000 0
1 10024 10025 14 12 85.714 39 30 76.923 51.282 25.641 51.282 84.615 38.462 15.385 15.385 11.000 21
Count-Based Output Format
Order/Descriptions:
| Field | Description | Excludes Duplicates, QC Failures | Excludes Unmapped | Excludes MapQual = 255 | Excludes Below Min MapQual | Excludes CIGAR Deletions, Skips |
|---|---|---|---|---|---|---|
| chrom | Chromosome/reference name string from the SAM/BAM | | | | | |
| chromStart | 0-based start position | | | | | |
| chromEnd | 0-based end position (always 1 greater than start and not included in this region) | | | | | |
| TotalReads | # of reads that span this position | | | | | |
| Dups | # of reads marked duplicate in the flag | | | | | |
| QCFail | # of reads marked QC failure in the flag | | | | | |
| Mapped | # of reads marked mapped in the flag | X | X | | | |
| Paired | # of reads marked paired in the flag | X | X | | | |
| ProperPaired | # of reads marked paired AND proper paired in the flag | X | X | | | |
| ZeroMapQual | # of reads that have a Mapping Quality of 0 | X | X | | | |
| MapQual<10(%) | # of reads that have a Mapping Quality < 10 | X | X | | | |
| MapQual255 | # of reads that have a Mapping Quality = 255 | X | X | | | |
| PassMapQual | # of reads that have a Mapping Quality >= a minimum Mapping Quality | X | X | | | |
| AverageMapQuality | sum of included mapping qualities / AverageMapQualCount | X | X | X | | |
| AverageMapQualCount | # of mapping qualities in AverageMapQuality | X | X | X | | |
| Depth | # of reads that are mapped with acceptable Mapping Quality, and are not duplicates or QC failures | X | X | | X | X |
| Q20Bases | # of bases at this position with a base quality (from the read) of Q20 or higher | X | X | | X | X |