Line 1: |
Line 1: |
| + | '''NOTE: Not all validation criteria have been listed here, and not all of those listed have been implemented. (Implemented checks are marked green.)''' |
| + | |
| === SAM Header Validation Rules === | | === SAM Header Validation Rules === |
| TODO | | TODO |
Line 13: |
Line 15: |
| |- | | |- |
| | All Required Fields are set | | | All Required Fields are set |
| + | |style="background-color:red;"| |
| + | |style="background-color:red;"| |
| + | |style="background-color:red;"| |
| + | |style="background-color:red;"| |
| + | |- |
| + | | If HD line is there, VN is also there. |
| + | |style="background-color:red;"| |
| + | |style="background-color:red;"| |
| + | |style="background-color:red;"| |
| + | |style="background-color:red;"| |
| + | |- |
| + | | HD/VN is in valid format /^[0-9]+\.[0-9]+$/ |
| + | |style="background-color:red;"| |
| + | |style="background-color:red;"| |
| + | |style="background-color:red;"| |
| + | |style="background-color:red;"| |
| + | |- |
| + | | HD/SO is a valid value (unsorted, queryname, coordinate) |
| + | |style="background-color:red;"| |
| + | |style="background-color:red;"| |
| + | |style="background-color:red;"| |
| + | |style="background-color:red;"| |
| + | |- |
| + | | SQ/SN all SQ lines have a unique SN field |
| + | |style="background-color:red;"| |
| + | |style="background-color:red;"| |
| + | |style="background-color:red;"| |
| + | |style="background-color:red;"| |
| + | |- |
| + | | SQ/LN is in the range [1, (2^29) -1] |
| + | |style="background-color:red;"| |
| + | |style="background-color:red;"| |
| + | |style="background-color:red;"| |
| + | |style="background-color:red;"| |
| + | |- |
| + | | SQ/LN is a number |
| + | |style="background-color:red;"| |
| + | |style="background-color:red;"| |
| + | |style="background-color:red;"| |
| + | |style="background-color:red;"| |
| + | |- |
| + | | RG/ID all RG lines have a unique ID field |
| + | |style="background-color:red;"| |
| + | |style="background-color:red;"| |
| + | |style="background-color:red;"| |
| + | |style="background-color:red;"| |
| + | |- |
| + | | RG/PL is a valid value (ILLUMINA, SOLID, LS454, HELICOS, PACBIO) |
| + | |style="background-color:red;"| |
| + | |style="background-color:red;"| |
| + | |style="background-color:red;"| |
| + | |style="background-color:red;"| |
| + | |- |
| + | | Header has X lines or fewer (or a maximum number of SQ lines); this was once a problem with a file that had a crazy number of header lines |
| |style="background-color:red;"| | | |style="background-color:red;"| |
| |style="background-color:red;"| | | |style="background-color:red;"| |
Line 37: |
Line 93: |
| |style="background-color:red;"| | | |style="background-color:red;"| |
| |- | | |- |
− | | QNAME does not contain [ \t\n\r] | + | | QNAME is valid: [!-?A-~] (printable characters minus space and '@') '''This is a new regular expression''' |
− | |style="background-color:green;"| | + | |style="background-color:red;"| |
− | |style="background-color:green;"| | + | |style="background-color:red;"| |
− | |style="background-color:green;"| | + | |style="background-color:red;"| |
| |style="background-color:red;"| | | |style="background-color:red;"| |
| |- | | |- |
Line 47: |
Line 103: |
| |style="background-color:grey;"| N/A: just interpret the bits as an int. | | |style="background-color:grey;"| N/A: just interpret the bits as an int. |
| |style="background-color:green;"| | | |style="background-color:green;"| |
− | |style="background-color:red;"| | + | |style="background-color:grey;"| N/A: just interpret the bits as an int. |
| |- | | |- |
| | FLAG is [0, (2^16)-1] | | | FLAG is [0, (2^16)-1] |
− | |style="background-color:green;"| | + | |style="background-color:green;"| Parse Error since it will be written into a 16 bit field. |
| |style="background-color:grey;"| N/A: only a 16 bit field | | |style="background-color:grey;"| N/A: only a 16 bit field |
| |style="background-color:green;"| | | |style="background-color:green;"| |
Line 56: |
Line 112: |
| |- | | |- |
| | RNAME does not contain [ \t\n\r@=] | | | RNAME does not contain [ \t\n\r@=] |
| + | |style="background-color:green;"| |
| + | |style="background-color:green;"| |
| + | |style="background-color:green;"| |
| |style="background-color:red;"| | | |style="background-color:red;"| |
| + | |- |
| + | | RNAME is found in an SQ header record if there are any SQs in the header. |
| + | |style="background-color:green;"| |
| + | |style="background-color:green;"| |
| + | |style="background-color:green;"| |
| |style="background-color:red;"| | | |style="background-color:red;"| |
| + | |- |
| + | | Reference Name length does not match specified length. |
| + | |style="background-color:grey;"| N/A: reference name length is in BAM format only |
| |style="background-color:red;"| | | |style="background-color:red;"| |
| + | |style="background-color:grey;"| N/A: reference name length is in BAM format only |
| |style="background-color:red;"| | | |style="background-color:red;"| |
| |- | | |- |
− | | POS is an integer [0-9]+ | + | | Reference ID is in range of the number of references |
− | |style="background-color:red;"| | + | |style="background-color:grey;"| N/A: rID is in BAM format only |
− | |style="background-color:red;"|
| |
| |style="background-color:red;"| | | |style="background-color:red;"| |
| + | |style="background-color:grey;"| N/A: rID is in BAM format only |
| |style="background-color:red;"| | | |style="background-color:red;"| |
| + | |- |
| + | | POS is an integer [0-9]+ |
| + | |style="background-color:green;"| |
| + | |style="background-color:grey;"| N/A: just interpret the bits as an int. |
| + | |style="background-color:green;"| |
| + | |style="background-color:grey;"| N/A: just interpret the bits as an int. |
| |- | | |- |
| | POS is [0, (2^29)-1] | | | POS is [0, (2^29)-1] |
| + | |style="background-color:green;"| Parse Error if it can't fit in the 32 bit field; any other out-of-range value is a validation error. |
| |style="background-color:red;"| | | |style="background-color:red;"| |
− | |style="background-color:red;"| | + | |style="background-color:green;"| |
− | |style="background-color:red;"|
| |
| |style="background-color:red;"| | | |style="background-color:red;"| |
| |- | | |- |
| | MAPQ is an integer [0-9]+ | | | MAPQ is an integer [0-9]+ |
− | |style="background-color:red;"| | + | |style="background-color:green;"| |
− | |style="background-color:red;"| | + | |style="background-color:grey;"| N/A: just interpret the bits as an int. |
− | |style="background-color:red;"| | + | |style="background-color:green;"| |
− | |style="background-color:red;"| | + | |style="background-color:grey;"| N/A: just interpret the bits as an int. |
| |- | | |- |
| | MAPQ is [0, (2^8)-1] | | | MAPQ is [0, (2^8)-1] |
| + | |style="background-color:green;"| Parse Error since it will be written into an 8 bit field. |
| + | |style="background-color:grey;"| N/A: only an 8 bit field |
| + | |style="background-color:green;"| |
| + | |style="background-color:grey;"| N/A: only an 8 bit field |
| + | |- |
| + | | <nowiki>CIGAR ([0-9]+[MIDNSHP])+|\*</nowiki> |
| |style="background-color:red;"| | | |style="background-color:red;"| |
| |style="background-color:red;"| | | |style="background-color:red;"| |
Line 85: |
Line 165: |
| |style="background-color:red;"| | | |style="background-color:red;"| |
| |- | | |- |
− | | <nowiki>CIGAR ([0-9]+[MIDNSHP])+|\*</nowiki> | + | | CIGAR string matches the length of SEQ if both are not "*" |
− | |style="background-color:red;"| | + | |style="background-color:green;"| |
− | |style="background-color:red;"| | + | |style="background-color:green;"| |
− | |style="background-color:red;"| | + | |style="background-color:green;"| |
| |style="background-color:red;"| | | |style="background-color:red;"| |
| |- | | |- |
Line 104: |
Line 184: |
| |- | | |- |
| | MPOS is an integer [0-9]+ | | | MPOS is an integer [0-9]+ |
− | |style="background-color:red;"| | + | |style="background-color:green;"| |
| |style="background-color:red;"| | | |style="background-color:red;"| |
| |style="background-color:red;"| | | |style="background-color:red;"| |
Line 116: |
Line 196: |
| |- | | |- |
| | ISIZE is an integer -?[0-9]+ | | | ISIZE is an integer -?[0-9]+ |
− | |style="background-color:red;"| | + | |style="background-color:green;"| |
| |style="background-color:red;"| | | |style="background-color:red;"| |
| |style="background-color:red;"| | | |style="background-color:red;"| |
Line 145: |
Line 225: |
| |style="background-color:red;"| | | |style="background-color:red;"| |
| |- | | |- |
− | | If QUAL is not “*” it is the same length as SEQ. | + | | If QUAL and SEQ are not “*” they are the same length. |
− | |style="background-color:red;"| | + | |style="background-color:green;"| |
− | |style="background-color:red;"|
| |
| |style="background-color:red;"| | | |style="background-color:red;"| |
| + | |style="background-color:green;"| |
| |style="background-color:red;"| | | |style="background-color:red;"| |
| |- | | |- |
Line 163: |
Line 243: |
| |style="background-color:red;"| | | |style="background-color:red;"| |
| |- | | |- |
− | | VTYPE is [AifZH] for SAM and [AcCsSiIfZH] | + | | VTYPE is [AifZH] for SAM and [AcCsSiIfZH] for BAM |
| |style="background-color:red;"| | | |style="background-color:red;"| |
| |style="background-color:red;"| | | |style="background-color:red;"| |
Line 204: |
Line 284: |
| |style="background-color:red;"| | | |style="background-color:red;"| |
| |style="background-color:red;"| | | |style="background-color:red;"| |
| + | |- |
| + | | For TAG = E2, length should be the same as the Read Length |
| + | |style="background-color:red;"| |
| + | |style="background-color:red;"| |
| + | |style="background-color:red;"| |
| + | |style="background-color:red;"| |
| + | |- |
| + | | For TAG = E2, each base should be different from the read base (unless 'N') |
| + | |style="background-color:red;"| |
| + | |style="background-color:red;"| |
| + | |style="background-color:red;"| |
| + | |style="background-color:red;"| |
| + | |- |
| + | | For TAG = U2, length should be the same as the Read Length |
| + | |style="background-color:red;"| |
| + | |style="background-color:red;"| |
| + | |style="background-color:red;"| |
| + | |style="background-color:red;"| |
| + | |- |
| |} | | |} |
| | | |
Line 211: |
Line 310: |
| | | |
| | Consider validating the CIGAR string against the read length... | | Consider validating the CIGAR string against the read length... |
− |
| |
| | | |
| == Other Read Validation == | | == Other Read Validation == |