Line 1: |
Line 1: |
| + | '''NOTE: Not all validation criteria have been listed here, and not all of those listed have been implemented. (Implemented checks are marked green.)''' |
| + | |
| === SAM Header Validation Rules === | | === SAM Header Validation Rules === |
| TODO | | TODO |
Line 13: |
Line 15: |
| |- | | |- |
| | All Required Fields are set | | | All Required Fields are set |
| + | |style="background-color:red;"| |
| + | |style="background-color:red;"| |
| + | |style="background-color:red;"| |
| + | |style="background-color:red;"| |
| + | |- |
| + | | If HD line is there, VN is also there. |
| + | |style="background-color:red;"| |
| + | |style="background-color:red;"| |
| + | |style="background-color:red;"| |
| + | |style="background-color:red;"| |
| + | |- |
| + | | HD/VN is in valid format /^[0-9]+\.[0-9]+$/ |
| + | |style="background-color:red;"| |
| + | |style="background-color:red;"| |
| + | |style="background-color:red;"| |
| + | |style="background-color:red;"| |
| + | |- |
| + | | HD/SO is a valid value (unsorted, queryname, coordinate) |
| + | |style="background-color:red;"| |
| + | |style="background-color:red;"| |
| + | |style="background-color:red;"| |
| + | |style="background-color:red;"| |
| + | |- |
| + | | SQ/SN all SQ lines have a unique SN field |
| + | |style="background-color:red;"| |
| + | |style="background-color:red;"| |
| + | |style="background-color:red;"| |
| + | |style="background-color:red;"| |
| + | |- |
| + | | SQ/LN is in the range [1, (2^29) -1] |
| + | |style="background-color:red;"| |
| + | |style="background-color:red;"| |
| + | |style="background-color:red;"| |
| + | |style="background-color:red;"| |
| + | |- |
| + | | SQ/LN is a number |
| + | |style="background-color:red;"| |
| + | |style="background-color:red;"| |
| + | |style="background-color:red;"| |
| + | |style="background-color:red;"| |
| + | |- |
| + | | RG/ID all RG lines have a unique ID field |
| + | |style="background-color:red;"| |
| + | |style="background-color:red;"| |
| + | |style="background-color:red;"| |
| + | |style="background-color:red;"| |
| + | |- |
| + | | RG/PL is a valid value (ILLUMINA, SOLID, LS454, HELICOS, PACBIO) |
| + | |style="background-color:red;"| |
| + | |style="background-color:red;"| |
| + | |style="background-color:red;"| |
| + | |style="background-color:red;"| |
| + | |- |
| + | | Header has X lines or fewer (or a max number of SQ lines); this was a problem once with a file that had a crazy number of header lines |
| |style="background-color:red;"| | | |style="background-color:red;"| |
| |style="background-color:red;"| | | |style="background-color:red;"| |
Line 37: |
Line 93: |
| |style="background-color:red;"| | | |style="background-color:red;"| |
| |- | | |- |
− | | QNAME does not contain [ \t\n\r] | + | | QNAME is valid: [!-?A-~] (printable characters minus space and '@') '''This is a new regular expression''' |
− | |style="background-color:green;"| | + | |style="background-color:red;"| |
− | |style="background-color:green;"| | + | |style="background-color:red;"| |
− | |style="background-color:green;"| | + | |style="background-color:red;"| |
| |style="background-color:red;"| | | |style="background-color:red;"| |
| |- | | |- |
Line 50: |
Line 106: |
| |- | | |- |
| | FLAG is [0, (2^16)-1] | | | FLAG is [0, (2^16)-1] |
− | |style="background-color:green;"| | + | |style="background-color:green;"| Parse Error since it will be written into a 16 bit field. |
| |style="background-color:grey;"| N/A: only a 16 bit field | | |style="background-color:grey;"| N/A: only a 16 bit field |
| |style="background-color:green;"| | | |style="background-color:green;"| |
Line 81: |
Line 137: |
| | POS is an integer [0-9]+ | | | POS is an integer [0-9]+ |
| |style="background-color:green;"| | | |style="background-color:green;"| |
− | |style="background-color:red;"| | + | |style="background-color:grey;"| N/A: just interpret the bits as an int. |
− | |style="background-color:red;"| | + | |style="background-color:green;"| |
− | |style="background-color:red;"| | + | |style="background-color:grey;"| N/A: just interpret the bits as an int. |
| |- | | |- |
| | POS is [0, (2^29)-1] | | | POS is [0, (2^29)-1] |
| + | |style="background-color:green;"| Parse Error if it can't fit in the 32 bit field; any other out-of-range value is a validation error. |
| |style="background-color:red;"| | | |style="background-color:red;"| |
− | |style="background-color:red;"| | + | |style="background-color:green;"| |
− | |style="background-color:red;"|
| |
| |style="background-color:red;"| | | |style="background-color:red;"| |
| |- | | |- |
| | MAPQ is an integer [0-9]+ | | | MAPQ is an integer [0-9]+ |
| |style="background-color:green;"| | | |style="background-color:green;"| |
− | |style="background-color:red;"| | + | |style="background-color:grey;"| N/A: just interpret the bits as an int. |
− | |style="background-color:red;"| | + | |style="background-color:green;"| |
− | |style="background-color:red;"| | + | |style="background-color:grey;"| N/A: just interpret the bits as an int. |
| |- | | |- |
| | MAPQ is [0, (2^8)-1] | | | MAPQ is [0, (2^8)-1] |
| + | |style="background-color:green;"| Parse Error since it will be written into an 8 bit field. |
| + | |style="background-color:grey;"| N/A: only an 8 bit field |
| + | |style="background-color:green;"| |
| + | |style="background-color:grey;"| N/A: only an 8 bit field |
| + | |- |
| + | | <nowiki>CIGAR ([0-9]+[MIDNSHP])+|\*</nowiki> |
| |style="background-color:red;"| | | |style="background-color:red;"| |
| |style="background-color:red;"| | | |style="background-color:red;"| |
Line 103: |
Line 165: |
| |style="background-color:red;"| | | |style="background-color:red;"| |
| |- | | |- |
− | | <nowiki>CIGAR ([0-9]+[MIDNSHP])+|\*</nowiki> | + | | CIGAR string matches the length of SEQ if both are not "*" |
− | |style="background-color:red;"| | + | |style="background-color:green;"| |
− | |style="background-color:red;"| | + | |style="background-color:green;"| |
− | |style="background-color:red;"| | + | |style="background-color:green;"| |
| |style="background-color:red;"| | | |style="background-color:red;"| |
| |- | | |- |
Line 163: |
Line 225: |
| |style="background-color:red;"| | | |style="background-color:red;"| |
| |- | | |- |
− | | If QUAL is not “*” it is the same length as SEQ. | + | | If QUAL and SEQ are not “*” they are the same length. |
− | |style="background-color:red;"| | + | |style="background-color:green;"| |
− | |style="background-color:red;"|
| |
| |style="background-color:red;"| | | |style="background-color:red;"| |
| + | |style="background-color:green;"| |
| |style="background-color:red;"| | | |style="background-color:red;"| |
| |- | | |- |
Line 181: |
Line 243: |
| |style="background-color:red;"| | | |style="background-color:red;"| |
| |- | | |- |
− | | VTYPE is [AifZH] for SAM and [AcCsSiIfZH] | + | | VTYPE is [AifZH] for SAM and [AcCsSiIfZH] for BAM |
| |style="background-color:red;"| | | |style="background-color:red;"| |
| |style="background-color:red;"| | | |style="background-color:red;"| |
Line 222: |
Line 284: |
| |style="background-color:red;"| | | |style="background-color:red;"| |
| |style="background-color:red;"| | | |style="background-color:red;"| |
| + | |- |
| + | | For TAG = E2, length should be the same as the Read Length |
| + | |style="background-color:red;"| |
| + | |style="background-color:red;"| |
| + | |style="background-color:red;"| |
| + | |style="background-color:red;"| |
| + | |- |
| + | | For TAG = E2, each base should be different from the read base (unless 'N') |
| + | |style="background-color:red;"| |
| + | |style="background-color:red;"| |
| + | |style="background-color:red;"| |
| + | |style="background-color:red;"| |
| + | |- |
| + | | For TAG = U2, length should be the same as the Read Length |
| + | |style="background-color:red;"| |
| + | |style="background-color:red;"| |
| + | |style="background-color:red;"| |
| + | |style="background-color:red;"| |
| + | |- |
| |} | | |} |
| | | |
Line 229: |
Line 310: |
| | | |
| Consider: we may want to validate the CIGAR string against the read length... | | Consider: we may want to validate the CIGAR string against the read length... |
− |
| |
| | | |
| == Other Read Validation == | | == Other Read Validation == |