Line 104: |
Line 104: |
| | | |
| === Usage Examples === | | === Usage Examples === |
− | ==== Simple Read/Write SAM/BAM files ====
| + | [[Sam Library Usage Examples]] |
− | The following example reads in a sam/bam file and writes it out as a sam/bam file. The file format of the input sam/bam is determined by the SamFile class based on reading the type from the file. The file format of the output sam/bam file is determined by the '''SamFile''' class based on the extension of the output file. A ".bam" extension indicates a BAM file. All other extensions indicate SAM files.
| |
− | | |
− | <source lang="cpp">
| |
− | #include "SamFile.h"
| |
− | #include "SamValidation.h"
| |
− | | |
− | int main(int argc, char ** argv)
| |
− | {
| |
− | // Check for the appropriate number of arguments.
| |
− | if(argc != 3)
| |
− | {
| |
− | printf("./bam <inputFile> <outputFile.sam/bam>\n");
| |
− | exit(-1);
| |
− | }
| |
− | | |
− | // Open the input file for reading.
| |
− | SamFile samIn;
| |
− | if(!samIn.OpenForRead(argv[1]))
| |
− | {
| |
− | fprintf(stderr, "%s\n", samIn.GetStatusMessage());
| |
− | return(samIn.GetStatus());
| |
− | }
| |
− | | |
− | // Open the output file for writing.
| |
− | SamFile samOut;
| |
− | if(!samOut.OpenForWrite(argv[2]))
| |
− | {
| |
− | fprintf(stderr, "%s\n", samOut.GetStatusMessage());
| |
− | return(samOut.GetStatus());
| |
− | }
| |
− | | |
− | // Read the sam header.
| |
− | SamFileHeader samHeader;
| |
− | if(!samIn.ReadHeader(samHeader))
| |
− | {
| |
− | fprintf(stderr, "%s\n", samIn.GetStatusMessage());
| |
− | return(samIn.GetStatus());
| |
− | }
| |
− | | |
− | // Write the sam header.
| |
− | if(!samOut.WriteHeader(samHeader))
| |
− | {
| |
− | fprintf(stderr, "%s\n", samOut.GetStatusMessage());
| |
− | return(samOut.GetStatus());
| |
− | }
| |
− | | |
− | SamRecord samRecord;
| |
− | SamValidationErrors samInvalidErrors;
| |
− | | |
− | // Set writeStatus to success. It will be changed
| |
− | // to the failure reason if any of the writes fail.
| |
− | SamStatus::Status writeStatus = SamStatus::SUCCESS;
| |
− | | |
− | // Keep reading records until ReadRecord returns false.
| |
− | while(samIn.ReadRecord(samHeader, samRecord))
| |
− | {
| |
− | // Successfully read a record from the file, so write it.
| |
− | if(!samOut.WriteRecord(samHeader, samRecord))
| |
− | {
| |
− | // Failed to write a record.
| |
− | fprintf(stderr, "%s\n", samOut.GetStatusMessage());
| |
− | writeStatus = samOut.GetStatus();
| |
− | }
| |
− | }
| |
− | | |
− | if(samIn.GetStatus() != SamStatus::NO_MORE_RECS)
| |
− | {
| |
− | // Failed to read a record.
| |
− | fprintf(stderr, "%s\n", samIn.GetStatusMessage());
| |
− | }
| |
− | | |
− | std::cout << std::endl << "Number of records read = " <<
| |
− | samIn.GetCurrentRecordCount() << std::endl;
| |
− | std::cout << "Number of records written = " <<
| |
− | samOut.GetCurrentRecordCount() << std::endl;
| |
− | | |
− | if(samIn.GetStatus() != SamStatus::NO_MORE_RECS)
| |
− | {
| |
− | // Failed reading a record.
| |
− | return(samIn.GetStatus());
| |
− | }
| |
− | | |
− | // Since the reads were successful, return the status based
| |
− | // on the status of the writes. If any failed, return
| |
− | // their failure status.
| |
− | return(writeStatus);
| |
− | }
| |
− | </source>
| |
− | | |
− | | |
− | ==== Simple Read/Write Indexed BAM Files ====
| |
− | This example reads in the inputFilename bam file and writes it back out section by section to the specified outputFilename, starting with section -1. It also prints a count of the number of records in each section.
| |
− | <source lang="cpp">
| |
− | #include "SamFile.h"
| |
− | #include "SamValidation.h"
| |
− | | |
− | int ReadIndexedBam(const char* inputFilename,
| |
− | const char* outputFilename,
| |
− | const char* indexFilename)
| |
− | {
| |
− | // Open the input file for reading.
| |
− | SamFile samIn;
| |
− | if(!samIn.OpenForRead(inputFilename))
| |
− | {
| |
− | fprintf(stderr, "%s\n", samIn.GetStatusMessage());
| |
− | return(samIn.GetStatus());
| |
− | }
| |
− | | |
− | // Open the bam index file for reading.
| |
− | if(!samIn.ReadBamIndex(indexFilename))
| |
− | {
| |
− | fprintf(stderr, "%s\n", samIn.GetStatusMessage());
| |
− | return(samIn.GetStatus());
| |
− | }
| |
− | | |
− | // Open the output file for writing.
| |
− | SamFile samOut;
| |
− | if(!samOut.OpenForWrite(outputFilename))
| |
− | {
| |
− | fprintf(stderr, "%s\n", samOut.GetStatusMessage());
| |
− | return(samOut.GetStatus());
| |
− | }
| |
− | | |
− | // Read the sam header.
| |
− | SamFileHeader samHeader;
| |
− | if(!samIn.ReadHeader(samHeader))
| |
− | {
| |
− | fprintf(stderr, "%s\n", samIn.GetStatusMessage());
| |
− | return(samIn.GetStatus());
| |
− | }
| |
− | | |
− | // Write the sam header.
| |
− | if(!samOut.WriteHeader(samHeader))
| |
− | {
| |
− | fprintf(stderr, "%s\n", samOut.GetStatusMessage());
| |
− | return(samOut.GetStatus());
| |
− | }
| |
− | | |
− | SamRecord samRecord;
| |
− | SamValidationErrors samInvalidErrors;
| |
− | | |
− | // Loop through each Reference.
| |
− | for(int i = -1; i < 23; i++)
| |
− | {
| |
− | int numSectionRecords = 0;
| |
− | samIn.SetReadSection(i);
| |
− | // Keep reading records until they aren't more.
| |
− | while(samIn.ReadRecord(samHeader, samRecord))
| |
− | {
| |
− | numSectionRecords++;
| |
− | // Successfully read a record from the file, so write it.
| |
− | if(!samOut.WriteRecord(samHeader, samRecord))
| |
− | {
| |
− | // Failed to write a record.
| |
− | fprintf(stderr, "%s\n", samOut.GetStatusMessage());
| |
− | }
| |
− | }
| |
− | | |
− | if(samIn.GetStatus() != SamStatus::NO_MORE_RECS)
| |
− | {
| |
− | // Failed to read a record.
| |
− | fprintf(stderr, "%s\n", samIn.GetStatusMessage());
| |
− | }
| |
− | | |
− | std::cout << "Reference ID " << i << " has " << numSectionRecords
| |
− | << " records" << std::endl;
| |
− | }
| |
− |
| |
− | std::cout << "Number of records = " << samIn.GetCurrentRecordCount() << std::endl;
| |
− |
| |
− | return(0);
| |
− | }
| |
− | </source>
| |
− | | |
− | ==== Validate SAM/BAM Files Continuing Processing After Allowed Errors ====
| |
− | This example demonstrates calling validation on a SAM/BAM file. It also demonstrates a more complicated read loop that continues until a configurable number of errors are hit.
| |
− | <source lang="cpp">
| |
− | #include "SamFile.h"
| |
− | #include "SamValidation.h"
| |
− | | |
− | int Validate(String inFile, int quitAfterErrorNum, int maxReportedErrors, SamFile::SortedType sortType)
| |
− | {
| |
− | // Check to see if the in file was specified. If it wasn't this is an error.
| |
− | if(inFile == "")
| |
− | {
| |
− | // In file was not specified but it is mandatory.
| |
− | std::cerr << "--in is a mandatory argument for validate, "
| |
− | << "but was not specified" << std::endl;
| |
− | return(-1);
| |
− | }
| |
− | | |
− | SamFile samIn;
| |
− | // Open the file for reading.
| |
− | if(!samIn.OpenForRead(inFile))
| |
− | {
| |
− | fprintf(stderr, "%s\n", samIn.GetStatusMessage());
| |
− | return(samIn.GetStatus());
| |
− | }
| |
− | | |
− | // Set the sorting validation type.
| |
− | samIn.setSortedValidation(sortType);
| |
− | | |
− | // Read the sam header.
| |
− | SamFileHeader samHeader;
| |
− | if(!samIn.ReadHeader(samHeader))
| |
− | {
| |
− | fprintf(stderr, "%s\n", samIn.GetStatusMessage());
| |
− | return(samIn.GetStatus());
| |
− | }
| |
− | | |
− | // Read the sam records.
| |
− | SamRecord samRecord;
| |
− | | |
− | // Track the status.
| |
− | SamStatus::Status status = SamStatus::SUCCESS;
| |
− | | |
− | // Keep reading records until the end of the file is reached.
| |
− | int numValidRecords = 0;
| |
− | int numInvalidRecords = 0;
| |
− | int numErrorRecords = 0;
| |
− | int numRecords = 0;
| |
− | int numReportedErrors = 0;
| |
− | int totalErrorRecords = 0;
| |
− | | |
− | SamValidationErrors invalidSamErrors;
| |
− | | |
− | // Keep reading records from the file until SamFile::ReadRecord
| |
− | // indicates to stop (returns false).
| |
− | while( (samIn.ReadRecord(samHeader, samRecord))||
| |
− | (SamStatus::isContinuableStatus(samIn.GetStatus()) &&
| |
− | ( (quitAfterErrorNum < 0) ||
| |
− | (numErrorRecords < quitAfterErrorNum) )) )
| |
− | {
| |
− | ++numRecords;
| |
− | if(samIn.GetStatus() == SamStatus::SUCCESS)
| |
− | {
| |
− | // Successfully set the record, so check to see if it is valid.
| |
− | // Clear any errors in the list.
| |
− | invalidSamErrors.clear();
| |
− | if(!SamValidator::isValid(samHeader, samRecord, invalidSamErrors))
| |
− | {
| |
− | // The record is not valid.
| |
− | ++numInvalidRecords;
| |
− | ++totalErrorRecords;
| |
− | if(numReportedErrors < maxReportedErrors)
| |
− | {
| |
− | std::cout << "Record " << numRecords << std::endl
| |
− | << invalidSamErrors << std::endl;
| |
− | numReportedErrors += invalidSamErrors.numErrors();
| |
− | status = SamStatus::INVALID;
| |
− | }
| |
− | }
| |
− | else
| |
− | {
| |
− | // Valid record, so increment the counter.
| |
− | ++numValidRecords;
| |
− | }
| |
− | }
| |
− | else
| |
− | {
| |
− | // Error reading the record.
| |
− | ++numErrorRecords;
| |
− | if(numReportedErrors < maxReportedErrors)
| |
− | {
| |
− | // report error.
| |
− | std::cout << "Record " << numRecords << std::endl
| |
− | << samIn.GetStatusMessage() << std::endl
| |
− | << std::endl;
| |
− | ++numReportedErrors;
| |
− | ++totalErrorRecords;
| |
− | status = samIn.GetStatus();
| |
− | }
| |
− | }
| |
− | }
| |
− | | |
− | if(samIn.GetStatus() != SamStatus::NO_MORE_RECS)
| |
− | {
| |
− | ++numErrorRecords;
| |
− | // The last read call had a failure, so report it.
| |
− | if(numReportedErrors < maxReportedErrors)
| |
− | {
| |
− | std::cout << "Record " << numRecords << ": ";
| |
− | std::cout << std::endl << samIn.GetStatusMessage() << std::endl;
| |
− | }
| |
− | status = samIn.GetStatus();
| |
− | }
| |
− | | |
− | printf("\nNumber of records read = %d\n", numRecords);
| |
− | printf("Number of valid records = %d\n", numValidRecords);
| |
− | printf("Returning: %d (%s)\n", status, SamStatus::getStatusString(status));
| |
− | return(status);
| |
− | }
| |
− | </source>
| |