Difference between revisions of "Sam Library Usage Examples"
(Remove unused code, add reference to BamUtil) |
|||
(One intermediate revision by the same user not shown) | |||
Line 3: | Line 3: | ||
= Usage Examples = | = Usage Examples = | ||
− | This page contains examples on how to use the [[C++ Library: | + | This page contains examples on how to use the [[C++ Library: libStatGen|SAM/BAM Library]]. |
+ | |||
+ | See [[BamUtil]] for more examples of how it uses libStatGen to perform many operations. | ||
== Simple Read/Write SAM/BAM files == | == Simple Read/Write SAM/BAM files == | ||
Line 37: | Line 39: | ||
SamRecord samRecord; | SamRecord samRecord; | ||
− | |||
// Set returnStatus to success. It will be changed | // Set returnStatus to success. It will be changed | ||
Line 91: | Line 92: | ||
SamRecord samRecord; | SamRecord samRecord; | ||
− | |||
// Loop through each Reference. | // Loop through each Reference. |
Latest revision as of 09:07, 7 September 2011
Usage Examples
This page contains examples on how to use the SAM/BAM Library.
See BamUtil for more examples of how it uses libStatGen to perform many operations.
Simple Read/Write SAM/BAM files
The following example reads in a sam/bam file and writes it out as a sam/bam file. The file format of the input sam/bam is determined by the SamFile class based on reading the type from the file. The file format of the output sam/bam file is determined by the SamFile class based on the extension of the output file. A ".bam" extension indicates a BAM file. All other extensions indicate SAM files.
#include "SamFile.h"
#include "SamValidation.h"
int main(int argc, char ** argv)
{
// Check for the appropriate number of arguments.
if(argc != 3)
{
printf("./bam <inputFile> <outputFile.sam/bam>\n");
exit(-1);
}
// Open the input file for reading.
SamFile samIn;
samIn.OpenForRead(argv[1]);
// Open the output file for writing.
SamFile samOut;
samOut.OpenForWrite(argv[2]);
// Read the sam header.
SamFileHeader samHeader;
samIn.ReadHeader(samHeader);
// Write the sam header.
samOut.WriteHeader(samHeader);
SamRecord samRecord;
// Set returnStatus to success. It will be changed
// to the failure reason if any of the writes fail.
SamStatus::Status returnStatus = SamStatus::SUCCESS;
// Keep reading records until ReadRecord returns false.
while(samIn.ReadRecord(samHeader, samRecord))
{
// Successfully read a record from the file, so write it.
samOut.WriteRecord(samHeader, samRecord);
}
std::cout << std::endl << "Number of records read = " <<
samIn.GetCurrentRecordCount() << std::endl;
std::cout << "Number of records written = " <<
samOut.GetCurrentRecordCount() << std::endl;
// Return success since a failure would have thrown
// an exception.
return(returnStatus);
}
Read Only One Reference/Chromosome from Sorted/Indexed BAM Files
This example reads in the inputFilename bam file and writes it back out section by section to the specified outputFilename, starting with section -1. It also prints a count of the number of records in each section.
#include "SamFile.h"
#include "SamValidation.h"
int ReadIndexedBam(const char* inputFilename,
const char* outputFilename,
const char* indexFilename)
{
// Open the input file for reading.
SamFile samIn;
samIn.OpenForRead(inputFilename);
// Open the bam index file for reading.
samIn.ReadBamIndex(indexFilename);
// Open the output file for writing.
SamFile samOut;
samOut.OpenForWrite(outputFilename);
// Read the sam header.
SamFileHeader samHeader;
samIn.ReadHeader(samHeader);
// Write the sam header.
samOut.WriteHeader(samHeader);
SamRecord samRecord;
// Loop through each Reference.
for(int i = -1; i < 23; i++)
{
int numSectionRecords = 0;
samIn.SetReadSection(i);
// Keep reading records until they aren't more.
while(samIn.ReadRecord(samHeader, samRecord))
{
numSectionRecords++;
// Successfully read a record from the file, so write it.
samOut.WriteRecord(samHeader, samRecord);
}
std::cout << "Reference ID " << i << " has " << numSectionRecords
<< " records" << std::endl;
}
std::cout << "Number of records = " << samIn.GetCurrentRecordCount() << std::endl;
return(0);
}
Read a Specified Region from a Sorted & Indexed BAM File
This example is found in pipeline/bam/WriteRegion.cpp and is part of the bam executable.
#include "SamFile.h"
#include "Parameters.h"
#include "BgzfFileType.h"
int writeRegion(int argc, char **argv)
{
// Extract command line arguments.
static const int UNSPECIFIED_INT = -1;
String inFile = "";
String outFile = "";
String indexFile = "";
String refName = "";
int refID = UNSPECIFIED_INT;
int start = UNSPECIFIED_INT;
int end = UNSPECIFIED_INT;
bool noeof = false;
ParameterList inputParameters;
BEGIN_LONG_PARAMETERS(longParameterList)
LONG_STRINGPARAMETER("in", &inFile)
LONG_STRINGPARAMETER("out", &outFile)
LONG_STRINGPARAMETER("bamIndex", &indexFile)
LONG_STRINGPARAMETER("refName", &refName)
LONG_INTPARAMETER("refID", &refID)
LONG_INTPARAMETER("start", &start)
LONG_INTPARAMETER("end", &end)
LONG_PARAMETER("noeof", &noeof)
END_LONG_PARAMETERS();
inputParameters.Add(new LongParameters ("Input Parameters",
longParameterList));
inputParameters.Read(argc-1, &(argv[1]));
inputParameters.Status();
// If no eof block is required for a bgzf file, set the bgzf file type to
// not look for it.
if(noeof)
{
// Set that the eof block is not required.
BgzfFileType::setRequireEofBlock(false);
}
// Check to see if the in file was specified, if not, report an error.
if(inFile == "")
{
writeRegionUsage();
// mandatory argument was not specified.
std::cerr << "Missing mandatory argument: --in" << std::endl;
return(-1);
}
if(outFile == "")
{
writeRegionUsage();
// mandatory argument was not specified.
std::cerr << "Missing mandatory argument: --out" << std::endl;
return(-1);
}
if(indexFile == "")
{
// In file was not specified, so set it to the in file
// + ".bai"
indexFile = inFile + ".bai";
}
if(refID != -1 && refName.Length() != 0)
{
std::cerr << "Can't specify both refID and refName" << std::endl;
return(-1);
}
SamFile samIn;
// Open the file for reading.
samIn.OpenForRead(inFile);
// If refName is set, use that.
if(refName.Length() != 0)
{
// Use Reference Name.
samIn.SetReadSection(refName.c_str(), start, end);
}
else
{
// Use Reference ID
samIn.SetReadSection(refID, start, end);
}
// Open the output file for writing.
SamFile samOut;
samOut.OpenForWrite(outFile);
// Open the bam index file for reading.
samIn.ReadBamIndex(indexFile);
// Read & write the sam header.
SamFileHeader samHeader;
samIn.ReadHeader(samHeader);
samOut.WriteHeader(samHeader);
// Read the sam records.
SamRecord samRecord;
// Track the status.
int numSectionRecords = 0;
// Set returnStatus to success.
SamStatus::Status returnStatus = SamStatus::SUCCESS;
// Keep reading records until they aren't anymore.
while(samIn.ReadRecord(samHeader, samRecord))
{
// Successfully read a record from the file, so write it.
samOut.WriteRecord(samHeader, samRecord);
++numSectionRecords;
}
std::cout << "Wrote " << outFile << " with " << numSectionRecords << " records.\n";
return(returnStatus);
}
Validate SAM/BAM Files Continuing Processing After Allowed Errors
This example demonstrates calling validation on a SAM/BAM file. It also demonstrates a more complicated read loop that continues until a configurable number of errors are hit.
#include "SamFile.h"
#include "SamValidation.h"
int Validate(String inFile, int quitAfterErrorNum, int maxReportedErrors, SamFile::SortedType sortType)
{
// Check to see if the in file was specified. If it wasn't this is an error.
if(inFile == "")
{
// In file was not specified but it is mandatory.
std::cerr << "--in is a mandatory argument for validate, "
<< "but was not specified" << std::endl;
return(-1);
}
SamFile samIn(ErrorHandler::RETURN);
// Open the file for reading.
if(!samIn.OpenForRead(inFile))
{
fprintf(stderr, "%s\n", samIn.GetStatusMessage());
return(samIn.GetStatus());
}
// Set the sorting validation type.
samIn.setSortedValidation(sortType);
// Read the sam header.
SamFileHeader samHeader;
if(!samIn.ReadHeader(samHeader))
{
fprintf(stderr, "%s\n", samIn.GetStatusMessage());
return(samIn.GetStatus());
}
// Read the sam records.
SamRecord samRecord;
// Track the status.
SamStatus::Status status = SamStatus::SUCCESS;
// Keep reading records until the end of the file is reached.
int numValidRecords = 0;
int numInvalidRecords = 0;
int numErrorRecords = 0;
int numRecords = 0;
int numReportedErrors = 0;
int totalErrorRecords = 0;
SamValidationErrors invalidSamErrors;
// Keep reading records from the file until SamFile::ReadRecord
// indicates to stop (returns false).
while( (samIn.ReadRecord(samHeader, samRecord))||
(SamStatus::isContinuableStatus(samIn.GetStatus()) &&
( (quitAfterErrorNum < 0) ||
(numErrorRecords < quitAfterErrorNum) )) )
{
++numRecords;
if(samIn.GetStatus() == SamStatus::SUCCESS)
{
// Successfully set the record, so check to see if it is valid.
// Clear any errors in the list.
invalidSamErrors.clear();
if(!SamValidator::isValid(samHeader, samRecord, invalidSamErrors))
{
// The record is not valid.
++numInvalidRecords;
++totalErrorRecords;
if(numReportedErrors < maxReportedErrors)
{
std::cout << "Record " << numRecords << std::endl
<< invalidSamErrors << std::endl;
numReportedErrors += invalidSamErrors.numErrors();
status = SamStatus::INVALID;
}
}
else
{
// Valid record, so increment the counter.
++numValidRecords;
}
}
else
{
// Error reading the record.
++numErrorRecords;
if(numReportedErrors < maxReportedErrors)
{
// report error.
std::cout << "Record " << numRecords << std::endl
<< samIn.GetStatusMessage() << std::endl
<< std::endl;
++numReportedErrors;
++totalErrorRecords;
status = samIn.GetStatus();
}
}
}
if(samIn.GetStatus() != SamStatus::NO_MORE_RECS)
{
++numErrorRecords;
// The last read call had a failure, so report it.
if(numReportedErrors < maxReportedErrors)
{
std::cout << "Record " << numRecords << ": ";
std::cout << std::endl << samIn.GetStatusMessage() << std::endl;
}
status = samIn.GetStatus();
}
printf("\nNumber of records read = %d\n", numRecords);
printf("Number of valid records = %d\n", numValidRecords);
printf("Returning: %d (%s)\n", status, SamStatus::getStatusString(status));
return(status);
}