Difference between revisions of "Sam Library Usage Examples"

From Genome Analysis Wiki
Jump to navigationJump to search
Line 1: Line 1:
 +
= Usage Examples =
 
This page contains examples on how to use the [[C++ Library: libbam|SAM/BAM Library]].
 
This page contains examples on how to use the [[C++ Library: libbam|SAM/BAM Library]].
  

Revision as of 16:47, 10 June 2010

Usage Examples

This page contains examples on how to use the SAM/BAM Library.

Simple Read/Write SAM/BAM files

The following example reads in a sam/bam file and writes it out as a sam/bam file. The file format of the input sam/bam is determined by the SamFile class based on reading the type from the file. The file format of the output sam/bam file is determined by the SamFile class based on the extension of the output file. A ".bam" extension indicates a BAM file. All other extensions indicate SAM files.

#include "SamFile.h"
#include "SamValidation.h"

int main(int argc, char ** argv)
{
   // Check for the appropriate number of arguments.
   if(argc != 3)
   {
      printf("./bam <inputFile> <outputFile.sam/bam>\n");
      exit(-1);
   }

   // Open the input file for reading.
   SamFile samIn;
   if(!samIn.OpenForRead(argv[1]))
   {
      fprintf(stderr, "%s\n", samIn.GetStatusMessage());
      return(samIn.GetStatus());
   }

   // Open the output file for writing.
   SamFile samOut;
   if(!samOut.OpenForWrite(argv[2]))
   {
      fprintf(stderr, "%s\n", samOut.GetStatusMessage());
      return(samOut.GetStatus());
   }

   // Read the sam header.
   SamFileHeader samHeader;
   if(!samIn.ReadHeader(samHeader))
   {
      fprintf(stderr, "%s\n", samIn.GetStatusMessage());
      return(samIn.GetStatus());
   }

   // Write the sam header.
   if(!samOut.WriteHeader(samHeader))
   {
      fprintf(stderr, "%s\n", samOut.GetStatusMessage());
      return(samOut.GetStatus());     
   }

   SamRecord samRecord;
   SamValidationErrors samInvalidErrors;

    // Set returnStatus to success.  It will be changed
    // to the failure reason if any of the writes fail.
    SamStatus::Status returnStatus = SamStatus::SUCCESS;

    // Keep reading records until ReadRecord returns false.
    while(samIn.ReadRecord(samHeader, samRecord))
    {
        // Successfully read a record from the file, so write it.
        if(!samOut.WriteRecord(samHeader, samRecord))
        {
            // Failed to write a record.
            fprintf(stderr, "%s\n", samOut.GetStatusMessage());
            returnStatus = samOut.GetStatus();
        }
    }

    if(samIn.GetStatus() != SamStatus::NO_MORE_RECS)
    {
        // Failed to read a record.
        fprintf(stderr, "%s\n", samIn.GetStatusMessage());
        // Set the return status to the reason why
        // the read failed.
        returnStatus = samIn.GetStatus();
    }

    std::cout << std::endl << "Number of records read = " << 
        samIn.GetCurrentRecordCount() << std::endl;
    std::cout << "Number of records written = " << 
        samOut.GetCurrentRecordCount() << std::endl;

    // Since the reads were successful, return the status based
    // on the status of the writes.  If any failed, return
    // their failure status.
    return(returnStatus);
 }


Simple Read/Write Indexed BAM Files

This example reads in the inputFilename bam file and writes it back out section by section to the specified outputFilename, starting with section -1. It also prints a count of the number of records in each section.

#include "SamFile.h"
#include "SamValidation.h"

int ReadIndexedBam(const char* inputFilename,
                   const char* outputFilename,
                   const char* indexFilename)
{
   // Open the input file for reading.
   SamFile samIn;
   if(!samIn.OpenForRead(inputFilename))
   {
      fprintf(stderr, "%s\n", samIn.GetStatusMessage());
      return(samIn.GetStatus());
   }

   // Open the bam index file for reading.
   if(!samIn.ReadBamIndex(indexFilename))
   {
      fprintf(stderr, "%s\n", samIn.GetStatusMessage());
      return(samIn.GetStatus());
   }

   // Open the output file for writing.
   SamFile samOut;
   if(!samOut.OpenForWrite(outputFilename))
   {
      fprintf(stderr, "%s\n", samOut.GetStatusMessage());
      return(samOut.GetStatus());
   }

   // Read the sam header.
   SamFileHeader samHeader;
   if(!samIn.ReadHeader(samHeader))
   {
      fprintf(stderr, "%s\n", samIn.GetStatusMessage());
      return(samIn.GetStatus());
   }

   // Write the sam header.
   if(!samOut.WriteHeader(samHeader))
   {
      fprintf(stderr, "%s\n", samOut.GetStatusMessage());
      return(samOut.GetStatus());     
   }

   SamRecord samRecord;
   SamValidationErrors samInvalidErrors;

   // Loop through each Reference.
   for(int i = -1; i < 23; i++)
   {
      int numSectionRecords = 0;
      samIn.SetReadSection(i);
      // Keep reading records until they aren't more.
      while(samIn.ReadRecord(samHeader, samRecord))
      {
         numSectionRecords++;
         // Successfully read a record from the file, so write it.
         if(!samOut.WriteRecord(samHeader, samRecord))
         {
            // Failed to write a record.
            fprintf(stderr, "%s\n", samOut.GetStatusMessage());
         }
      }

      if(samIn.GetStatus() != SamStatus::NO_MORE_RECS)
      {
         // Failed to read a record.
         fprintf(stderr, "%s\n", samIn.GetStatusMessage());
      }

      std::cout << "Reference ID " << i << " has " << numSectionRecords 
                << " records" << std::endl;
   }
   
   std::cout << "Number of records = " << samIn.GetCurrentRecordCount() << std::endl;
   
   return(0);
}

Validate SAM/BAM Files Continuing Processing After Allowed Errors

This example demonstrates calling validation on a SAM/BAM file. It also demonstrates a more complicated read loop that continues until a configurable number of errors are hit.

#include "SamFile.h"
#include "SamValidation.h"

int Validate(String inFile, int quitAfterErrorNum, int maxReportedErrors, SamFile::SortedType sortType)
{
   // Check to see if the in file was specified.  If it wasn't this is an error.
   if(inFile == "")
   {
      // In file was not specified but it is mandatory.
      std::cerr << "--in is a mandatory argument for validate, "
                << "but was not specified" << std::endl;
      return(-1);
   }

   SamFile samIn;
   // Open the file for reading.   
   if(!samIn.OpenForRead(inFile))
   {
      fprintf(stderr, "%s\n", samIn.GetStatusMessage());
      return(samIn.GetStatus());
   }

   // Set the sorting validation type.
   samIn.setSortedValidation(sortType);

   // Read the sam header.
   SamFileHeader samHeader;
   if(!samIn.ReadHeader(samHeader))
   {
      fprintf(stderr, "%s\n", samIn.GetStatusMessage());
      return(samIn.GetStatus());
   }

   // Read the sam records.
   SamRecord samRecord;

   // Track the status.
   SamStatus::Status status = SamStatus::SUCCESS;

   // Keep reading records until the end of the file is reached.
   int numValidRecords = 0;
   int numInvalidRecords = 0;
   int numErrorRecords = 0;
   int numRecords = 0;
   int numReportedErrors = 0;
   int totalErrorRecords = 0;

   SamValidationErrors invalidSamErrors;

   // Keep reading records from the file until SamFile::ReadRecord
   // indicates to stop (returns false).
   while( (samIn.ReadRecord(samHeader, samRecord))||
          (SamStatus::isContinuableStatus(samIn.GetStatus()) &&
           ( (quitAfterErrorNum < 0) ||
             (numErrorRecords < quitAfterErrorNum) )) )
   {
      ++numRecords;
      if(samIn.GetStatus() == SamStatus::SUCCESS)
      {
         // Successfully set the record, so check to see if it is valid.
         // Clear any errors in the list.
         invalidSamErrors.clear();
         if(!SamValidator::isValid(samHeader, samRecord, invalidSamErrors))
         {
            // The record is not valid.
            ++numInvalidRecords;
            ++totalErrorRecords;
            if(numReportedErrors < maxReportedErrors)
            {
               std::cout << "Record " << numRecords << std::endl
                         << invalidSamErrors << std::endl;
               numReportedErrors += invalidSamErrors.numErrors();
               status = SamStatus::INVALID;
            }
         }
         else
         {
            // Valid record, so increment the counter.
            ++numValidRecords;
         }
      }
      else
      {
         // Error reading the record.
         ++numErrorRecords;
         if(numReportedErrors < maxReportedErrors)
         {
            // report error.
            std::cout << "Record " << numRecords << std::endl
                      << samIn.GetStatusMessage() << std::endl
                      << std::endl;
            ++numReportedErrors;
            ++totalErrorRecords;
            status = samIn.GetStatus();
         }
      }
   }

   if(samIn.GetStatus() != SamStatus::NO_MORE_RECS)
   {
      ++numErrorRecords;
      // The last read call had a failure, so report it.
      if(numReportedErrors < maxReportedErrors)
      {
         std::cout << "Record " << numRecords << ": ";
         std::cout << std::endl << samIn.GetStatusMessage() << std::endl;
      }
      status = samIn.GetStatus();
   }

   printf("\nNumber of records read = %d\n", numRecords);
   printf("Number of valid records = %d\n", numValidRecords);
   printf("Returning: %d (%s)\n", status, SamStatus::getStatusString(status));
   return(status);
}