Line 1: |
Line 1: |
| = Introduction = | | = Introduction = |
| | | |
− | GNU Makefile is a widely used tool for managing the complicated process of compiling a C program. | + | GNU Make is often thought of as a tool for managing the compilation of large C programs. This is true, but its potential is not limited to this! |
− | But this is not the only use for this very powerful tool.
| |
| | | |
− | A statistical analysis usually involves multiple data preparation steps just to mould the input into
| + | At its core, it is a generic pipelining framework that is aware of dependencies and can run steps in parallel. |
− | a form that is acceptable by the analysis tool. Analysis steps involving large data sets requires | |
− | parallelization and this means partitioning the data into subsets that may be run independently on the
| |
− | cluster. Upon completion of the analyses, the partial outputs have to be merged into a file again before
| |
− | plots are made to summarize the results. This is further compounded when one is interested in the effects
| |
− | of multiple parameter settings in an analysis.
| |
| | | |
− | All these often result in hundreds of separate commands invoked and storing all these commands in
| + | Statistical genetics analyses often require multitudinous steps to prepare the data, run computationally expensive analyses and then collate the data. |
− | a text file is probably not the efficient way. | + | |
| + | Make can potentially save you lots of time and hair pulling, especially when your supervisor asks for ALL the analyses again but this time only with rare variants. |
| + | |
| + | = Example = |
| + | |
| + | This example does the following: |
| + | |
| + | #generate 100 log files, each with a number written to it |
| + | #concatenate the 100 log files into one file |
| + | #delete the 100 log files |
| + | |
| + | The example files may be found in /net/fantasia/home/atks/makefile_tutorial |
| + | |
| + | #generate make file using perl script |
| + | ./generate_simple_stuff |
| + | |
| + | #run make file sequentially |
| + | make -f simple_stuff.mk |
| + | |
| + | #run make file in parallel with at most 100 jobs |
| + | make -f simple_stuff.mk -j 100 |
| + | |
| + | #clear files from run (invokes the clean target of the generated make file) |
| + | make -f simple_stuff.mk clean |
| + | |
| + | = Script = |
| + | |
| + | <source lang=perl> |
| + | #!/usr/bin/perl -w |
| + | |
| + | use warnings; |
| + | use strict; |
| + | use POSIX; |
| + | use Getopt::Long; |
| + | use File::Path; |
| + | use File::Basename; |
| + | use Pod::Usage; |
| + | |
| + | =head1 NAME |
| + | |
| + | generate_simple_stuff_makefile |
| + | |
| + | =head1 SYNOPSIS |
| + | |
| + | generate_simple_stuff_makefile [options] |
| + | |
| + | -o output directory : location of all output files (default: current working directory) |
| + | -m output make file (default: simple_stuff.mk) |
| + | |
| + | example: ./generate_simple_stuff_makefile.pl |
| + | |
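| + | =head1 DESCRIPTION |
| + | |
| + | Writes a make file whose rules create 100 log files in the output directory, concatenate them into all.log and then remove the 100 log files. Each step is marked complete by touching a .OK file. |
| + | |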
| + | =cut |
| + | |
| + | #option variables |
| + | my $help; |
| + | my $verbose; |
| + | my $debug; |
| + | my $outputDir = getcwd(); |
| + | my $makeFile = "simple_stuff.mk"; |
| + | |
| + | #initialize options |
| + | Getopt::Long::Configure ('bundling'); |
| + | |
| + | #parse the command line; $optionsOK is false when GetOptions reports |
| + | #an unknown option or a malformed value |
| + | my $optionsOK = GetOptions ('h'=>\$help, 'v'=>\$verbose, 'd'=>\$debug, |
| + |                             'o:s'=>\$outputDir, |
| + |                             'm:s'=>\$makeFile); |
| + | |
| + | #-h is handled on its own: a successful parse with -h set would otherwise |
| + | #skip the usage message and go on to generate the make file |
| + | if ($help) |
| + | { |
| + |     pod2usage(-verbose => 2); |
| + | } |
| + | |
| + | #reject bad options, stray positional arguments and an empty output |
| + | #directory (a bare -o yields "" because the value is optional, which would |
| + | #turn every generated path into one under /) |
| + | if (!$optionsOK |
| + |     || !defined($outputDir) |
| + |     || $outputDir eq "" |
| + |     || scalar(@ARGV)!=0) |
| + | { |
| + |     pod2usage(1); |
| + | } |
| + | |
| + | ############################# |
| + | #report the options in effect |
| + | ############################# |
| + | print "Options\n"; |
| + | print "\n"; |
| + | print "output directory : " . $outputDir . "\n"; |
| + | print "\n"; |
| + | |
| + | #comma-separated list of the numbers 140 to 171 |
| + | #NOTE(review): neither @nodes nor $nodes is used by the rest of the visible |
| + | #script - presumably cluster node ids; confirm before removing |
| + | my @nodes = map { "$_" } (140..171); |
| + | my $nodes = join(",", @nodes); |
| + | |
| + | #arrays for storing targets, dependencies and commands; |
| + | #element i of each array describes the same make rule |
| + | my @tgts = (); |
| + | my @deps = (); |
| + | my @cmds = (); |
| + | |
| + | #temporary variables |
| + | my $tgt; |
| + | my $dep; |
| + | my @cmd; |
| + | |
| + | mkpath($outputDir); |
| + | |
| + | #space-separated lists of the per-step log files and of their .OK markers |
| + | my $inputFiles = ""; |
| + | my $inputFilesOK = ""; |
| + | my $inputFile = ""; |
| + | my $outputFile = ""; |
| + | |
| + | ###################### |
| + | #1. Generate 100 files |
| + | ###################### |
| + | for my $i (1..100) |
| + | { |
| + |     $inputFiles .= " $outputDir/$i.log"; |
| + |     $inputFilesOK .= " $outputDir/$i.OK"; |
| + |     #the rule's target is the .OK marker, not the log file itself |
| + |     $tgt = "$outputDir/$i.OK"; |
| + |     $dep = ""; |
| + |     @cmd = ("echo $i > $outputDir/$i.log"); |
| + |     #makeLocalStep($tgt, $dep, @cmd); |
| + |     makeSlurm($tgt, $dep, @cmd); |
| + | } |
| + | |
| + | ######################### |
| + | #2. Concatenate 100 files |
| + | ######################### |
| + | $outputFile = "$outputDir/all.log"; |
| + | $tgt = "$outputFile.OK"; |
| + | $dep = $inputFilesOK; |
| + | @cmd = ("cat $inputFiles > $outputFile"); |
| + | #makeLocalStep($tgt, $dep, @cmd); |
| + | makeSlurm($tgt, $dep, @cmd); |
| + | |
| + | ########################### |
| + | #3. Cleanup temporary files |
| + | ########################### |
| + | $tgt = "$outputDir/cleaned.OK"; |
| + | #depend on the .OK marker of step 2: all.log itself is not the target of |
| + | #any rule, so naming it here can make a run stop with "No rule to make |
| + | #target" when it has not been written yet (e.g. with -j) |
| + | $dep = "$outputDir/all.log.OK"; |
| + | @cmd = ("rm $inputFiles"); |
| + | #makeLocalStep($tgt, $dep, @cmd); |
| + | makeSlurm($tgt, $dep, @cmd); |
| + | |
| + | #******************* |
| + | #Write out make file |
| + | #******************* |
| + | #three-argument open on a lexical handle, so that no character of $makeFile |
| + | #can be read as part of the open mode |
| + | open(my $mak, ">", $makeFile) || die "Cannot open $makeFile: $!\n"; |
| + | print $mak ".DELETE_ON_ERROR:\n\n"; |
| + | #all and clean name actions, not files; without this a file called "all" or |
| + | #"clean" in the working directory would stop the rule from running |
| + | print $mak ".PHONY: all clean\n\n"; |
| + | #all is written before clean is appended below, so it depends on the |
| + | #generated steps only |
| + | print $mak "all: @tgts\n\n"; |
| + | |
| + | #clean |
| + | push(@tgts, "clean"); |
| + | push(@deps, ""); |
| + | push(@cmds, "\t-rm -rf $outputDir/*.OK $outputDir/*.log"); |
| + | |
| + | #one rule per entry: "target: dependencies" followed by its recipe |
| + | for my $i (0..$#tgts) |
| + | { |
| + |     print $mak "$tgts[$i]: $deps[$i]\n"; |
| + |     print $mak "$cmds[$i]\n"; |
| + | } |
| + | #errors from buffered writes only surface when the handle is closed |
| + | close($mak) || die "Cannot write $makeFile: $!\n"; |
| + | |
| + | ########## |
| + | #functions |
| + | ########## |
| + | |
| + | #register a make rule whose commands are each launched through srun |
| + | # $tgt : target of the rule; touched by the last recipe line |
| + | # $dep : dependency list of the rule, as one string |
| + | # @cmd : commands of the rule, one recipe line each |
| + | sub makeSlurm |
| + | { |
| + |     my ($tgt, $dep, @cmd) = @_; |
| + | |
| + |     #tab-indented recipe: every command prefixed with srun, then the touch |
| + |     my $recipe = join("", map { "\tsrun $_\n" } @cmd); |
| + |     $recipe .= "\ttouch $tgt\n"; |
| + | |
| + |     push(@tgts, $tgt); |
| + |     push(@deps, $dep); |
| + |     push(@cmds, $recipe); |
| + | } |
| + | |
| + | #register a make rule whose commands are run as given, without srun |
| + | # $tgt : target of the rule; touched by the last recipe line |
| + | # $dep : dependency list of the rule, as one string |
| + | # @cmd : commands of the rule, one recipe line each |
| + | sub makeLocalStep |
| + | { |
| + |     my ($tgt, $dep, @cmd) = @_; |
| + | |
| + |     #collect the tab-indented recipe lines, ending with the touch |
| + |     my @recipeLines = map { "\t" . $_ . "\n" } @cmd; |
| + |     push(@recipeLines, "\ttouch $tgt\n"); |
| + | |
| + |     push(@tgts, $tgt); |
| + |     push(@deps, $dep); |
| + |     push(@cmds, join("", @recipeLines)); |
| + | } |
| + | </source> |
| | | |
| = Solution = | | = Solution = |