GreedyTupleAligner.h
00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018 #ifndef _GREEDY_TUPLE_H
00019 #define _GREEDY_TUPLE_H
00020
00021 #include <stdio.h>
00022 #include <stdlib.h>
00023 #include <string.h>
00024 #include <assert.h>
00025 #include <ctype.h>
00026 #include "Generic.h"
00027 #include "CigarRoller.h"
00028
00029
00030
00031
00032
00033
00034
00035
00036
00040 struct Weight
00041 {
00042 public:
00043 Weight()
00044 {
00045 gapOpen = gapExtend = -1;
00046 mismatch = -1;
00047 match= 2;
00048 };
00049 int gapOpen;
00050 int gapExtend;
00051 int mismatch;
00052 int match;
00053 };
00054
00055
00056
00057
00058
00059 template <typename QueryType, typename ReferenceType, typename ReferenceIndex>
00060 class GreedyTupleAligner
00061 {
00062 public:
00063 GreedyTupleAligner(Weight& wt): weight(wt)
00064 {}
00065
00079 int MatchTuple(
00080 const QueryType query,
00081 const int queryLength,
00082 const ReferenceType reference,
00083 const ReferenceIndex searchStartIndex,
00084 const int searchSize,
00085 int& matchedLength,
00086 int& mismatch)
00087 {
00088
00089
00090
00091 const int MAX_MISMATCH=2;
00092 int bestPos = 0, bestMismatch = queryLength, bestMatchedLength = 0, bestScore=-1;
00093
00094 #if defined(DEBUG_GREEDY_ALIGNER)
00095 cout << "searchStartIndex == " << searchStartIndex << ", searchSize == " << searchSize << std::endl;
00096 #endif
00097
00098
00099 for (int i = 0; i <= searchSize - tupleSize; i++)
00100 {
00101 int j = 0;
00102 mismatch = 0;
00103 while (j < queryLength)
00104 {
00105 if (searchStartIndex + i + j >= reference.getNumberBases())
00106 break;
00107 if (query[j] != reference[searchStartIndex + i + j])
00108 {
00109 mismatch++;
00110 if (mismatch >= MAX_MISMATCH)
00111 break;
00112 }
00113 j++;
00114 }
00115
00116 if (j>0 && (j==queryLength)) j--;
00117
00118 while (searchStartIndex +i +j < reference.getNumberBases()
00119 && ((j+1) > mismatch)
00120 && mismatch>0
00121 && query[j] != reference[searchStartIndex + i+j])
00122 {
00123
00124
00125 j--;
00126 mismatch--;
00127 }
00128
00129 int score = j - mismatch;
00130
00131 if (score > bestScore)
00132 {
00133 bestPos = i;
00134 bestScore = score;
00135 bestMismatch = mismatch;
00136 bestMatchedLength = j+1;
00137 }
00138 }
00139
00140 if (bestScore > 0)
00141 {
00142 mismatch = bestMismatch;
00143 matchedLength = bestMatchedLength;
00144 return bestPos;
00145 }
00146 return -1;
00147 }
00148
00159 void Align(
00160 QueryType query,
00161 int queryLength,
00162 ReferenceType reference,
00163 ReferenceIndex searchStartIndex,
00164 int searchSize,
00165 CigarRoller& cigarRoller,
00166 ReferenceIndex& matchPosition)
00167 {
00168
00169
00170
00171
00172
00173
00174 int r1 = 0;
00175 int queryMatchCount = 0;
00176 int q1 = 0;
00177 int pos = -1;
00178 int lastR1 = -1;
00179
00180 cigarRoller.clear();
00181 matchPosition = -1;
00182
00183 while (queryMatchCount < queryLength)
00184 {
00185 if (r1 == searchSize - 1)
00186 {
00187 cigarRoller.Add(CigarRoller::softClip, queryLength-queryMatchCount);
00188 break;
00189 }
00190 if (queryLength - q1 < tupleSize)
00191 {
00192
00193
00194
00195 cigarRoller.Add(CigarRoller::mismatch, queryLength - queryMatchCount);
00196 break;
00197 }
00198 int mismatch = 0;
00199 int matchedLen = 0;
00200 if ((pos = MatchTuple(query+q1, queryLength-q1, reference, searchStartIndex + r1, searchSize - r1, matchedLen, mismatch))
00201
00202 >= 0)
00203 {
00204
00205
00206 if (lastR1<0)
00207 matchPosition = pos;
00208
00209
00210
00211
00212 if (lastR1>=0)
00213 {
00214 if (pos > 0)
00215 {
00216 cigarRoller.Add(CigarRoller::del, pos);
00217 }
00218 }
00219 else
00220 {
00221 lastR1 = pos;
00222 }
00223
00224 r1 += pos;
00225 r1 += matchedLen;
00226 q1 += matchedLen;
00227
00228
00229
00230
00231 cigarRoller.Add(CigarRoller::match, matchedLen);
00232 queryMatchCount = q1;
00233 lastR1 = r1;
00234
00235 continue;
00236 }
00237
00238
00239
00240
00241
00242 for (int i = 1; i < queryLength - q1 - tupleSize; i++)
00243 {
00244 int mismatch = 0;
00245 int matchedLen = 0;
00246
00247 if (searchStartIndex + r1 >= reference.getNumberBases())
00248 return;
00249 if ((pos = MatchTuple(query+q1 + i ,
00250 queryLength - q1 -i ,
00251 reference,
00252 searchStartIndex + r1,
00253 searchSize - r1,
00254 matchedLen,
00255 mismatch))
00256 >= 0)
00257 {
00258 if (matchPosition < 0)
00259 matchPosition = pos + q1 + i ;
00260 }
00261 queryMatchCount += i;
00262 q1 += i;
00263 cigarRoller.Add(CigarRoller::insert, i);
00264
00265 lastR1 = r1 + pos;
00266 r1 += pos + tupleSize;
00267 q1 += tupleSize;
00268
00269
00270 while (searchStartIndex + r1 < reference.getNumberBases()
00271 && query[q1]==reference[searchStartIndex + r1]
00272 && q1 < queryLength)
00273 {
00274 r1++;
00275 q1++;
00276 }
00277 if (q1 < queryLength)
00278 {
00279 cigarRoller.Add(CigarRoller::match, q1 - queryMatchCount);
00280 queryMatchCount = q1;
00281 }
00282 else
00283 {
00284 cigarRoller.Add(CigarRoller::match, queryLength - queryMatchCount);
00285 queryMatchCount = queryLength ;
00286 break ;
00287 }
00288 }
00289
00290 r1++;
00291 q1++;
00292
00293
00294 }
00295 }
00296 private:
00297 static const int tupleSize = 3;
00298 Weight weight;
00299 };
00300
00301
00302 #endif