1 /** 2 Some additional string functions. 3 4 Copyright: © 2019 Arne Ludwig <arne.ludwig@posteo.de> 5 License: Subject to the terms of the MIT license, as written in the 6 included LICENSE file. 7 Authors: Arne Ludwig <arne.ludwig@posteo.de> 8 */ 9 module dalicious..string; 10 11 import std.algorithm : 12 countUntil, 13 joiner, 14 min, 15 map; 16 import std.array : 17 appender, 18 array, 19 minimallyInitializedArray; 20 import std.conv : to; 21 import std.exception : basicExceptionCtors, enforce; 22 import std.functional : binaryFun; 23 import std.math : 24 ceil, 25 floor, 26 isInfinity, 27 isNaN, 28 round, 29 sgn, 30 sqrt; 31 import std.range : 32 chain, 33 chunks, 34 cycle, 35 only, 36 take, 37 zip; 38 import std.range.primitives : hasSlicing; 39 import std..string : lineSplitter, tr; 40 import std.traits : 41 isFloatingPoint, 42 isSomeString; 43 import std.typecons : 44 Flag, 45 No, 46 tuple, 47 Yes; 48 import transforms : snakeCaseCT; 49 50 /// Convert a string to `dash-case` at compile time. 51 enum dashCaseCT(string camelCase) = camelCase.snakeCaseCT.tr("_", "-"); 52 53 /** 54 Adds one level of indentation for a multi-line string. Adds indentSize 55 spaces to each non-empty line. 56 57 Returns: indented string 58 */ 59 S indent(S)(S str, in size_t indentSize = 4) if (isSomeString!S) 60 { 61 enum lineSep = "\n"; 62 alias indentLine = (line) => chain(" ".cycle.take(line.length == 0 ? 0 : indentSize), line); 63 64 return str[] 65 .lineSplitter 66 .map!indentLine 67 .joiner(lineSep) 68 .chain(str[$ - lineSep.length .. $] == lineSep ? lineSep : "") 69 .array 70 .to!S; 71 } 72 73 /// 74 unittest 75 { 76 assert("a\nb".indent == " a\n b"); 77 assert("a\nb\n\n".indent == " a\n b\n\n"); 78 assert("a\nb\n".indent(2) == " a\n b\n"); 79 } 80 81 class AlignmentException : Exception 82 { 83 /// 84 mixin basicExceptionCtors; 85 } 86 87 /// One edit operation of the Needleman-Wunsch algorithm. 88 enum EditOp: byte 89 { 90 substitution, 91 deletetion, 92 insertion, 93 } 94 95 alias score_t = uint; 96 97 static enum Strip : byte 98 { 99 none = 0b00, 100 back = 0b01, 101 front = 0b10, 102 both = 0b11, 103 } 104 105 /// Represents an alignment of two sequences. 106 struct SequenceAlignment(S, alias scoreFun = "a == b ? 0 : 1") 107 { 108 alias getScore = binaryFun!scoreFun; 109 110 score_t score; 111 EditOp[] editPath; 112 S reference; 113 S query; 114 score_t indelPenalty; 115 Flag!"freeShift" freeShift; 116 117 /// Compute alignment score. 118 score_t computeScore() const pure 119 { 120 auto walkResult = walkEditOps(); 121 enforce!AlignmentException( 122 walkResult.i == reference.length && walkResult.j == query.length, 123 "invalid alignment", 124 ); 125 126 return walkResult.computedScore; 127 } 128 129 bool isValid() const pure nothrow 130 { 131 auto walkResult = walkEditOps(); 132 133 return isValid(walkResult); 134 } 135 136 private bool isValid(in WalkResult walkResult) const pure nothrow 137 { 138 return walkResult.i == reference.length && 139 walkResult.j == query.length && 140 walkResult.computedScore == score; 141 } 142 143 private static struct WalkResult 144 { 145 score_t computedScore; 146 size_t i, j; 147 } 148 149 private auto walkEditOps() const pure nothrow 150 { 151 WalkResult result; 152 foreach (k, editOp; editPath) 153 { 154 if (result.i > reference.length || result.j > query.length) 155 return result; 156 157 final switch (editOp) 158 { 159 case EditOp.substitution: 160 if (result.i == reference.length || result.j == query.length) 161 return result; 162 result.computedScore += getScore(reference[result.i], query[result.j]); 163 ++result.i; 164 ++result.j; 165 break; 166 case EditOp.deletetion: 167 if (result.i == reference.length) 168 return result; 169 result.computedScore += indelPenalty; 170 ++result.i; 171 break; 172 case EditOp.insertion: 173 if (result.j == query.length) 174 return result; 175 result.computedScore += indelPenalty; 176 ++result.j; 177 break; 178 } 179 } 180 if (freeShift) 181 { 182 auto firstSubstitution = editPath.countUntil(EditOp.substitution); 183 result.computedScore -= firstSubstitution >= 0 184 ? firstSubstitution 185 : editPath.length; 186 } 187 188 return result; 189 } 190 191 /// Strip leading/trailing insertions. 192 auto stripInsertions(Strip strip) inout pure nothrow 193 { 194 return partial(0, reference.length, strip); 195 } 196 197 /** 198 Get a partial alignment with respect to `reference`. 199 */ 200 auto partial(in size_t begin, in size_t end, Strip stripInsertions = Strip.none) inout pure nothrow 201 in 202 { 203 assert(this.isValid(), "Attempting to get a partial alignment of an invalid alignment"); 204 } 205 out (partialAlignment) 206 { 207 assert(partialAlignment.isValid(), "Partial alignment is invalid"); 208 } 209 do 210 { 211 assert(0 <= begin && begin <= end && end <= reference.length, "index out of bounds"); 212 213 if (end == begin) 214 return typeof(this)(0, [], reference[0 .. 0], query[0 .. 0], indelPenalty); 215 216 bool hasStarted; 217 score_t newScore; 218 size_t editBegin, editEnd; 219 size_t queryBegin, queryEnd; 220 size_t i, j; 221 foreach (k, editOp; editPath) 222 { 223 if (!hasStarted && i == begin) 224 { 225 hasStarted = !(stripInsertions & Strip.front); 226 newScore = 0; 227 editBegin = k; 228 queryBegin = j; 229 } 230 231 if (freeShift && i == begin) 232 newScore = 0; 233 234 if (i == end && ((stripInsertions & Strip.back) || editOp != EditOp.insertion)) 235 break; 236 237 final switch (editOp) 238 { 239 case EditOp.substitution: 240 newScore += getScore(reference[i], query[j]); 241 ++i; 242 ++j; 243 break; 244 case EditOp.deletetion: 245 newScore += indelPenalty; 246 ++i; 247 break; 248 case EditOp.insertion: 249 newScore += indelPenalty; 250 ++j; 251 break; 252 } 253 254 if (i == end) 255 { 256 editEnd = k + 1; 257 queryEnd = j; 258 } 259 } 260 261 if ( 262 !(stripInsertions & Strip.back) && 263 editEnd < editPath.length && 264 editPath[editEnd] == EditOp.insertion 265 ) 266 { 267 ++editEnd; 268 queryEnd = j; 269 } 270 271 auto partialAlignment = typeof(this)( 272 newScore, 273 editPath[editBegin .. editEnd], 274 reference[begin .. end], 275 query[queryBegin .. queryEnd], 276 indelPenalty, 277 freeShift, 278 ); 279 280 return partialAlignment; 281 } 282 283 auto opDollar() const pure nothrow 284 { 285 return reference.length; 286 } 287 288 /// ditto 289 auto opIndex(in size_t[2] slice) inout pure nothrow 290 { 291 return partial(slice[0], slice[1]); 292 } 293 294 /// 295 unittest 296 { 297 enum indelPenalty = 1; 298 auto alignment = findAlignment("GCATGCT", "GATTACA", indelPenalty); 299 300 assert(alignment.score == 4); 301 assert(alignment.toString == 302 "GCAT-GCT\n" ~ 303 "| || *|*\n" ~ 304 "G-ATTACA"); 305 306 auto partialAlignment = alignment[1 .. 5]; 307 308 assert(partialAlignment.score == 3); 309 assert(partialAlignment.toString == 310 "CAT-G\n" ~ 311 " || *\n" ~ 312 "-ATTA"); 313 } 314 315 size_t[2] opSlice(size_t dim)(in size_t begin, in size_t end) const pure nothrow 316 { 317 return [begin, end]; 318 } 319 320 /// Get a string representation of this alignment. Visual alignment breaks 321 /// unless elements of the sequences convert to single chars via `to!string`. 322 string toString( 323 alias matchSymbol = '|', 324 alias substitutionSymbol = '*', 325 alias indelSymbol = ' ', 326 alias gapSymbol = '-', 327 )(in size_t width = 0) const pure 328 { 329 auto referenceLine = appender!string; 330 referenceLine.reserve(editPath.length); 331 auto compareLine = appender!string; 332 compareLine.reserve(editPath.length); 333 auto queryLine = appender!string; 334 queryLine.reserve(editPath.length); 335 336 size_t i, j; 337 foreach (editOp; editPath) 338 { 339 final switch (editOp) 340 { 341 case EditOp.substitution: 342 referenceLine ~= reference[i].to!string; 343 compareLine ~= reference[i] == query[j] 344 ? matchSymbol 345 : substitutionSymbol; 346 queryLine ~= query[j].to!string; 347 ++i; 348 ++j; 349 break; 350 case EditOp.deletetion: 351 referenceLine ~= reference[i].to!string; 352 compareLine ~= indelSymbol; 353 queryLine ~= gapSymbol; 354 ++i; 355 break; 356 case EditOp.insertion: 357 referenceLine ~= gapSymbol; 358 compareLine ~= indelSymbol; 359 queryLine ~= query[j].to!string; 360 ++j; 361 break; 362 } 363 } 364 365 if (width == 0) 366 return only( 367 referenceLine.data.to!string, 368 compareLine.data.to!string, 369 queryLine.data.to!string, 370 ) 371 .joiner("\n") 372 .to!string; 373 else 374 return zip( 375 referenceLine.data.to!string.chunks(width), 376 compareLine.data.to!string.chunks(width), 377 queryLine.data.to!string.chunks(width), 378 ) 379 .map!(lineTriple => only(lineTriple.expand) 380 .joiner("\n")) 381 .joiner("\n\n") 382 .to!string; 383 } 384 385 /// 386 unittest 387 { 388 auto alignment = findAlignment("ACGTC", "AGTC", 1); 389 assert(alignment.toString == 390 "ACGTC\n" ~ 391 "| |||\n" ~ 392 "A-GTC"); 393 } 394 395 /// 396 unittest 397 { 398 auto alignment = findAlignment("GCATGCT", "GATTACA", 1); 399 400 assert(alignment.toString == 401 "GCAT-GCT\n" ~ 402 "| || *|*\n" ~ 403 "G-ATTACA"); 404 } 405 } 406 407 /** 408 Compute an alignment of `query` against `reference` using the 409 Needleman-Wunsch algorithm with non-negative scores and constant 410 indel penalty. Optionally, the `freeShift` mode may be activated 411 as to allow large indels at the beginning and end of the alignment. 412 413 **Implementation Notes:** The current implementation needs 414 `O(reference.length * query.length)` in time and memory. As the 415 memory requirement easily exceeds available memory it can be 416 limited for now. This may change in future and an implementation 417 using `O(max(reference.length, query.length))` memory will be 418 silently selected for large inputs. 419 420 Params: 421 scoreFun = calculate score for a 'substitution' at `i, j` using 422 `scoreFun(reference[i], reference[j])` 423 reference = Sequence to compare `query` against 424 query = Sequence to compare against `reference` 425 indelPenalty = Penalize each indel with this value 426 freeShift = Allow indels at the beginning and end of the alignment 427 memoryLimit = throw an error if the calculation would require more 428 than `memoryLimit` bytes. 429 Throws: AlignmentException if the calculation would require more than 430 `memoryLimit` bytes. 431 432 See_Also: http://en.wikipedia.org/wiki/Needleman-Wunsch_algorithm 433 */ 434 SequenceAlignment!(const(S), scoreFun) findAlignment( 435 alias scoreFun = "a == b ? 0 : 1", 436 S, 437 )( 438 in S reference, 439 in S query, 440 in score_t indelPenalty, 441 Flag!"freeShift" freeShift = No.freeShift, 442 size_t memoryLimit = 2^^20, 443 ) 444 { 445 alias score = binaryFun!scoreFun; 446 enforce!AlignmentException( 447 memoryRequired(reference, query) <= memoryLimit, 448 "memory limit exceeded; use short reference and/or query", 449 ); 450 451 auto F = DPMatrix!score_t(reference.length + 1, query.length + 1); 452 453 // Initialize scoring matrix 454 foreach (i; 0 .. reference.length + 1) 455 F[i, 0] = freeShift ? 0 : cast(score_t) i * indelPenalty; 456 foreach (j; 0 .. query.length + 1) 457 F[0, j] = freeShift ? 0 : cast(score_t) j * indelPenalty; 458 459 // Compute scoring matrix by rows in a cache friendly manner 460 foreach (i; 1 .. reference.length + 1) 461 foreach (j; 1 .. query.length + 1) 462 F[i, j] = min( 463 F[i - 1, j - 1] + score(reference[i - 1], query[j - 1]), 464 F[i - 1, j ] + indelPenalty, 465 F[i , j - 1] + indelPenalty, 466 ); 467 468 return typeof(return)( 469 F[$ - 1, $ - 1], 470 tracebackScoringMatrix(F), 471 reference, 472 query, 473 indelPenalty, 474 freeShift, 475 ); 476 } 477 478 /// 479 unittest 480 { 481 auto alignment = findAlignment("ACGTC", "AGTC", 1); 482 483 // - A G T C 484 // - 0 1 2 3 4 485 // A 1 0 1 2 3 486 // C 2 1 1 2 2 487 // G 3 2 1 2 3 488 // T 4 3 2 1 2 489 // C 5 4 3 2 1 490 // 491 // ACGTC 492 // | ||| 493 // A-GTC 494 495 assert(alignment.score == 1); 496 assert(alignment.editPath == [ 497 EditOp.substitution, 498 EditOp.deletetion, 499 EditOp.substitution, 500 EditOp.substitution, 501 EditOp.substitution, 502 ]); 503 } 504 505 /// 506 unittest 507 { 508 auto alignment = findAlignment("GCATGCT", "GATTACA", 1); 509 510 // - G A T T A C A 511 // - 0 1 2 3 4 5 6 7 512 // G 1 0 1 2 3 4 5 6 513 // C 2 1 2 3 4 5 4 5 514 // A 3 2 1 2 3 4 5 4 515 // T 4 3 2 1 2 3 6 5 516 // G 5 4 3 2 3 4 5 6 517 // C 6 5 4 3 4 5 4 5 518 // T 7 6 5 4 3 6 5 6 519 // 520 // GCAT-GCT 521 // | || *|* 522 // G-ATTACA 523 assert(alignment.score == 4); 524 assert(alignment.editPath == [ 525 EditOp.substitution, 526 EditOp.deletetion, 527 EditOp.substitution, 528 EditOp.substitution, 529 EditOp.insertion, 530 EditOp.substitution, 531 EditOp.substitution, 532 EditOp.substitution, 533 ]); 534 } 535 536 /// 537 unittest 538 { 539 auto alignment = findAlignment!"a == b ? 0 : 1"( 540 "tgaggacagaagggtcataggtttaattctggtcacaggcacattcctgg" ~ 541 "gttgcaggtttgatctccacctggtcggggcacatgca", 542 "tgaggacagaagggtcatggtttaattctggtcacaggcacattcctggg" ~ 543 "ttgtaggctcaattcccacccggtcggggccacatgca", 544 1, 545 ); 546 547 assert(alignment.toString(50) == 548 "tgaggacagaagggtcataggtttaattctggtcacaggcacattcctgg\n" ~ 549 "|||||||||||||||||| |||||||||||||||||||||||||||||||\n" ~ 550 "tgaggacagaagggtcat-ggtttaattctggtcacaggcacattcctgg\n" ~ 551 "\n" ~ 552 "gttgcaggtttgatctcc-acctggtcggggc-acatgca\n" ~ 553 "||||*|||*|**|| ||| |||*||||||||| |||||||\n" ~ 554 "gttgtaggctcaat-tcccacccggtcggggccacatgca"); 555 } 556 557 /// 558 unittest 559 { 560 auto alignment = findAlignment( 561 "tgaggacagaagggtcataggtttaattctggtcacaggcacattcctgg" ~ 562 "gttgcaggtttgatctccacctggtcggggcacatgca", 563 "aatgaacagctgaggacagaagggtcatggtttaattctggtcacaggca" ~ 564 "cattcctgggttgtaggctcaattcccacccggtcggggccacatgca", 565 1, 566 Yes.freeShift, 567 ); 568 569 assert(alignment.toString(50) == 570 "----------tgaggacagaagggtcataggtttaattctggtcacaggc\n" ~ 571 " |||||||||||||||||| |||||||||||||||||||||\n" ~ 572 "aatgaacagctgaggacagaagggtcat-ggtttaattctggtcacaggc\n" ~ 573 "\n" ~ 574 "acattcctgggttgcaggtttgatctcc-acctggtcggggc-acatgca\n" ~ 575 "||||||||||||||*|||*|**|| ||| |||*||||||||| |||||||\n" ~ 576 "acattcctgggttgtaggctcaat-tcccacccggtcggggccacatgca"); 577 } 578 579 /// 580 unittest 581 { 582 auto alignment = findAlignment( 583 "tatcctcaggtgaggactaacaacaaatatatatatatttatatctaaca" ~ 584 "acatatgatttctaaaatttcaaaaatcttaaggctgaattaat", 585 "tatcctcaggtgaggcttaacaacaaatatatatatactgtaatatctaa" ~ 586 "caacatatgattctaaaatttcaaaatgcttaaaggtctga", 587 1, 588 Yes.freeShift, 589 ); 590 591 assert(alignment.toString(50) == 592 "tatcctcaggtgaggact-aacaacaaatatatatata-ttta-tatcta\n" ~ 593 "||||||||||||||| || ||||||||||||||||||| |*|| ||||||\n" ~ 594 "tatcctcaggtgagg-cttaacaacaaatatatatatactgtaatatcta\n" ~ 595 "\n" ~ 596 "acaacatatgatttctaaaatttcaaaaatcttaa-gg-ctgaattaat\n" ~ 597 "||||||||||||| ||||||||||||||**||||| || |||| \n" ~ 598 "acaacatatgatt-ctaaaatttcaaaatgcttaaaggtctga------"); 599 } 600 601 unittest 602 { 603 enum reference = "tatcctcaggtgaggcttaacaacaaatatatatatactgtaatatctaa" ~ 604 "caacatatgattctaaaatttcaaaatgcttaaaggtctga"; 605 enum query = "atatcctcaggtgaggactaacaacaaatatatatatatttatatctaac" ~ 606 "aacatatgatttctaaaatttcaaaaatcttaaggctgaattaat"; 607 auto alignment = findAlignment( 608 reference, 609 query, 610 1 611 ); 612 613 assert(alignment.partial(0, reference.length, Strip.none).toString(50) == 614 "-tatcctcaggtgagg-cttaacaacaaatatatatatactgtaatatct\n" ~ 615 " ||||||||||||||| || ||||||||||||||||||| |*|| |||||\n" ~ 616 "atatcctcaggtgaggact-aacaacaaatatatatata-ttta-tatct\n" ~ 617 "\n" ~ 618 "aacaacatatgatt-ctaaaatttcaaaatgcttaaaggtctga------\n" ~ 619 "|||||||||||||| ||||||||||||||**||||| || |||| \n" ~ 620 "aacaacatatgatttctaaaatttcaaaaatcttaa-gg-ctgaattaat"); 621 assert(alignment.partial(0, reference.length, Strip.front).toString(50) == 622 "tatcctcaggtgagg-cttaacaacaaatatatatatactgtaatatcta\n" ~ 623 "||||||||||||||| || ||||||||||||||||||| |*|| ||||||\n" ~ 624 "tatcctcaggtgaggact-aacaacaaatatatatata-ttta-tatcta\n" ~ 625 "\n" ~ 626 "acaacatatgatt-ctaaaatttcaaaatgcttaaaggtctga------\n" ~ 627 "||||||||||||| ||||||||||||||**||||| || |||| \n" ~ 628 "acaacatatgatttctaaaatttcaaaaatcttaa-gg-ctgaattaat"); 629 assert(alignment.partial(0, reference.length, Strip.back).toString(50) == 630 "-tatcctcaggtgagg-cttaacaacaaatatatatatactgtaatatct\n" ~ 631 " ||||||||||||||| || ||||||||||||||||||| |*|| |||||\n" ~ 632 "atatcctcaggtgaggact-aacaacaaatatatatata-ttta-tatct\n" ~ 633 "\n" ~ 634 "aacaacatatgatt-ctaaaatttcaaaatgcttaaaggtctga\n" ~ 635 "|||||||||||||| ||||||||||||||**||||| || ||||\n" ~ 636 "aacaacatatgatttctaaaatttcaaaaatcttaa-gg-ctga"); 637 assert(alignment.partial(0, reference.length, Strip.both).toString(50) == 638 "tatcctcaggtgagg-cttaacaacaaatatatatatactgtaatatcta\n" ~ 639 "||||||||||||||| || ||||||||||||||||||| |*|| ||||||\n" ~ 640 "tatcctcaggtgaggact-aacaacaaatatatatata-ttta-tatcta\n" ~ 641 "\n" ~ 642 "acaacatatgatt-ctaaaatttcaaaatgcttaaaggtctga\n" ~ 643 "||||||||||||| ||||||||||||||**||||| || ||||\n" ~ 644 "acaacatatgatttctaaaatttcaaaaatcttaa-gg-ctga"); 645 } 646 647 unittest 648 { 649 enum reference = "tatcctcaggtgaggcttaacaacaaatatatatatactgtaatatctaa"; 650 enum query = "nnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnn"; 651 auto alignment = findAlignment!"1 - (a == b || a == 'n' || b == 'n')"( 652 reference, 653 query, 654 1 655 ); 656 657 import std.stdio; 658 assert(alignment.partial(0, reference.length, Strip.front).toString(50) == 659 "tatcctcaggtgaggcttaacaacaaatatatatatactgtaatatctaa\n" ~ 660 "**************************************************\n" ~ 661 "nnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnn"); 662 } 663 664 unittest 665 { 666 auto alignment = findAlignment( 667 "tgaggacagaagggtcataggtttaattctggtcacaggcacattcctgg" ~ 668 "gttgcaggtttgatctccacctggtcggggcacatgcaggaggcaaccaa" ~ 669 "tcaatgtgtctctttctcagtgatgtttcttctctctgtctctctccccc" ~ 670 "ctctcttctactctctctgaaaaatagatggaaaaaatatcctcaggtga" ~ 671 "ggactaacaacaaatatatatatatttatatctaacaacatatgatttct" ~ 672 "aaaatttcaaaaatcttaaggctgaattaat", 673 "aatgaacagctgaggacagaagggtcatggtttaattctggtcacaggca" ~ 674 "cattcctgggttgtaggctcaattcccacccggtcggggccacatgcaga" ~ 675 "aggcaaccatcaatgtgtctctttcaagtgatgtttcttctctctgtcta" ~ 676 "ctccccctccctatctactctctgggaaacttatggaaaaaatatcctca" ~ 677 "ggtgaggcttaacaacaaatatatatatactgtaatatctaacaacatat" ~ 678 "gattctaaaatttcaaaatgcttaaaggtctga", 679 1, 680 Yes.freeShift, 681 ); 682 683 assert(alignment.toString(50) == 684 "----------tgaggacagaagggtcataggtttaattctggtcacaggc\n" ~ 685 " |||||||||||||||||| |||||||||||||||||||||\n" ~ 686 "aatgaacagctgaggacagaagggtcat-ggtttaattctggtcacaggc\n" ~ 687 "\n" ~ 688 "acattcctgggttgcaggtttgatctcc-acctggtcggggc-acatgca\n" ~ 689 "||||||||||||||*|||*|**|| ||| |||*||||||||| |||||||\n" ~ 690 "acattcctgggttgtaggctcaat-tcccacccggtcggggccacatgca\n" ~ 691 "\n" ~ 692 "ggaggcaaccaatcaatgtgtctctttctcagtgatgtttcttctctctg\n" ~ 693 "|*||||||||| |||||||||||||||| *||||||||||||||||||||\n" ~ 694 "gaaggcaacca-tcaatgtgtctctttc-aagtgatgtttcttctctctg\n" ~ 695 "\n" ~ 696 "tctctctcccccctctct-tctactctctctgaaaaatagatggaaaaaa\n" ~ 697 "||| *||||||| ||*|| ||||||||||**||||**|| |||||||||\n" ~ 698 "tct-actccccc-tccctatctactctctgggaaactta--tggaaaaaa\n" ~ 699 "\n" ~ 700 "tatcctcaggtgaggact-aacaacaaatatatatata-ttta-tatcta\n" ~ 701 "||||||||||||||| || ||||||||||||||||||| |*|| ||||||\n" ~ 702 "tatcctcaggtgagg-cttaacaacaaatatatatatactgtaatatcta\n" ~ 703 "\n" ~ 704 "acaacatatgatttctaaaatttcaaaaatcttaa-gg-ctgaattaat\n" ~ 705 "||||||||||||| ||||||||||||||**||||| || |||| \n" ~ 706 "acaacatatgatt-ctaaaatttcaaaatgcttaaaggtctga------"); 707 } 708 709 // Returns the amount of memory required to compute an alignment between reference and query. 710 size_t memoryRequired(S)(in S reference, in S query) 711 { 712 return score_t.sizeof * ( 713 // Space for the scoring matrix F 714 ((reference.length + 1) * (query.length + 1)) + 715 // Space for the edit path 716 (reference.length + query.length) 717 ); 718 } 719 720 /// Returns longest query and refernce length possible with memoryLimit. 721 size_t longestInputsLength(size_t memoryLimit) pure 722 { 723 return round(sqrt((memoryLimit / score_t.sizeof + 3).to!double) - 2).to!size_t; 724 } 725 726 // Find edit path of the best alignment 727 private EditOp[] tracebackScoringMatrix(in DPMatrix!score_t F) 728 { 729 auto editPath = minimallyInitializedArray!(EditOp[])(F.size[0] + F.size[1]); 730 size_t i = F.size[0] - 1; 731 size_t j = F.size[1] - 1; 732 size_t k = editPath.length; 733 734 while (0 < i && 0 < j) 735 { 736 auto matchScore = F[i - 1, j - 1]; 737 auto insertionScore = F[i , j - 1]; 738 auto deletionScore = F[i - 1, j ]; 739 auto nextScore = min( 740 matchScore, 741 deletionScore, 742 insertionScore, 743 ); 744 745 assert(k > 0); 746 switch (nextScore) 747 { 748 case matchScore: 749 editPath[--k] = EditOp.substitution; 750 --i; 751 --j; 752 break; 753 case insertionScore: 754 editPath[--k] = EditOp.insertion; 755 --j; 756 break; 757 case deletionScore: 758 editPath[--k] = EditOp.deletetion; 759 --i; 760 break; 761 default: 762 assert(0, "corrupted scoring matrix"); 763 } 764 } 765 766 while (0 < i) 767 { 768 assert(k > 0); 769 editPath[--k] = EditOp.deletetion; 770 --i; 771 } 772 773 while (0 < j) 774 { 775 assert(k > 0); 776 editPath[--k] = EditOp.insertion; 777 --j; 778 } 779 780 return editPath[k .. $]; 781 } 782 783 unittest 784 { 785 auto F = DPMatrix!score_t(6, 5); 786 F.elements = [ 787 // - A G T C 788 0, 1, 2, 3, 4, // - 789 1, 0, 1, 2, 3, // A 790 2, 1, 1, 2, 2, // C 791 3, 2, 1, 2, 3, // G 792 4, 3, 2, 1, 2, // T 793 5, 4, 3, 2, 1, // C 794 ]; 795 796 assert(F.tracebackScoringMatrix == [ 797 EditOp.substitution, 798 EditOp.deletetion, 799 EditOp.substitution, 800 EditOp.substitution, 801 EditOp.substitution, 802 ]); 803 } 804 805 private struct DPMatrix(T) 806 { 807 size_t[2] size; 808 T[] elements; 809 810 this(in size_t n, in size_t m) 811 { 812 this.size[0] = n; 813 this.size[1] = m; 814 this.elements = minimallyInitializedArray!(T[])(n * m); 815 } 816 817 this(in size_t n, in size_t m, ref T[] buffer) 818 { 819 this.size[0] = n; 820 this.size[1] = m; 821 auto numElements = n * m; 822 assert(buffer.length <= numElements); 823 this.elements = buffer; 824 } 825 826 unittest 827 { 828 auto M = DPMatrix!int(2, 3); 829 830 assert(M.size[0] == 2); 831 assert(M.size[1] == 3); 832 833 foreach (i; 0 .. M.size[0]) 834 foreach (j; 0 .. M.size[1]) 835 M[i, j] = cast(int) (i + j); 836 837 assert(M.elements == [ 838 0, 1, 2, 839 1, 2, 3, 840 ]); 841 } 842 843 size_t opDollar(size_t dim)() const pure nothrow if (dim < 2) 844 { 845 return size[dim]; 846 } 847 848 auto ref T opIndex(size_t i, size_t j) pure nothrow 849 { 850 assert(i < size[0] && j < size[1], "index out of bounds"); 851 852 return elements[i * size[1] + j]; 853 } 854 855 const(T) opIndex(size_t i, size_t j) const pure nothrow 856 { 857 assert(i < size[0] && j < size[1], "index out of bounds"); 858 859 return elements[i * size[1] + j]; 860 } 861 862 unittest 863 { 864 auto M = DPMatrix!int(2, 3); 865 M.elements = [ 866 0, 1, 2, 867 1, 2, 3, 868 ]; 869 870 assert(M[0, 0] == 0 + 0); 871 assert(M[0, 1] == 0 + 1); 872 assert(M[0, 2] == 0 + 2); 873 assert(M[1, 0] == 1 + 0); 874 assert(M[1, 1] == 1 + 1); 875 assert(M[1, 2] == 1 + 2); 876 } 877 878 int opApply(scope int delegate(size_t i, size_t j, ref T) yield) 879 { 880 mixin(opApplyImpl!(No.reverse)); 881 } 882 883 int opApplyReverse(scope int delegate(size_t i, size_t j, ref T) yield) 884 { 885 mixin(opApplyImpl!(Yes.reverse)); 886 } 887 888 unittest 889 { 890 auto M = DPMatrix!int(2, 3); 891 892 foreach (i, j, ref elem; M) 893 elem = cast(int) (i + j); 894 895 assert(M.elements == [ 896 0, 1, 2, 897 1, 2, 3, 898 ]); 899 } 900 901 int opApply(scope int delegate(size_t i, size_t j, in T) yield) const 902 { 903 mixin(opApplyImpl!(No.reverse)); 904 } 905 906 int opApplyReverse(scope int delegate(size_t i, size_t j, in T) yield) const 907 { 908 mixin(opApplyImpl!(Yes.reverse)); 909 } 910 911 unittest 912 { 913 auto M = DPMatrix!int(2, 3); 914 M.elements = [ 915 0, 1, 2, 916 1, 2, 3, 917 ]; 918 919 foreach (i, j, elem; cast(const(typeof(M))) M) 920 { 921 assert(M[i, j] == elem); 922 assert(elem == i + j); 923 } 924 } 925 926 private static enum opApplyImpl(Flag!"reverse" reverse) = ` 927 int result = 0; 928 929 foreach` ~ (reverse ? `_reverse` : ``) ~ ` (i, ref element; elements) 930 { 931 result = yield(i / size[1], i % size[1], element); 932 933 if (result) 934 break; 935 } 936 937 return result; 938 `; 939 } 940 941 /// Convert a floating point number to a base-10 string at compile time. 942 /// This function is very crude and will not work in many cases! 943 string toString(Float)(in Float value, in uint precision) pure nothrow 944 if (isFloatingPoint!Float) 945 { 946 if (value.isNaN) 947 return "nan"; 948 else if (value.isInfinity) 949 return value > 0 ? "inf" : "-inf"; 950 951 if (precision == 0) 952 { 953 auto intPart = cast(long) round(value); 954 955 return intPart.to!string; 956 } 957 else 958 { 959 auto intPart = cast(long) (value > 0 ? floor(value) : ceil(value)); 960 auto fraction = sgn(value) * (value - intPart); 961 assert(fraction >= 0, "fractional part of value should be non-negative"); 962 auto fracPart = cast(ulong) round(10^^precision * fraction); 963 964 return intPart.to!string ~ "." ~ fracPart.to!string; 965 } 966 } 967 968 /// 969 unittest 970 { 971 enum x = 42.0; 972 enum y = -13.37f; 973 enum z = 0.9; 974 975 static assert(float.nan.toString(0) == "nan"); 976 static assert(double.infinity.toString(0) == "inf"); 977 static assert((-double.infinity).toString(0) == "-inf"); 978 static assert(x.toString(0) == "42"); 979 static assert(x.toString(1) == "42.0"); 980 static assert(y.toString(2) == "-13.37"); 981 static assert(y.toString(1) == "-13.4"); 982 static assert(y.toString(0) == "-13"); 983 static assert(z.toString(1) == "0.9"); 984 static assert(z.toString(0) == "1"); 985 }