1 /** 2 Some additional string functions. 3 4 Copyright: © 2019 Arne Ludwig <arne.ludwig@posteo.de> 5 License: Subject to the terms of the MIT license, as written in the 6 included LICENSE file. 7 Authors: Arne Ludwig <arne.ludwig@posteo.de> 8 */ 9 module dalicious..string; 10 11 import std.algorithm : 12 countUntil, 13 joiner, 14 min, 15 map; 16 import std.array : 17 appender, 18 array, 19 minimallyInitializedArray; 20 import std.conv : to; 21 import std.exception : basicExceptionCtors, enforce; 22 import std.functional : binaryFun; 23 import std.math : 24 ceil, 25 floor, 26 isInfinity, 27 isNaN, 28 round, 29 sgn, 30 sqrt; 31 import std.range : 32 chain, 33 chunks, 34 cycle, 35 only, 36 take, 37 zip; 38 import std.range.primitives : hasSlicing; 39 import std..string : lineSplitter, tr; 40 import std.traits : 41 isFloatingPoint, 42 isSomeString; 43 import std.typecons : 44 Flag, 45 No, 46 tuple, 47 Yes; 48 import transforms : 49 snakeCase, 50 snakeCaseCT; 51 52 53 /// Convert a `camelCase` string to `dash-case`. 54 string dashCase(string camelCase) 55 { 56 return camelCase.snakeCase.tr("_", "-"); 57 } 58 59 /// ditto 60 enum dashCaseCT(string camelCase) = camelCase.snakeCaseCT.tr("_", "-"); 61 62 /** 63 Adds one level of indentation for a multi-line string. Adds indentSize 64 spaces to each non-empty line. 65 66 Returns: indented string 67 */ 68 S indent(S)(S str, in size_t indentSize = 4) if (isSomeString!S) 69 { 70 enum lineSep = "\n"; 71 alias indentLine = (line) => chain(" ".cycle.take(line.length == 0 ? 0 : indentSize), line); 72 73 return str[] 74 .lineSplitter 75 .map!indentLine 76 .joiner(lineSep) 77 .chain(str[$ - lineSep.length .. $] == lineSep ? lineSep : "") 78 .array 79 .to!S; 80 } 81 82 /// 83 unittest 84 { 85 assert("a\nb".indent == " a\n b"); 86 assert("a\nb\n\n".indent == " a\n b\n\n"); 87 assert("a\nb\n".indent(2) == " a\n b\n"); 88 } 89 90 class AlignmentException : Exception 91 { 92 /// 93 mixin basicExceptionCtors; 94 } 95 96 /// One edit operation of the Needleman-Wunsch algorithm. 97 enum EditOp: byte 98 { 99 substitution, 100 deletetion, 101 insertion, 102 } 103 104 alias score_t = uint; 105 106 static enum Strip : byte 107 { 108 none = 0b00, 109 back = 0b01, 110 front = 0b10, 111 both = 0b11, 112 } 113 114 /// Represents an alignment of two sequences. 115 struct SequenceAlignment(S, alias scoreFun = "a == b ? 0 : 1") 116 { 117 alias getScore = binaryFun!scoreFun; 118 119 score_t score; 120 EditOp[] editPath; 121 S reference; 122 S query; 123 score_t indelPenalty; 124 Flag!"freeShift" freeShift; 125 126 /// Compute alignment score. 127 score_t computeScore() const pure 128 { 129 auto walkResult = walkEditOps(); 130 enforce!AlignmentException( 131 walkResult.i == reference.length && walkResult.j == query.length, 132 "invalid alignment", 133 ); 134 135 return walkResult.computedScore; 136 } 137 138 bool isValid() const pure nothrow 139 { 140 auto walkResult = walkEditOps(); 141 142 return isValid(walkResult); 143 } 144 145 private bool isValid(in WalkResult walkResult) const pure nothrow 146 { 147 return walkResult.i == reference.length && 148 walkResult.j == query.length && 149 walkResult.computedScore == score; 150 } 151 152 private static struct WalkResult 153 { 154 score_t computedScore; 155 size_t i, j; 156 } 157 158 private auto walkEditOps() const pure nothrow 159 { 160 WalkResult result; 161 foreach (k, editOp; editPath) 162 { 163 if (result.i > reference.length || result.j > query.length) 164 return result; 165 166 final switch (editOp) 167 { 168 case EditOp.substitution: 169 if (result.i == reference.length || result.j == query.length) 170 return result; 171 result.computedScore += getScore(reference[result.i], query[result.j]); 172 ++result.i; 173 ++result.j; 174 break; 175 case EditOp.deletetion: 176 if (result.i == reference.length) 177 return result; 178 result.computedScore += indelPenalty; 179 ++result.i; 180 break; 181 case EditOp.insertion: 182 if (result.j == query.length) 183 return result; 184 result.computedScore += indelPenalty; 185 ++result.j; 186 break; 187 } 188 } 189 if (freeShift) 190 { 191 auto firstSubstitution = editPath.countUntil(EditOp.substitution); 192 result.computedScore -= firstSubstitution >= 0 193 ? firstSubstitution 194 : editPath.length; 195 } 196 197 return result; 198 } 199 200 /// Strip leading/trailing insertions. 201 auto stripInsertions(Strip strip) inout pure nothrow 202 { 203 return partial(0, reference.length, strip); 204 } 205 206 /** 207 Get a partial alignment with respect to `reference`. 208 */ 209 auto partial(in size_t begin, in size_t end, Strip stripInsertions = Strip.none) inout pure nothrow 210 in 211 { 212 assert(this.isValid(), "Attempting to get a partial alignment of an invalid alignment"); 213 } 214 out (partialAlignment) 215 { 216 assert(partialAlignment.isValid(), "Partial alignment is invalid"); 217 } 218 do 219 { 220 assert(0 <= begin && begin <= end && end <= reference.length, "index out of bounds"); 221 222 if (end == begin) 223 return typeof(this)(0, [], reference[0 .. 0], query[0 .. 0], indelPenalty); 224 225 bool hasStarted; 226 score_t newScore; 227 size_t editBegin, editEnd; 228 size_t queryBegin, queryEnd; 229 size_t i, j; 230 foreach (k, editOp; editPath) 231 { 232 if (!hasStarted && i == begin) 233 { 234 hasStarted = !(stripInsertions & Strip.front); 235 newScore = 0; 236 editBegin = k; 237 queryBegin = j; 238 } 239 240 if (freeShift && i == begin) 241 newScore = 0; 242 243 if (i == end && ((stripInsertions & Strip.back) || editOp != EditOp.insertion)) 244 break; 245 246 final switch (editOp) 247 { 248 case EditOp.substitution: 249 newScore += getScore(reference[i], query[j]); 250 ++i; 251 ++j; 252 break; 253 case EditOp.deletetion: 254 newScore += indelPenalty; 255 ++i; 256 break; 257 case EditOp.insertion: 258 newScore += indelPenalty; 259 ++j; 260 break; 261 } 262 263 if (i == end) 264 { 265 editEnd = k + 1; 266 queryEnd = j; 267 } 268 } 269 270 if ( 271 !(stripInsertions & Strip.back) && 272 editEnd < editPath.length && 273 editPath[editEnd] == EditOp.insertion 274 ) 275 { 276 ++editEnd; 277 queryEnd = j; 278 } 279 280 auto partialAlignment = typeof(this)( 281 newScore, 282 editPath[editBegin .. editEnd], 283 reference[begin .. end], 284 query[queryBegin .. queryEnd], 285 indelPenalty, 286 freeShift, 287 ); 288 289 return partialAlignment; 290 } 291 292 auto opDollar() const pure nothrow 293 { 294 return reference.length; 295 } 296 297 /// ditto 298 auto opIndex(in size_t[2] slice) inout pure nothrow 299 { 300 return partial(slice[0], slice[1]); 301 } 302 303 /// 304 unittest 305 { 306 enum indelPenalty = 1; 307 auto alignment = findAlignment("GCATGCT", "GATTACA", indelPenalty); 308 309 assert(alignment.score == 4); 310 assert(alignment.toString == 311 "GCAT-GCT\n" ~ 312 "| || *|*\n" ~ 313 "G-ATTACA"); 314 315 auto partialAlignment = alignment[1 .. 5]; 316 317 assert(partialAlignment.score == 3); 318 assert(partialAlignment.toString == 319 "CAT-G\n" ~ 320 " || *\n" ~ 321 "-ATTA"); 322 } 323 324 size_t[2] opSlice(size_t dim)(in size_t begin, in size_t end) const pure nothrow 325 { 326 return [begin, end]; 327 } 328 329 /// Get a string representation of this alignment. Visual alignment breaks 330 /// unless elements of the sequences convert to single chars via `to!string`. 331 string toString( 332 alias matchSymbol = '|', 333 alias substitutionSymbol = '*', 334 alias indelSymbol = ' ', 335 alias gapSymbol = '-', 336 )(in size_t width = 0) const pure 337 { 338 auto referenceLine = appender!string; 339 referenceLine.reserve(editPath.length); 340 auto compareLine = appender!string; 341 compareLine.reserve(editPath.length); 342 auto queryLine = appender!string; 343 queryLine.reserve(editPath.length); 344 345 size_t i, j; 346 foreach (editOp; editPath) 347 { 348 final switch (editOp) 349 { 350 case EditOp.substitution: 351 referenceLine ~= reference[i].to!string; 352 compareLine ~= reference[i] == query[j] 353 ? matchSymbol 354 : substitutionSymbol; 355 queryLine ~= query[j].to!string; 356 ++i; 357 ++j; 358 break; 359 case EditOp.deletetion: 360 referenceLine ~= reference[i].to!string; 361 compareLine ~= indelSymbol; 362 queryLine ~= gapSymbol; 363 ++i; 364 break; 365 case EditOp.insertion: 366 referenceLine ~= gapSymbol; 367 compareLine ~= indelSymbol; 368 queryLine ~= query[j].to!string; 369 ++j; 370 break; 371 } 372 } 373 374 if (width == 0) 375 return only( 376 referenceLine.data.to!string, 377 compareLine.data.to!string, 378 queryLine.data.to!string, 379 ) 380 .joiner("\n") 381 .to!string; 382 else 383 return zip( 384 referenceLine.data.to!string.chunks(width), 385 compareLine.data.to!string.chunks(width), 386 queryLine.data.to!string.chunks(width), 387 ) 388 .map!(lineTriple => only(lineTriple.expand) 389 .joiner("\n")) 390 .joiner("\n\n") 391 .to!string; 392 } 393 394 /// 395 unittest 396 { 397 auto alignment = findAlignment("ACGTC", "AGTC", 1); 398 assert(alignment.toString == 399 "ACGTC\n" ~ 400 "| |||\n" ~ 401 "A-GTC"); 402 } 403 404 /// 405 unittest 406 { 407 auto alignment = findAlignment("GCATGCT", "GATTACA", 1); 408 409 assert(alignment.toString == 410 "GCAT-GCT\n" ~ 411 "| || *|*\n" ~ 412 "G-ATTACA"); 413 } 414 } 415 416 /** 417 Compute an alignment of `query` against `reference` using the 418 Needleman-Wunsch algorithm with non-negative scores and constant 419 indel penalty. Optionally, the `freeShift` mode may be activated 420 as to allow large indels at the beginning and end of the alignment. 421 422 **Implementation Notes:** The current implementation needs 423 `O(reference.length * query.length)` in time and memory. As the 424 memory requirement easily exceeds available memory it can be 425 limited for now. This may change in future and an implementation 426 using `O(max(reference.length, query.length))` memory will be 427 silently selected for large inputs. 428 429 Params: 430 scoreFun = calculate score for a 'substitution' at `i, j` using 431 `scoreFun(reference[i], reference[j])` 432 reference = Sequence to compare `query` against 433 query = Sequence to compare against `reference` 434 indelPenalty = Penalize each indel with this value 435 freeShift = Allow indels at the beginning and end of the alignment 436 memoryLimit = throw an error if the calculation would require more 437 than `memoryLimit` bytes. 438 Throws: AlignmentException if the calculation would require more than 439 `memoryLimit` bytes. 440 441 See_Also: http://en.wikipedia.org/wiki/Needleman-Wunsch_algorithm 442 */ 443 SequenceAlignment!(const(S), scoreFun) findAlignment( 444 alias scoreFun = "a == b ? 0 : 1", 445 S, 446 )( 447 in S reference, 448 in S query, 449 in score_t indelPenalty, 450 Flag!"freeShift" freeShift = No.freeShift, 451 size_t memoryLimit = 2^^20, 452 ) 453 { 454 alias score = binaryFun!scoreFun; 455 enforce!AlignmentException( 456 memoryRequired(reference, query) <= memoryLimit, 457 "memory limit exceeded; use short reference and/or query", 458 ); 459 460 auto F = DPMatrix!score_t(reference.length + 1, query.length + 1); 461 462 // Initialize scoring matrix 463 foreach (i; 0 .. reference.length + 1) 464 F[i, 0] = freeShift ? 0 : cast(score_t) i * indelPenalty; 465 foreach (j; 0 .. query.length + 1) 466 F[0, j] = freeShift ? 0 : cast(score_t) j * indelPenalty; 467 468 // Compute scoring matrix by rows in a cache friendly manner 469 foreach (i; 1 .. reference.length + 1) 470 foreach (j; 1 .. query.length + 1) 471 F[i, j] = min( 472 F[i - 1, j - 1] + score(reference[i - 1], query[j - 1]), 473 F[i - 1, j ] + indelPenalty, 474 F[i , j - 1] + indelPenalty, 475 ); 476 477 return typeof(return)( 478 F[$ - 1, $ - 1], 479 tracebackScoringMatrix(F), 480 reference, 481 query, 482 indelPenalty, 483 freeShift, 484 ); 485 } 486 487 /// 488 unittest 489 { 490 auto alignment = findAlignment("ACGTC", "AGTC", 1); 491 492 // - A G T C 493 // - 0 1 2 3 4 494 // A 1 0 1 2 3 495 // C 2 1 1 2 2 496 // G 3 2 1 2 3 497 // T 4 3 2 1 2 498 // C 5 4 3 2 1 499 // 500 // ACGTC 501 // | ||| 502 // A-GTC 503 504 assert(alignment.score == 1); 505 assert(alignment.editPath == [ 506 EditOp.substitution, 507 EditOp.deletetion, 508 EditOp.substitution, 509 EditOp.substitution, 510 EditOp.substitution, 511 ]); 512 } 513 514 /// 515 unittest 516 { 517 auto alignment = findAlignment("GCATGCT", "GATTACA", 1); 518 519 // - G A T T A C A 520 // - 0 1 2 3 4 5 6 7 521 // G 1 0 1 2 3 4 5 6 522 // C 2 1 2 3 4 5 4 5 523 // A 3 2 1 2 3 4 5 4 524 // T 4 3 2 1 2 3 6 5 525 // G 5 4 3 2 3 4 5 6 526 // C 6 5 4 3 4 5 4 5 527 // T 7 6 5 4 3 6 5 6 528 // 529 // GCAT-GCT 530 // | || *|* 531 // G-ATTACA 532 assert(alignment.score == 4); 533 assert(alignment.editPath == [ 534 EditOp.substitution, 535 EditOp.deletetion, 536 EditOp.substitution, 537 EditOp.substitution, 538 EditOp.insertion, 539 EditOp.substitution, 540 EditOp.substitution, 541 EditOp.substitution, 542 ]); 543 } 544 545 /// 546 unittest 547 { 548 auto alignment = findAlignment!"a == b ? 0 : 1"( 549 "tgaggacagaagggtcataggtttaattctggtcacaggcacattcctgg" ~ 550 "gttgcaggtttgatctccacctggtcggggcacatgca", 551 "tgaggacagaagggtcatggtttaattctggtcacaggcacattcctggg" ~ 552 "ttgtaggctcaattcccacccggtcggggccacatgca", 553 1, 554 ); 555 556 assert(alignment.toString(50) == 557 "tgaggacagaagggtcataggtttaattctggtcacaggcacattcctgg\n" ~ 558 "|||||||||||||||||| |||||||||||||||||||||||||||||||\n" ~ 559 "tgaggacagaagggtcat-ggtttaattctggtcacaggcacattcctgg\n" ~ 560 "\n" ~ 561 "gttgcaggtttgatctcc-acctggtcggggc-acatgca\n" ~ 562 "||||*|||*|**|| ||| |||*||||||||| |||||||\n" ~ 563 "gttgtaggctcaat-tcccacccggtcggggccacatgca"); 564 } 565 566 /// 567 unittest 568 { 569 auto alignment = findAlignment( 570 "tgaggacagaagggtcataggtttaattctggtcacaggcacattcctgg" ~ 571 "gttgcaggtttgatctccacctggtcggggcacatgca", 572 "aatgaacagctgaggacagaagggtcatggtttaattctggtcacaggca" ~ 573 "cattcctgggttgtaggctcaattcccacccggtcggggccacatgca", 574 1, 575 Yes.freeShift, 576 ); 577 578 assert(alignment.toString(50) == 579 "----------tgaggacagaagggtcataggtttaattctggtcacaggc\n" ~ 580 " |||||||||||||||||| |||||||||||||||||||||\n" ~ 581 "aatgaacagctgaggacagaagggtcat-ggtttaattctggtcacaggc\n" ~ 582 "\n" ~ 583 "acattcctgggttgcaggtttgatctcc-acctggtcggggc-acatgca\n" ~ 584 "||||||||||||||*|||*|**|| ||| |||*||||||||| |||||||\n" ~ 585 "acattcctgggttgtaggctcaat-tcccacccggtcggggccacatgca"); 586 } 587 588 /// 589 unittest 590 { 591 auto alignment = findAlignment( 592 "tatcctcaggtgaggactaacaacaaatatatatatatttatatctaaca" ~ 593 "acatatgatttctaaaatttcaaaaatcttaaggctgaattaat", 594 "tatcctcaggtgaggcttaacaacaaatatatatatactgtaatatctaa" ~ 595 "caacatatgattctaaaatttcaaaatgcttaaaggtctga", 596 1, 597 Yes.freeShift, 598 ); 599 600 assert(alignment.toString(50) == 601 "tatcctcaggtgaggact-aacaacaaatatatatata-ttta-tatcta\n" ~ 602 "||||||||||||||| || ||||||||||||||||||| |*|| ||||||\n" ~ 603 "tatcctcaggtgagg-cttaacaacaaatatatatatactgtaatatcta\n" ~ 604 "\n" ~ 605 "acaacatatgatttctaaaatttcaaaaatcttaa-gg-ctgaattaat\n" ~ 606 "||||||||||||| ||||||||||||||**||||| || |||| \n" ~ 607 "acaacatatgatt-ctaaaatttcaaaatgcttaaaggtctga------"); 608 } 609 610 unittest 611 { 612 enum reference = "tatcctcaggtgaggcttaacaacaaatatatatatactgtaatatctaa" ~ 613 "caacatatgattctaaaatttcaaaatgcttaaaggtctga"; 614 enum query = "atatcctcaggtgaggactaacaacaaatatatatatatttatatctaac" ~ 615 "aacatatgatttctaaaatttcaaaaatcttaaggctgaattaat"; 616 auto alignment = findAlignment( 617 reference, 618 query, 619 1 620 ); 621 622 assert(alignment.partial(0, reference.length, Strip.none).toString(50) == 623 "-tatcctcaggtgagg-cttaacaacaaatatatatatactgtaatatct\n" ~ 624 " ||||||||||||||| || ||||||||||||||||||| |*|| |||||\n" ~ 625 "atatcctcaggtgaggact-aacaacaaatatatatata-ttta-tatct\n" ~ 626 "\n" ~ 627 "aacaacatatgatt-ctaaaatttcaaaatgcttaaaggtctga------\n" ~ 628 "|||||||||||||| ||||||||||||||**||||| || |||| \n" ~ 629 "aacaacatatgatttctaaaatttcaaaaatcttaa-gg-ctgaattaat"); 630 assert(alignment.partial(0, reference.length, Strip.front).toString(50) == 631 "tatcctcaggtgagg-cttaacaacaaatatatatatactgtaatatcta\n" ~ 632 "||||||||||||||| || ||||||||||||||||||| |*|| ||||||\n" ~ 633 "tatcctcaggtgaggact-aacaacaaatatatatata-ttta-tatcta\n" ~ 634 "\n" ~ 635 "acaacatatgatt-ctaaaatttcaaaatgcttaaaggtctga------\n" ~ 636 "||||||||||||| ||||||||||||||**||||| || |||| \n" ~ 637 "acaacatatgatttctaaaatttcaaaaatcttaa-gg-ctgaattaat"); 638 assert(alignment.partial(0, reference.length, Strip.back).toString(50) == 639 "-tatcctcaggtgagg-cttaacaacaaatatatatatactgtaatatct\n" ~ 640 " ||||||||||||||| || ||||||||||||||||||| |*|| |||||\n" ~ 641 "atatcctcaggtgaggact-aacaacaaatatatatata-ttta-tatct\n" ~ 642 "\n" ~ 643 "aacaacatatgatt-ctaaaatttcaaaatgcttaaaggtctga\n" ~ 644 "|||||||||||||| ||||||||||||||**||||| || ||||\n" ~ 645 "aacaacatatgatttctaaaatttcaaaaatcttaa-gg-ctga"); 646 assert(alignment.partial(0, reference.length, Strip.both).toString(50) == 647 "tatcctcaggtgagg-cttaacaacaaatatatatatactgtaatatcta\n" ~ 648 "||||||||||||||| || ||||||||||||||||||| |*|| ||||||\n" ~ 649 "tatcctcaggtgaggact-aacaacaaatatatatata-ttta-tatcta\n" ~ 650 "\n" ~ 651 "acaacatatgatt-ctaaaatttcaaaatgcttaaaggtctga\n" ~ 652 "||||||||||||| ||||||||||||||**||||| || ||||\n" ~ 653 "acaacatatgatttctaaaatttcaaaaatcttaa-gg-ctga"); 654 } 655 656 unittest 657 { 658 enum reference = "tatcctcaggtgaggcttaacaacaaatatatatatactgtaatatctaa"; 659 enum query = "nnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnn"; 660 auto alignment = findAlignment!"1 - (a == b || a == 'n' || b == 'n')"( 661 reference, 662 query, 663 1 664 ); 665 666 import std.stdio; 667 assert(alignment.partial(0, reference.length, Strip.front).toString(50) == 668 "tatcctcaggtgaggcttaacaacaaatatatatatactgtaatatctaa\n" ~ 669 "**************************************************\n" ~ 670 "nnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnn"); 671 } 672 673 unittest 674 { 675 auto alignment = findAlignment( 676 "tgaggacagaagggtcataggtttaattctggtcacaggcacattcctgg" ~ 677 "gttgcaggtttgatctccacctggtcggggcacatgcaggaggcaaccaa" ~ 678 "tcaatgtgtctctttctcagtgatgtttcttctctctgtctctctccccc" ~ 679 "ctctcttctactctctctgaaaaatagatggaaaaaatatcctcaggtga" ~ 680 "ggactaacaacaaatatatatatatttatatctaacaacatatgatttct" ~ 681 "aaaatttcaaaaatcttaaggctgaattaat", 682 "aatgaacagctgaggacagaagggtcatggtttaattctggtcacaggca" ~ 683 "cattcctgggttgtaggctcaattcccacccggtcggggccacatgcaga" ~ 684 "aggcaaccatcaatgtgtctctttcaagtgatgtttcttctctctgtcta" ~ 685 "ctccccctccctatctactctctgggaaacttatggaaaaaatatcctca" ~ 686 "ggtgaggcttaacaacaaatatatatatactgtaatatctaacaacatat" ~ 687 "gattctaaaatttcaaaatgcttaaaggtctga", 688 1, 689 Yes.freeShift, 690 ); 691 692 assert(alignment.toString(50) == 693 "----------tgaggacagaagggtcataggtttaattctggtcacaggc\n" ~ 694 " |||||||||||||||||| |||||||||||||||||||||\n" ~ 695 "aatgaacagctgaggacagaagggtcat-ggtttaattctggtcacaggc\n" ~ 696 "\n" ~ 697 "acattcctgggttgcaggtttgatctcc-acctggtcggggc-acatgca\n" ~ 698 "||||||||||||||*|||*|**|| ||| |||*||||||||| |||||||\n" ~ 699 "acattcctgggttgtaggctcaat-tcccacccggtcggggccacatgca\n" ~ 700 "\n" ~ 701 "ggaggcaaccaatcaatgtgtctctttctcagtgatgtttcttctctctg\n" ~ 702 "|*||||||||| |||||||||||||||| *||||||||||||||||||||\n" ~ 703 "gaaggcaacca-tcaatgtgtctctttc-aagtgatgtttcttctctctg\n" ~ 704 "\n" ~ 705 "tctctctcccccctctct-tctactctctctgaaaaatagatggaaaaaa\n" ~ 706 "||| *||||||| ||*|| ||||||||||**||||**|| |||||||||\n" ~ 707 "tct-actccccc-tccctatctactctctgggaaactta--tggaaaaaa\n" ~ 708 "\n" ~ 709 "tatcctcaggtgaggact-aacaacaaatatatatata-ttta-tatcta\n" ~ 710 "||||||||||||||| || ||||||||||||||||||| |*|| ||||||\n" ~ 711 "tatcctcaggtgagg-cttaacaacaaatatatatatactgtaatatcta\n" ~ 712 "\n" ~ 713 "acaacatatgatttctaaaatttcaaaaatcttaa-gg-ctgaattaat\n" ~ 714 "||||||||||||| ||||||||||||||**||||| || |||| \n" ~ 715 "acaacatatgatt-ctaaaatttcaaaatgcttaaaggtctga------"); 716 } 717 718 // Returns the amount of memory required to compute an alignment between reference and query. 719 size_t memoryRequired(S)(in S reference, in S query) 720 { 721 return score_t.sizeof * ( 722 // Space for the scoring matrix F 723 ((reference.length + 1) * (query.length + 1)) + 724 // Space for the edit path 725 (reference.length + query.length) 726 ); 727 } 728 729 /// Returns longest query and refernce length possible with memoryLimit. 730 size_t longestInputsLength(size_t memoryLimit) pure 731 { 732 return round(sqrt((memoryLimit / score_t.sizeof + 3).to!double) - 2).to!size_t; 733 } 734 735 // Find edit path of the best alignment 736 private EditOp[] tracebackScoringMatrix(in DPMatrix!score_t F) 737 { 738 auto editPath = minimallyInitializedArray!(EditOp[])(F.size[0] + F.size[1]); 739 size_t i = F.size[0] - 1; 740 size_t j = F.size[1] - 1; 741 size_t k = editPath.length; 742 743 while (0 < i && 0 < j) 744 { 745 auto matchScore = F[i - 1, j - 1]; 746 auto insertionScore = F[i , j - 1]; 747 auto deletionScore = F[i - 1, j ]; 748 auto nextScore = min( 749 matchScore, 750 deletionScore, 751 insertionScore, 752 ); 753 754 assert(k > 0); 755 switch (nextScore) 756 { 757 case matchScore: 758 editPath[--k] = EditOp.substitution; 759 --i; 760 --j; 761 break; 762 case insertionScore: 763 editPath[--k] = EditOp.insertion; 764 --j; 765 break; 766 case deletionScore: 767 editPath[--k] = EditOp.deletetion; 768 --i; 769 break; 770 default: 771 assert(0, "corrupted scoring matrix"); 772 } 773 } 774 775 while (0 < i) 776 { 777 assert(k > 0); 778 editPath[--k] = EditOp.deletetion; 779 --i; 780 } 781 782 while (0 < j) 783 { 784 assert(k > 0); 785 editPath[--k] = EditOp.insertion; 786 --j; 787 } 788 789 return editPath[k .. $]; 790 } 791 792 unittest 793 { 794 auto F = DPMatrix!score_t(6, 5); 795 F.elements = [ 796 // - A G T C 797 0, 1, 2, 3, 4, // - 798 1, 0, 1, 2, 3, // A 799 2, 1, 1, 2, 2, // C 800 3, 2, 1, 2, 3, // G 801 4, 3, 2, 1, 2, // T 802 5, 4, 3, 2, 1, // C 803 ]; 804 805 assert(F.tracebackScoringMatrix == [ 806 EditOp.substitution, 807 EditOp.deletetion, 808 EditOp.substitution, 809 EditOp.substitution, 810 EditOp.substitution, 811 ]); 812 } 813 814 private struct DPMatrix(T) 815 { 816 size_t[2] size; 817 T[] elements; 818 819 this(in size_t n, in size_t m) 820 { 821 this.size[0] = n; 822 this.size[1] = m; 823 this.elements = minimallyInitializedArray!(T[])(n * m); 824 } 825 826 this(in size_t n, in size_t m, ref T[] buffer) 827 { 828 this.size[0] = n; 829 this.size[1] = m; 830 auto numElements = n * m; 831 assert(buffer.length <= numElements); 832 this.elements = buffer; 833 } 834 835 unittest 836 { 837 auto M = DPMatrix!int(2, 3); 838 839 assert(M.size[0] == 2); 840 assert(M.size[1] == 3); 841 842 foreach (i; 0 .. M.size[0]) 843 foreach (j; 0 .. M.size[1]) 844 M[i, j] = cast(int) (i + j); 845 846 assert(M.elements == [ 847 0, 1, 2, 848 1, 2, 3, 849 ]); 850 } 851 852 size_t opDollar(size_t dim)() const pure nothrow if (dim < 2) 853 { 854 return size[dim]; 855 } 856 857 auto ref T opIndex(size_t i, size_t j) pure nothrow 858 { 859 assert(i < size[0] && j < size[1], "index out of bounds"); 860 861 return elements[i * size[1] + j]; 862 } 863 864 const(T) opIndex(size_t i, size_t j) const pure nothrow 865 { 866 assert(i < size[0] && j < size[1], "index out of bounds"); 867 868 return elements[i * size[1] + j]; 869 } 870 871 unittest 872 { 873 auto M = DPMatrix!int(2, 3); 874 M.elements = [ 875 0, 1, 2, 876 1, 2, 3, 877 ]; 878 879 assert(M[0, 0] == 0 + 0); 880 assert(M[0, 1] == 0 + 1); 881 assert(M[0, 2] == 0 + 2); 882 assert(M[1, 0] == 1 + 0); 883 assert(M[1, 1] == 1 + 1); 884 assert(M[1, 2] == 1 + 2); 885 } 886 887 int opApply(scope int delegate(size_t i, size_t j, ref T) yield) 888 { 889 mixin(opApplyImpl!(No.reverse)); 890 } 891 892 int opApplyReverse(scope int delegate(size_t i, size_t j, ref T) yield) 893 { 894 mixin(opApplyImpl!(Yes.reverse)); 895 } 896 897 unittest 898 { 899 auto M = DPMatrix!int(2, 3); 900 901 foreach (i, j, ref elem; M) 902 elem = cast(int) (i + j); 903 904 assert(M.elements == [ 905 0, 1, 2, 906 1, 2, 3, 907 ]); 908 } 909 910 int opApply(scope int delegate(size_t i, size_t j, in T) yield) const 911 { 912 mixin(opApplyImpl!(No.reverse)); 913 } 914 915 int opApplyReverse(scope int delegate(size_t i, size_t j, in T) yield) const 916 { 917 mixin(opApplyImpl!(Yes.reverse)); 918 } 919 920 unittest 921 { 922 auto M = DPMatrix!int(2, 3); 923 M.elements = [ 924 0, 1, 2, 925 1, 2, 3, 926 ]; 927 928 foreach (i, j, elem; cast(const(typeof(M))) M) 929 { 930 assert(M[i, j] == elem); 931 assert(elem == i + j); 932 } 933 } 934 935 private static enum opApplyImpl(Flag!"reverse" reverse) = ` 936 int result = 0; 937 938 foreach` ~ (reverse ? `_reverse` : ``) ~ ` (i, ref element; elements) 939 { 940 result = yield(i / size[1], i % size[1], element); 941 942 if (result) 943 break; 944 } 945 946 return result; 947 `; 948 } 949 950 /// Convert a floating point number to a base-10 string at compile time. 951 /// This function is very crude and will not work in many cases! 952 string toString(Float)(in Float value, in uint precision) pure nothrow 953 if (isFloatingPoint!Float) 954 { 955 if (value.isNaN) 956 return "nan"; 957 else if (value.isInfinity) 958 return value > 0 ? "inf" : "-inf"; 959 960 if (precision == 0) 961 { 962 auto intPart = cast(long) round(value); 963 964 return intPart.to!string; 965 } 966 else 967 { 968 auto intPart = cast(long) (value > 0 ? floor(value) : ceil(value)); 969 auto fraction = sgn(value) * (value - intPart); 970 assert(fraction >= 0, "fractional part of value should be non-negative"); 971 auto fracPart = cast(ulong) round(10^^precision * fraction); 972 973 return intPart.to!string ~ "." ~ fracPart.to!string; 974 } 975 } 976 977 /// 978 unittest 979 { 980 enum x = 42.0; 981 enum y = -13.37f; 982 enum z = 0.9; 983 984 static assert(float.nan.toString(0) == "nan"); 985 static assert(double.infinity.toString(0) == "inf"); 986 static assert((-double.infinity).toString(0) == "-inf"); 987 static assert(x.toString(0) == "42"); 988 static assert(x.toString(1) == "42.0"); 989 static assert(y.toString(2) == "-13.37"); 990 static assert(y.toString(1) == "-13.4"); 991 static assert(y.toString(0) == "-13"); 992 static assert(z.toString(1) == "0.9"); 993 static assert(z.toString(0) == "1"); 994 }