1 /**
2     Some additional string functions.
3 
4     Copyright: © 2019 Arne Ludwig <arne.ludwig@posteo.de>
5     License: Subject to the terms of the MIT license, as written in the
6              included LICENSE file.
7     Authors: Arne Ludwig <arne.ludwig@posteo.de>
8 */
9 module dalicious..string;
10 
11 import std.algorithm :
12     countUntil,
13     equal,
14     joiner,
15     min,
16     map;
17 import std.array :
18     appender,
19     array,
20     minimallyInitializedArray;
21 import std.conv : to;
22 import std.exception : basicExceptionCtors, enforce;
23 import std.functional : binaryFun;
24 import std.math :
25     ceil,
26     floor,
27     isInfinity,
28     isNaN,
29     round,
30     sgn,
31     sqrt;
32 import std.range :
33     chain,
34     chunks,
35     cycle,
36     only,
37     take,
38     zip;
39 import std.range.primitives : hasSlicing;
40 import std.string : lineSplitter, tr;
41 import std.traits :
42     isFloatingPoint,
43     isSomeString;
44 import std.typecons :
45     Flag,
46     No,
47     tuple,
48     Yes;
49 import transforms :
50     snakeCase,
51     snakeCaseCT;
52 
53 
54 /// Convert a `camelCase` string to `dash-case`.
55 string dashCase(string camelCase)
56 {
57     return camelCase.snakeCase.tr("_", "-");
58 }
59 
60 /// ditto
61 enum dashCaseCT(string camelCase) = camelCase.snakeCaseCT.tr("_", "-");
62 
63 /**
64     Adds one level of indentation for a multi-line string. Adds indentSize
65     spaces to each non-empty line.
66 
67     Returns: indented string
68 */
69 S indent(S)(S str, in size_t indentSize = 4) if (isSomeString!S)
70 {
71     enum lineSep = "\n";
72     alias indentLine = (line) => chain(" ".cycle.take(line.length == 0 ? 0 : indentSize), line);
73 
74     return str[]
75         .lineSplitter
76         .map!indentLine
77         .joiner(lineSep)
78         .chain(str[$ - lineSep.length .. $] == lineSep ? lineSep : "")
79         .array
80         .to!S;
81 }
82 
83 ///
84 unittest
85 {
86     assert("a\nb".indent == "    a\n    b");
87     assert("a\nb\n\n".indent == "    a\n    b\n\n");
88     assert("a\nb\n".indent(2) == "  a\n  b\n");
89 }
90 
91 class AlignmentException : Exception
92 {
93     ///
94     mixin basicExceptionCtors;
95 }
96 
97 /// One edit operation of the Needleman-Wunsch algorithm.
98 enum EditOp: byte
99 {
100     substitution,
101     deletetion,
102     insertion,
103 }
104 
105 alias score_t = uint;
106 
107 static enum Strip : byte
108 {
109     none = 0b00,
110     back = 0b01,
111     front = 0b10,
112     both = 0b11,
113 }
114 
115 /// Represents an alignment of two sequences.
116 struct SequenceAlignment(S, alias scoreFun = "a == b ? 0 : 1")
117 {
118     alias getScore = binaryFun!scoreFun;
119 
120     score_t score;
121     EditOp[] editPath;
122     S reference;
123     S query;
124     score_t indelPenalty;
125     Flag!"freeShift" freeShift;
126 
127     /// Compute alignment score.
128     score_t computeScore() const pure
129     {
130         auto walkResult = walkEditOps();
131         enforce!AlignmentException(
132             walkResult.i == reference.length && walkResult.j == query.length,
133             "invalid alignment",
134         );
135 
136         return walkResult.computedScore;
137     }
138 
139     bool isValid() const pure nothrow
140     {
141         auto walkResult = walkEditOps();
142 
143         return isValid(walkResult);
144     }
145 
146     private bool isValid(in WalkResult walkResult) const pure nothrow
147     {
148         return walkResult.i == reference.length &&
149                walkResult.j == query.length &&
150                walkResult.computedScore == score;
151     }
152 
153     private static struct WalkResult
154     {
155         score_t computedScore;
156         size_t i, j;
157     }
158 
159     private auto walkEditOps() const pure nothrow
160     {
161         WalkResult result;
162         foreach (k, editOp; editPath)
163         {
164             if (result.i > reference.length || result.j > query.length)
165                 return result;
166 
167             final switch (editOp)
168             {
169             case EditOp.substitution:
170                 if (result.i == reference.length || result.j == query.length)
171                     return result;
172                 result.computedScore += getScore(reference[result.i], query[result.j]);
173                 ++result.i;
174                 ++result.j;
175                 break;
176             case EditOp.deletetion:
177                 if (result.i == reference.length)
178                     return result;
179                 result.computedScore += indelPenalty;
180                 ++result.i;
181                 break;
182             case EditOp.insertion:
183                 if (result.j == query.length)
184                     return result;
185                 result.computedScore += indelPenalty;
186                 ++result.j;
187                 break;
188             }
189         }
190         if (freeShift)
191         {
192             auto firstSubstitution = editPath.countUntil(EditOp.substitution);
193             result.computedScore -= firstSubstitution >= 0
194                 ? firstSubstitution
195                 : editPath.length;
196         }
197 
198         return result;
199     }
200 
201     /// Strip leading/trailing insertions.
202     auto stripInsertions(Strip strip) inout pure nothrow
203     {
204         return partial(0, reference.length, strip);
205     }
206 
207     /**
208         Get a partial alignment with respect to `reference`.
209     */
210     auto partial(in size_t begin, in size_t end, Strip stripInsertions = Strip.none) inout pure nothrow
211     in
212     {
213         assert(this.isValid(), "Attempting to get a partial alignment of an invalid alignment");
214     }
215     out (partialAlignment)
216     {
217         assert(partialAlignment.isValid(), "Partial alignment is invalid");
218     }
219     do
220     {
221         assert(0 <= begin && begin <= end && end <= reference.length, "index out of bounds");
222 
223         if (end == begin)
224             return typeof(this)(0, [], reference[0 .. 0], query[0 .. 0], indelPenalty);
225 
226         bool hasStarted;
227         score_t newScore;
228         size_t editBegin, editEnd;
229         size_t queryBegin, queryEnd;
230         size_t i, j;
231         foreach (k, editOp; editPath)
232         {
233             if (!hasStarted && i == begin)
234             {
235                 hasStarted = !(stripInsertions & Strip.front);
236                 newScore = 0;
237                 editBegin = k;
238                 queryBegin = j;
239             }
240 
241             if (freeShift && i == begin)
242                 newScore = 0;
243 
244             if (i == end && ((stripInsertions & Strip.back) || editOp != EditOp.insertion))
245                 break;
246 
247             final switch (editOp)
248             {
249             case EditOp.substitution:
250                 newScore += getScore(reference[i], query[j]);
251                 ++i;
252                 ++j;
253                 break;
254             case EditOp.deletetion:
255                 newScore += indelPenalty;
256                 ++i;
257                 break;
258             case EditOp.insertion:
259                 newScore += indelPenalty;
260                 ++j;
261                 break;
262             }
263 
264             if (i == end)
265             {
266                 editEnd = k + 1;
267                 queryEnd = j;
268             }
269         }
270 
271         if (
272             !(stripInsertions & Strip.back) &&
273             editEnd < editPath.length &&
274             editPath[editEnd] == EditOp.insertion
275         )
276         {
277             ++editEnd;
278             queryEnd = j;
279         }
280 
281         auto partialAlignment = typeof(this)(
282             newScore,
283             editPath[editBegin .. editEnd],
284             reference[begin .. end],
285             query[queryBegin .. queryEnd],
286             indelPenalty,
287             freeShift,
288         );
289 
290         return partialAlignment;
291     }
292 
293     auto opDollar() const pure nothrow
294     {
295         return reference.length;
296     }
297 
298     /// ditto
299     auto opIndex(in size_t[2] slice) inout pure nothrow
300     {
301         return partial(slice[0], slice[1]);
302     }
303 
304     ///
305     unittest
306     {
307         enum indelPenalty = 1;
308         auto alignment = findAlignment("GCATGCT", "GATTACA", indelPenalty);
309 
310         assert(alignment.score == 4);
311         assert(alignment.toString ==
312             "GCAT-GCT\n" ~
313             "| || *|*\n" ~
314             "G-ATTACA");
315 
316         auto partialAlignment = alignment[1 .. 5];
317 
318         assert(partialAlignment.score == 3);
319         assert(partialAlignment.toString ==
320             "CAT-G\n" ~
321             " || *\n" ~
322             "-ATTA");
323     }
324 
325     size_t[2] opSlice(size_t dim)(in size_t begin, in size_t end) const pure nothrow
326     {
327         return [begin, end];
328     }
329 
330     /// Get a string representation of this alignment. Visual alignment breaks
331     /// unless elements of the sequences convert to single chars via `to!string`.
332     string toString(
333         alias matchSymbol = '|',
334         alias substitutionSymbol = '*',
335         alias indelSymbol = ' ',
336         alias gapSymbol = '-',
337     )(in size_t width = 0) const pure
338     {
339         auto referenceLine = appender!string;
340         referenceLine.reserve(editPath.length);
341         auto compareLine = appender!string;
342         compareLine.reserve(editPath.length);
343         auto queryLine = appender!string;
344         queryLine.reserve(editPath.length);
345 
346         size_t i, j;
347         foreach (editOp; editPath)
348         {
349             final switch (editOp)
350             {
351             case EditOp.substitution:
352                 referenceLine ~= reference[i].to!string;
353                 compareLine ~= reference[i] == query[j]
354                     ? matchSymbol
355                     : substitutionSymbol;
356                 queryLine ~= query[j].to!string;
357                 ++i;
358                 ++j;
359                 break;
360             case EditOp.deletetion:
361                 referenceLine ~= reference[i].to!string;
362                 compareLine ~= indelSymbol;
363                 queryLine ~= gapSymbol;
364                 ++i;
365                 break;
366             case EditOp.insertion:
367                 referenceLine ~= gapSymbol;
368                 compareLine ~= indelSymbol;
369                 queryLine ~= query[j].to!string;
370                 ++j;
371                 break;
372             }
373         }
374 
375         if (width == 0)
376             return only(
377                 referenceLine.data.to!string,
378                 compareLine.data.to!string,
379                 queryLine.data.to!string,
380             )
381                 .joiner("\n")
382                 .to!string;
383         else
384             return zip(
385                 referenceLine.data.to!string.chunks(width),
386                 compareLine.data.to!string.chunks(width),
387                 queryLine.data.to!string.chunks(width),
388             )
389                 .map!(lineTriple => only(lineTriple.expand)
390                     .joiner("\n"))
391                 .joiner("\n\n")
392                 .to!string;
393     }
394 
395     ///
396     unittest
397     {
398         auto alignment = findAlignment("ACGTC", "AGTC", 1);
399         assert(alignment.toString ==
400             "ACGTC\n" ~
401             "| |||\n" ~
402             "A-GTC");
403     }
404 
405     ///
406     unittest
407     {
408         auto alignment = findAlignment("GCATGCT", "GATTACA", 1);
409 
410         assert(alignment.toString ==
411             "GCAT-GCT\n" ~
412             "| || *|*\n" ~
413             "G-ATTACA");
414     }
415 }
416 
417 /**
418     Compute an alignment of `query` against `reference` using the
419     Needleman-Wunsch algorithm with non-negative scores and constant
420     indel penalty. Optionally, the `freeShift` mode may be activated
421     as to allow large indels at the beginning and end of the alignment.
422 
423     **Implementation Notes:** The current implementation needs
424     `O(reference.length * query.length)` in time and memory. As the
425     memory requirement easily exceeds available memory it can be
426     limited for now. This may change in future and an implementation
427     using `O(max(reference.length, query.length))` memory will be
428     silently selected for large inputs.
429 
430     Params:
431         scoreFun =     calculate score for a 'substitution' at `i, j` using
432                        `scoreFun(reference[i], reference[j])`
433         reference =    Sequence to compare `query` against
434         query =        Sequence to compare against `reference`
435         indelPenalty = Penalize each indel with this value
436         freeShift =    Allow indels at the beginning and end of the alignment
437         memoryLimit =  throw an error if the calculation would require more
438                        than `memoryLimit` bytes.
439     Throws: AlignmentException if the calculation would require more than
440             `memoryLimit` bytes.
441 
442     See_Also: http://en.wikipedia.org/wiki/Needleman-Wunsch_algorithm
443 */
444 SequenceAlignment!(const(S), scoreFun) findAlignment(
445     alias scoreFun = "a == b ? 0 : 1",
446     S,
447 )(
448     in S reference,
449     in S query,
450     in score_t indelPenalty,
451     Flag!"freeShift" freeShift = No.freeShift,
452     size_t memoryLimit = 2^^20,
453 )
454 {
455     alias score = binaryFun!scoreFun;
456     enforce!AlignmentException(
457         memoryRequired(reference, query) <= memoryLimit,
458         "memory limit exceeded; use short reference and/or query",
459     );
460 
461     auto F = DPMatrix!score_t(reference.length + 1, query.length + 1);
462 
463     // Initialize scoring matrix
464     foreach (i; 0 .. reference.length + 1)
465         F[i, 0] = freeShift ? 0 : cast(score_t) i * indelPenalty;
466     foreach (j; 0 .. query.length + 1)
467         F[0, j] = freeShift ? 0 : cast(score_t) j * indelPenalty;
468 
469     // Compute scoring matrix by rows in a cache friendly manner
470     foreach (i; 1 .. reference.length + 1)
471         foreach (j; 1 .. query.length + 1)
472             F[i, j] = min(
473                 F[i - 1, j - 1] + score(reference[i - 1], query[j - 1]),
474                 F[i - 1, j    ] + indelPenalty,
475                 F[i    , j - 1] + indelPenalty,
476             );
477 
478     return typeof(return)(
479         F[$ - 1, $ - 1],
480         tracebackScoringMatrix(F),
481         reference,
482         query,
483         indelPenalty,
484         freeShift,
485     );
486 }
487 
488 ///
489 unittest
490 {
491     auto alignment = findAlignment("ACGTC", "AGTC", 1);
492 
493         //   - A G T C
494         // - 0 1 2 3 4
495         // A 1 0 1 2 3
496         // C 2 1 1 2 2
497         // G 3 2 1 2 3
498         // T 4 3 2 1 2
499         // C 5 4 3 2 1
500         //
501         // ACGTC
502         // | |||
503         // A-GTC
504 
505     assert(alignment.score == 1);
506     assert(alignment.editPath == [
507         EditOp.substitution,
508         EditOp.deletetion,
509         EditOp.substitution,
510         EditOp.substitution,
511         EditOp.substitution,
512     ]);
513 }
514 
515 ///
516 unittest
517 {
518     auto alignment = findAlignment("GCATGCT", "GATTACA", 1);
519 
520     //   - G A T T A C A
521     // - 0 1 2 3 4 5 6 7
522     // G 1 0 1 2 3 4 5 6
523     // C 2 1 2 3 4 5 4 5
524     // A 3 2 1 2 3 4 5 4
525     // T 4 3 2 1 2 3 6 5
526     // G 5 4 3 2 3 4 5 6
527     // C 6 5 4 3 4 5 4 5
528     // T 7 6 5 4 3 6 5 6
529     //
530     // GCAT-GCT
531     // | || *|*
532     // G-ATTACA
533     assert(alignment.score == 4);
534     assert(alignment.editPath == [
535         EditOp.substitution,
536         EditOp.deletetion,
537         EditOp.substitution,
538         EditOp.substitution,
539         EditOp.insertion,
540         EditOp.substitution,
541         EditOp.substitution,
542         EditOp.substitution,
543     ]);
544 }
545 
546 ///
547 unittest
548 {
549     auto alignment = findAlignment!"a == b ? 0 : 1"(
550         "tgaggacagaagggtcataggtttaattctggtcacaggcacattcctgg" ~
551         "gttgcaggtttgatctccacctggtcggggcacatgca",
552         "tgaggacagaagggtcatggtttaattctggtcacaggcacattcctggg" ~
553         "ttgtaggctcaattcccacccggtcggggccacatgca",
554         1,
555     );
556 
557     assert(alignment.toString(50) ==
558         "tgaggacagaagggtcataggtttaattctggtcacaggcacattcctgg\n" ~
559         "|||||||||||||||||| |||||||||||||||||||||||||||||||\n" ~
560         "tgaggacagaagggtcat-ggtttaattctggtcacaggcacattcctgg\n" ~
561         "\n" ~
562         "gttgcaggtttgatctcc-acctggtcggggc-acatgca\n" ~
563         "||||*|||*|**|| ||| |||*||||||||| |||||||\n" ~
564         "gttgtaggctcaat-tcccacccggtcggggccacatgca");
565 }
566 
567 ///
568 unittest
569 {
570     auto alignment = findAlignment(
571         "tgaggacagaagggtcataggtttaattctggtcacaggcacattcctgg" ~
572         "gttgcaggtttgatctccacctggtcggggcacatgca",
573         "aatgaacagctgaggacagaagggtcatggtttaattctggtcacaggca" ~
574         "cattcctgggttgtaggctcaattcccacccggtcggggccacatgca",
575         1,
576         Yes.freeShift,
577     );
578 
579     assert(alignment.toString(50) ==
580         "----------tgaggacagaagggtcataggtttaattctggtcacaggc\n" ~
581         "          |||||||||||||||||| |||||||||||||||||||||\n" ~
582         "aatgaacagctgaggacagaagggtcat-ggtttaattctggtcacaggc\n" ~
583         "\n" ~
584         "acattcctgggttgcaggtttgatctcc-acctggtcggggc-acatgca\n" ~
585         "||||||||||||||*|||*|**|| ||| |||*||||||||| |||||||\n" ~
586         "acattcctgggttgtaggctcaat-tcccacccggtcggggccacatgca");
587 }
588 
589 ///
590 unittest
591 {
592     auto alignment = findAlignment(
593         "tatcctcaggtgaggactaacaacaaatatatatatatttatatctaaca" ~
594         "acatatgatttctaaaatttcaaaaatcttaaggctgaattaat",
595         "tatcctcaggtgaggcttaacaacaaatatatatatactgtaatatctaa" ~
596         "caacatatgattctaaaatttcaaaatgcttaaaggtctga",
597         1,
598         Yes.freeShift,
599     );
600 
601     assert(alignment.toString(50) ==
602         "tatcctcaggtgaggact-aacaacaaatatatatata-ttta-tatcta\n" ~
603         "||||||||||||||| || ||||||||||||||||||| |*|| ||||||\n" ~
604         "tatcctcaggtgagg-cttaacaacaaatatatatatactgtaatatcta\n" ~
605         "\n" ~
606         "acaacatatgatttctaaaatttcaaaaatcttaa-gg-ctgaattaat\n" ~
607         "||||||||||||| ||||||||||||||**||||| || ||||      \n" ~
608         "acaacatatgatt-ctaaaatttcaaaatgcttaaaggtctga------");
609 }
610 
611 unittest
612 {
613     enum reference = "tatcctcaggtgaggcttaacaacaaatatatatatactgtaatatctaa" ~
614                      "caacatatgattctaaaatttcaaaatgcttaaaggtctga";
615     enum query = "atatcctcaggtgaggactaacaacaaatatatatatatttatatctaac" ~
616                  "aacatatgatttctaaaatttcaaaaatcttaaggctgaattaat";
617     auto alignment = findAlignment(
618         reference,
619         query,
620         1
621     );
622 
623     assert(alignment.partial(0, reference.length, Strip.none).toString(50) ==
624         "-tatcctcaggtgagg-cttaacaacaaatatatatatactgtaatatct\n" ~
625         " ||||||||||||||| || ||||||||||||||||||| |*|| |||||\n" ~
626         "atatcctcaggtgaggact-aacaacaaatatatatata-ttta-tatct\n" ~
627         "\n" ~
628         "aacaacatatgatt-ctaaaatttcaaaatgcttaaaggtctga------\n" ~
629         "|||||||||||||| ||||||||||||||**||||| || ||||      \n" ~
630         "aacaacatatgatttctaaaatttcaaaaatcttaa-gg-ctgaattaat");
631     assert(alignment.partial(0, reference.length, Strip.front).toString(50) ==
632         "tatcctcaggtgagg-cttaacaacaaatatatatatactgtaatatcta\n" ~
633         "||||||||||||||| || ||||||||||||||||||| |*|| ||||||\n" ~
634         "tatcctcaggtgaggact-aacaacaaatatatatata-ttta-tatcta\n" ~
635         "\n" ~
636         "acaacatatgatt-ctaaaatttcaaaatgcttaaaggtctga------\n" ~
637         "||||||||||||| ||||||||||||||**||||| || ||||      \n" ~
638         "acaacatatgatttctaaaatttcaaaaatcttaa-gg-ctgaattaat");
639     assert(alignment.partial(0, reference.length, Strip.back).toString(50) ==
640         "-tatcctcaggtgagg-cttaacaacaaatatatatatactgtaatatct\n" ~
641         " ||||||||||||||| || ||||||||||||||||||| |*|| |||||\n" ~
642         "atatcctcaggtgaggact-aacaacaaatatatatata-ttta-tatct\n" ~
643         "\n" ~
644         "aacaacatatgatt-ctaaaatttcaaaatgcttaaaggtctga\n" ~
645         "|||||||||||||| ||||||||||||||**||||| || ||||\n" ~
646         "aacaacatatgatttctaaaatttcaaaaatcttaa-gg-ctga");
647     assert(alignment.partial(0, reference.length, Strip.both).toString(50) ==
648         "tatcctcaggtgagg-cttaacaacaaatatatatatactgtaatatcta\n" ~
649         "||||||||||||||| || ||||||||||||||||||| |*|| ||||||\n" ~
650         "tatcctcaggtgaggact-aacaacaaatatatatata-ttta-tatcta\n" ~
651         "\n" ~
652         "acaacatatgatt-ctaaaatttcaaaatgcttaaaggtctga\n" ~
653         "||||||||||||| ||||||||||||||**||||| || ||||\n" ~
654         "acaacatatgatttctaaaatttcaaaaatcttaa-gg-ctga");
655 }
656 
657 unittest
658 {
659     enum reference = "tatcctcaggtgaggcttaacaacaaatatatatatactgtaatatctaa";
660     enum query = "nnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnn";
661     auto alignment = findAlignment!"1 - (a == b || a == 'n' || b == 'n')"(
662         reference,
663         query,
664         1
665     );
666 
667     import std.stdio;
668     assert(alignment.partial(0, reference.length, Strip.front).toString(50) ==
669         "tatcctcaggtgaggcttaacaacaaatatatatatactgtaatatctaa\n" ~
670         "**************************************************\n" ~
671         "nnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnn");
672 }
673 
674 unittest
675 {
676     auto alignment = findAlignment(
677         "tgaggacagaagggtcataggtttaattctggtcacaggcacattcctgg" ~
678         "gttgcaggtttgatctccacctggtcggggcacatgcaggaggcaaccaa" ~
679         "tcaatgtgtctctttctcagtgatgtttcttctctctgtctctctccccc" ~
680         "ctctcttctactctctctgaaaaatagatggaaaaaatatcctcaggtga" ~
681         "ggactaacaacaaatatatatatatttatatctaacaacatatgatttct" ~
682         "aaaatttcaaaaatcttaaggctgaattaat",
683         "aatgaacagctgaggacagaagggtcatggtttaattctggtcacaggca" ~
684         "cattcctgggttgtaggctcaattcccacccggtcggggccacatgcaga" ~
685         "aggcaaccatcaatgtgtctctttcaagtgatgtttcttctctctgtcta" ~
686         "ctccccctccctatctactctctgggaaacttatggaaaaaatatcctca" ~
687         "ggtgaggcttaacaacaaatatatatatactgtaatatctaacaacatat" ~
688         "gattctaaaatttcaaaatgcttaaaggtctga",
689         1,
690         Yes.freeShift,
691     );
692 
693     assert(alignment.toString(50) ==
694         "----------tgaggacagaagggtcataggtttaattctggtcacaggc\n" ~
695         "          |||||||||||||||||| |||||||||||||||||||||\n" ~
696         "aatgaacagctgaggacagaagggtcat-ggtttaattctggtcacaggc\n" ~
697         "\n" ~
698         "acattcctgggttgcaggtttgatctcc-acctggtcggggc-acatgca\n" ~
699         "||||||||||||||*|||*|**|| ||| |||*||||||||| |||||||\n" ~
700         "acattcctgggttgtaggctcaat-tcccacccggtcggggccacatgca\n" ~
701         "\n" ~
702         "ggaggcaaccaatcaatgtgtctctttctcagtgatgtttcttctctctg\n" ~
703         "|*||||||||| |||||||||||||||| *||||||||||||||||||||\n" ~
704         "gaaggcaacca-tcaatgtgtctctttc-aagtgatgtttcttctctctg\n" ~
705         "\n" ~
706         "tctctctcccccctctct-tctactctctctgaaaaatagatggaaaaaa\n" ~
707         "||| *||||||| ||*|| ||||||||||**||||**||  |||||||||\n" ~
708         "tct-actccccc-tccctatctactctctgggaaactta--tggaaaaaa\n" ~
709         "\n" ~
710         "tatcctcaggtgaggact-aacaacaaatatatatata-ttta-tatcta\n" ~
711         "||||||||||||||| || ||||||||||||||||||| |*|| ||||||\n" ~
712         "tatcctcaggtgagg-cttaacaacaaatatatatatactgtaatatcta\n" ~
713         "\n" ~
714         "acaacatatgatttctaaaatttcaaaaatcttaa-gg-ctgaattaat\n" ~
715         "||||||||||||| ||||||||||||||**||||| || ||||      \n" ~
716         "acaacatatgatt-ctaaaatttcaaaatgcttaaaggtctga------");
717 }
718 
719 // Returns the amount of memory required to compute an alignment between reference and query.
720 size_t memoryRequired(S)(in S reference, in S query)
721 {
722     return score_t.sizeof * (
723         // Space for the scoring matrix F
724         ((reference.length + 1) * (query.length + 1)) +
725         // Space for the edit path
726         (reference.length + query.length)
727     );
728 }
729 
730 /// Returns longest query and refernce length possible with memoryLimit.
731 size_t longestInputsLength(size_t memoryLimit) pure
732 {
733     return round(sqrt((memoryLimit / score_t.sizeof + 3).to!double) - 2).to!size_t;
734 }
735 
736 // Find edit path of the best alignment
737 private EditOp[] tracebackScoringMatrix(in DPMatrix!score_t F)
738 {
739     auto editPath = minimallyInitializedArray!(EditOp[])(F.size[0] + F.size[1]);
740     size_t i = F.size[0] - 1;
741     size_t j = F.size[1] - 1;
742     size_t k = editPath.length;
743 
744     while (0 < i && 0 < j)
745     {
746         auto matchScore     = F[i - 1, j - 1];
747         auto insertionScore = F[i    , j - 1];
748         auto deletionScore  = F[i - 1, j    ];
749         auto nextScore = min(
750             matchScore,
751             deletionScore,
752             insertionScore,
753         );
754 
755         assert(k > 0);
756         if (nextScore == matchScore)
757         {
758             editPath[--k] = EditOp.substitution;
759             --i;
760             --j;
761         }
762         else if (nextScore == insertionScore)
763         {
764             editPath[--k] = EditOp.insertion;
765             --j;
766         }
767         else if (nextScore == deletionScore)
768         {
769             editPath[--k] = EditOp.deletetion;
770             --i;
771         }
772         else
773         {
774             assert(0, "corrupted scoring matrix");
775         }
776     }
777 
778     while (0 < i)
779     {
780         assert(k > 0);
781         editPath[--k] = EditOp.deletetion;
782         --i;
783     }
784 
785     while (0 < j)
786     {
787         assert(k > 0);
788         editPath[--k] = EditOp.insertion;
789         --j;
790     }
791 
792     return editPath[k .. $];
793 }
794 
795 unittest
796 {
797     auto F = DPMatrix!score_t(6, 5);
798     F.elements = [
799         // -  A  G  T  C
800            0, 1, 2, 3, 4, // -
801            1, 0, 1, 2, 3, // A
802            2, 1, 1, 2, 2, // C
803            3, 2, 1, 2, 3, // G
804            4, 3, 2, 1, 2, // T
805            5, 4, 3, 2, 1, // C
806     ];
807 
808     assert(F.tracebackScoringMatrix == [
809         EditOp.substitution,
810         EditOp.deletetion,
811         EditOp.substitution,
812         EditOp.substitution,
813         EditOp.substitution,
814     ]);
815 }
816 
817 private struct DPMatrix(T)
818 {
819     size_t[2] size;
820     T[] elements;
821 
822     this(in size_t n, in size_t m)
823     {
824         this.size[0] = n;
825         this.size[1] = m;
826         this.elements = minimallyInitializedArray!(T[])(n * m);
827     }
828 
829     this(in size_t n, in size_t m, ref T[] buffer)
830     {
831         this.size[0] = n;
832         this.size[1] = m;
833         auto numElements = n * m;
834         assert(buffer.length <= numElements);
835         this.elements = buffer;
836     }
837 
838     unittest
839     {
840         auto M = DPMatrix!int(2, 3);
841 
842         assert(M.size[0] == 2);
843         assert(M.size[1] == 3);
844 
845         foreach (i; 0 .. M.size[0])
846             foreach (j; 0 .. M.size[1])
847                 M[i, j] = cast(int) (i + j);
848 
849         assert(M.elements == [
850             0, 1, 2,
851             1, 2, 3,
852         ]);
853     }
854 
855     size_t opDollar(size_t dim)() const pure nothrow if (dim < 2)
856     {
857         return size[dim];
858     }
859 
860     auto ref T opIndex(size_t i, size_t j) pure nothrow
861     {
862         assert(i < size[0] && j < size[1], "index out of bounds");
863 
864         return elements[i * size[1] + j];
865     }
866 
867     const(T) opIndex(size_t i, size_t j) const pure nothrow
868     {
869         assert(i < size[0] && j < size[1], "index out of bounds");
870 
871         return elements[i * size[1] + j];
872     }
873 
874     unittest
875     {
876         auto M = DPMatrix!int(2, 3);
877         M.elements = [
878             0, 1, 2,
879             1, 2, 3,
880         ];
881 
882         assert(M[0, 0] == 0 + 0);
883         assert(M[0, 1] == 0 + 1);
884         assert(M[0, 2] == 0 + 2);
885         assert(M[1, 0] == 1 + 0);
886         assert(M[1, 1] == 1 + 1);
887         assert(M[1, 2] == 1 + 2);
888     }
889 
890     int opApply(scope int delegate(size_t i, size_t j, ref T) yield)
891     {
892         mixin(opApplyImpl!(No.reverse));
893     }
894 
895     int opApplyReverse(scope int delegate(size_t i, size_t j, ref T) yield)
896     {
897         mixin(opApplyImpl!(Yes.reverse));
898     }
899 
900     unittest
901     {
902         auto M = DPMatrix!int(2, 3);
903 
904         foreach (i, j, ref elem; M)
905             elem = cast(int) (i + j);
906 
907         assert(M.elements == [
908             0, 1, 2,
909             1, 2, 3,
910         ]);
911     }
912 
913     int opApply(scope int delegate(size_t i, size_t j, in T) yield) const
914     {
915         mixin(opApplyImpl!(No.reverse));
916     }
917 
918     int opApplyReverse(scope int delegate(size_t i, size_t j, in T) yield) const
919     {
920         mixin(opApplyImpl!(Yes.reverse));
921     }
922 
923     unittest
924     {
925         auto M = DPMatrix!int(2, 3);
926         M.elements = [
927             0, 1, 2,
928             1, 2, 3,
929         ];
930 
931         foreach (i, j, elem; cast(const(typeof(M))) M)
932         {
933             assert(M[i, j] == elem);
934             assert(elem == i + j);
935         }
936     }
937 
938     private static enum opApplyImpl(Flag!"reverse" reverse) = `
939         int result = 0;
940 
941         foreach` ~ (reverse ? `_reverse` : ``) ~ ` (i, ref element; elements)
942         {
943             result = yield(i / size[1], i % size[1], element);
944 
945             if (result)
946                 break;
947         }
948 
949         return result;
950     `;
951 }
952 
953 /// Convert a floating point number to a base-10 string at compile time.
954 /// This function is very crude and will not work in many cases!
955 string toString(Float)(in Float value, in uint precision) pure nothrow
956     if (isFloatingPoint!Float)
957 {
958     if (value.isNaN)
959         return "nan";
960     else if (value.isInfinity)
961         return value > 0 ? "inf" : "-inf";
962 
963     if (precision == 0)
964     {
965         auto intPart = cast(long) round(value);
966 
967         return intPart.to!string;
968     }
969     else
970     {
971         auto intPart = cast(long) (value > 0 ? floor(value) : ceil(value));
972         auto fraction = sgn(value) * (value - intPart);
973         assert(fraction >= 0, "fractional part of value should be non-negative");
974         auto fracPart = cast(ulong) round(10^^precision * fraction);
975 
976         return intPart.to!string ~ "." ~ fracPart.to!string;
977     }
978 }
979 
980 ///
981 unittest
982 {
983     enum x = 42.0;
984     enum y = -13.37f;
985     enum z = 0.9;
986 
987     static assert(float.nan.toString(0) == "nan");
988     static assert(double.infinity.toString(0) == "inf");
989     static assert((-double.infinity).toString(0) == "-inf");
990     static assert(x.toString(0) == "42");
991     static assert(x.toString(1) == "42.0");
992     static assert(y.toString(2) == "-13.37");
993     static assert(y.toString(1) == "-13.4");
994     static assert(y.toString(0) == "-13");
995     static assert(z.toString(1) == "0.9");
996     static assert(z.toString(0) == "1");
997 }
998 
999 
1000 private struct SuffixPrefixMatches(alias pred, C)
1001 {
1002     C[] stringA;
1003     C[] stringB;
1004     size_t shift;
1005 
1006 
1007     this(C[] stringA, C[] stringB)
1008     {
1009         this.stringA = stringA;
1010         this.stringB = stringB;
1011         this.shift = stringA.length > stringB.length
1012             ? stringA.length - stringB.length
1013             : 0;
1014 
1015         findNextMatch();
1016     }
1017 
1018 
1019     void popFront()
1020     {
1021         assert(!empty, "Attempting to popFront an empty " ~ typeof(this).stringof);
1022 
1023         ++shift;
1024         findNextMatch();
1025     }
1026 
1027 
1028     @property auto front()
1029     {
1030         assert(!empty, "Attempting to fetch the front of an empty " ~ typeof(this).stringof);
1031 
1032         return suffixA;
1033     }
1034 
1035 
1036     @property bool empty() const pure nothrow @safe @nogc
1037     {
1038         return shift >= stringA.length;
1039     }
1040 
1041 
1042     @property SuffixPrefixMatches!(pred, C) save() pure nothrow @safe @nogc
1043     {
1044         return this;
1045     }
1046 
1047 
1048 private:
1049 
1050 
1051     void findNextMatch()
1052     {
1053         while (shift < stringA.length && !equal!pred(suffixA, prefixB))
1054             ++shift;
1055     }
1056 
1057 
1058     @property C[] suffixA() pure nothrow @safe @nogc
1059     {
1060         return stringA[shift .. $];
1061     }
1062 
1063 
1064     @property C[] prefixB() pure nothrow @safe @nogc
1065     {
1066         return stringB[0 .. stringA.length - shift];
1067     }
1068 }
1069 
1070 
1071 /// Find all suffixes of stringA that are a prefix of stringB. The matches are
1072 /// computed lazily, decreasing length of the match.
1073 ///
1074 /// Returns: lazily computed range of all suffixes of stringA that are a
1075 ///          prefix of stringB.
1076 auto suffixPrefixMatches(alias pred = "a == b", C)(C[] stringA, C[] stringB)
1077 {
1078     return SuffixPrefixMatches!(pred, C)(stringA, stringB);
1079 }
1080 
1081 ///
1082 unittest
1083 {
1084     auto matches = suffixPrefixMatches("abcdef", "cdefghij");
1085 
1086     assert(equal(matches, ["cdef"]));
1087 }
1088 
1089 ///
1090 unittest
1091 {
1092     auto matches = suffixPrefixMatches("abcdefcdef", "cdefcdefghij");
1093 
1094     assert(equal(matches, ["cdefcdef", "cdef"]));
1095 }
1096 
1097 unittest
1098 {
1099     auto matches = suffixPrefixMatches("abcdefcdefc", "cdefcdefcghij");
1100 
1101     assert(equal(matches, ["cdefcdefc", "cdefc", "c"]));
1102 }
1103 
1104 unittest
1105 {
1106     struct ModulusChar
1107     {
1108         int value;
1109         alias value this;
1110 
1111         bool opEquals(const ModulusChar other) const pure nothrow @safe @nogc
1112         {
1113             return this.value % 10 == other.value % 10;
1114         }
1115     }
1116 
1117     alias m = ModulusChar;
1118     const modulusStringA = [m(1), m(2), m(3), m(1), m(2),  m(3),  m(1),  m(2)];
1119     const modulusStringB = [                       m(12), m(13), m(11), m(12)];
1120 
1121     auto matches = suffixPrefixMatches(modulusStringA, modulusStringB);
1122 
1123     assert(equal(matches, [
1124         modulusStringA[4 .. $],
1125         modulusStringA[7 .. $],
1126     ]));
1127 }
1128 
1129 unittest
1130 {
1131     const modulusStringA = [1, 2, 3, 1, 2,  3,  1,  2];
1132     const modulusStringB = [           12, 13, 11, 12];
1133 
1134     auto matches = suffixPrefixMatches!"a % 10 == b % 10"(modulusStringA, modulusStringB);
1135 
1136     assert(equal(matches, [
1137         modulusStringA[4 .. $],
1138         modulusStringA[7 .. $],
1139     ]));
1140 }