findAlignment

Compute an alignment of query against reference using the Needleman-Wunsch algorithm with non-negative scores and constant indel penalty. Optionally, the freeShift mode may be activated as to allow large indels at the beginning and end of the alignment.

**Implementation Notes:** The current implementation needs O(reference.length * query.length) in time and memory. As the memory requirement easily exceeds available memory it can be limited for now. This may change in future and an implementation using O(max(reference.length, query.length)) memory will be silently selected for large inputs.

SequenceAlignment!(const(S), scoreFun)
findAlignment
(
alias scoreFun = "a == b ? 0 : 1"
S
)
(,
in S query
,,
Flag!"freeShift" freeShift = No.freeShift
,
size_t memoryLimit = 2^^20
)

Parameters

scoreFun

calculate score for a 'substitution' at i, j using scoreFun(reference[i], reference[j])

reference S

Sequence to compare query against

query S

Sequence to compare against reference

indelPenalty score_t

Penalize each indel with this value

freeShift Flag!"freeShift"

Allow indels at the beginning and end of the alignment

memoryLimit size_t

throw an error if the calculation would require more than memoryLimit bytes.

Throws

AlignmentException if the calculation would require more than memoryLimit bytes.

Examples

auto alignment = findAlignment("ACGTC", "AGTC", 1);

    //   - A G T C
    // - 0 1 2 3 4
    // A 1 0 1 2 3
    // C 2 1 1 2 2
    // G 3 2 1 2 3
    // T 4 3 2 1 2
    // C 5 4 3 2 1
    //
    // ACGTC
    // | |||
    // A-GTC

assert(alignment.score == 1);
assert(alignment.editPath == [
    EditOp.substitution,
    EditOp.deletetion,
    EditOp.substitution,
    EditOp.substitution,
    EditOp.substitution,
]);
auto alignment = findAlignment("GCATGCT", "GATTACA", 1);

//   - G A T T A C A
// - 0 1 2 3 4 5 6 7
// G 1 0 1 2 3 4 5 6
// C 2 1 2 3 4 5 4 5
// A 3 2 1 2 3 4 5 4
// T 4 3 2 1 2 3 6 5
// G 5 4 3 2 3 4 5 6
// C 6 5 4 3 4 5 4 5
// T 7 6 5 4 3 6 5 6
//
// GCAT-GCT
// | || *|*
// G-ATTACA
assert(alignment.score == 4);
assert(alignment.editPath == [
    EditOp.substitution,
    EditOp.deletetion,
    EditOp.substitution,
    EditOp.substitution,
    EditOp.insertion,
    EditOp.substitution,
    EditOp.substitution,
    EditOp.substitution,
]);
auto alignment = findAlignment!"a == b ? 0 : 1"(
    "tgaggacagaagggtcataggtttaattctggtcacaggcacattcctgg" ~
    "gttgcaggtttgatctccacctggtcggggcacatgca",
    "tgaggacagaagggtcatggtttaattctggtcacaggcacattcctggg" ~
    "ttgtaggctcaattcccacccggtcggggccacatgca",
    1,
);

assert(alignment.toString(50) ==
    "tgaggacagaagggtcataggtttaattctggtcacaggcacattcctgg\n" ~
    "|||||||||||||||||| |||||||||||||||||||||||||||||||\n" ~
    "tgaggacagaagggtcat-ggtttaattctggtcacaggcacattcctgg\n" ~
    "\n" ~
    "gttgcaggtttgatctcc-acctggtcggggc-acatgca\n" ~
    "||||*|||*|**|| ||| |||*||||||||| |||||||\n" ~
    "gttgtaggctcaat-tcccacccggtcggggccacatgca");
auto alignment = findAlignment(
    "tgaggacagaagggtcataggtttaattctggtcacaggcacattcctgg" ~
    "gttgcaggtttgatctccacctggtcggggcacatgca",
    "aatgaacagctgaggacagaagggtcatggtttaattctggtcacaggca" ~
    "cattcctgggttgtaggctcaattcccacccggtcggggccacatgca",
    1,
    Yes.freeShift,
);

assert(alignment.toString(50) ==
    "----------tgaggacagaagggtcataggtttaattctggtcacaggc\n" ~
    "          |||||||||||||||||| |||||||||||||||||||||\n" ~
    "aatgaacagctgaggacagaagggtcat-ggtttaattctggtcacaggc\n" ~
    "\n" ~
    "acattcctgggttgcaggtttgatctcc-acctggtcggggc-acatgca\n" ~
    "||||||||||||||*|||*|**|| ||| |||*||||||||| |||||||\n" ~
    "acattcctgggttgtaggctcaat-tcccacccggtcggggccacatgca");
auto alignment = findAlignment(
    "tatcctcaggtgaggactaacaacaaatatatatatatttatatctaaca" ~
    "acatatgatttctaaaatttcaaaaatcttaaggctgaattaat",
    "tatcctcaggtgaggcttaacaacaaatatatatatactgtaatatctaa" ~
    "caacatatgattctaaaatttcaaaatgcttaaaggtctga",
    1,
    Yes.freeShift,
);

assert(alignment.toString(50) ==
    "tatcctcaggtgaggact-aacaacaaatatatatata-ttta-tatcta\n" ~
    "||||||||||||||| || ||||||||||||||||||| |*|| ||||||\n" ~
    "tatcctcaggtgagg-cttaacaacaaatatatatatactgtaatatcta\n" ~
    "\n" ~
    "acaacatatgatttctaaaatttcaaaaatcttaa-gg-ctgaattaat\n" ~
    "||||||||||||| ||||||||||||||**||||| || ||||      \n" ~
    "acaacatatgatt-ctaaaatttcaaaatgcttaaaggtctga------");

See Also

Meta