R::Script of forward algorithm of HMM
From Biocourse
#################################################
# Forward algorithm
#
# Description : Forward algorithm implementation
# (0:non-CpG island, 1: CpG island)
#
# author : arhan@ucla.edu
#################################################
e0 <- function(dna) {
value = 0.25;
value
}
e1 <- function(dna) {
value = -1
if(dna == "A") value = 0.1;
if(dna == "C") value = 0.4;
if(dna == "G") value = 0.4;
if(dna == "T") value = 0.1;
value
}
a <- function(previous, current){
value = -1
if(previous == 0 && current == 0) value = 0.99;
if(previous == 0 && current == 1) value = 0.01;
if(previous == 1 && current == 0) value = 0.02;
if(previous == 1 && current == 1) value = 0.98;
value
}
getElement <- function(data, index){
substr(data, index, index)
}
getMaxVariable <- function(x, y){
value = max(x,y)
if(value == x) value = 0;
if(value == y) value = 1;
value;
}
aB0 = 0.5;
aB1 = 0.5;
a0E = 1;
a1E = 1;
#initialization
# open an output file connection
#datafile = file("c:/Dataset1.txt", "r")
#dataline = readLines(datafile)
#inputSequence = dataline[2];
#No_dna = 1000;
input sequence
inputSequence = "CGCGCGCGTTTTTTAAAAAATATATATACCGGCGCGCGCG";
No_dna = 40;
F0 = array();
F1 = array();
LOGREMEINY0 = array();
LOGREMEINY1 = array();
LOGY0 = array();
LOGY1 = array();
F00 = 1;
F01 = 0;
F0[1] = (F00*a(0,0) + F01*a(1,0))*e0(getElement(inputSequence,1));
F1[1] = (F00*a(0,1) + F01*a(1,1))*e1(getElement(inputSequence,1));
print(F0[1]);
print(F1[1]);
F0[1] = log(F0[1]);
F1[1] = log(F1[1]);
#recursive
for(i in 2:No_dna){
LOGY0[i] = 0;
LOGY1[i] = 0;
LOGREMAINY0[i] = 0;
LOGREMAINY1[i] = 0;
FRACTION = 0;
#FO, F1 was logged
LOGY0[i] = max(F0[i-1]+log(a(0,0)), F1[i-1] + log(a(1,0)));
Y0MAX = getMaxVariable(F0[i-1]+log(a(0,0)),F1[i-1] + log(a(1,0)));
if(Y0MAX==0){
FRACTION = F1[i-1] - F0[i-1] + log(a(1,0)) - log(a(0,0));
}
if(Y0MAX==1){
FRACTION = F0[i-1] - F1[i-1] + log(a(0,0)) - log(a(1,0));
}
LOGREMEINY0[i] = log(exp(FRACTION) + 1);
F0[i] = LOGY0[i] + LOGREMEINY0[i] + log(e0(getElement(inputSequence,i)));
LOGY1[i] = max(F0[i-1]+log(a(0,1)), F1[i-1] + log(a(1,1)));
Y1MAX = getMaxVariable(F0[i-1]+log(a(0,1)),F1[i-1] + log(a(1,1)));
if(Y1MAX==0){
FRACTION = F1[i-1] - F0[i-1] + log(a(1,1)) - log(a(0,1));
}
if(Y1MAX==1){
FRACTION = F0[i-1] - F1[i-1] + log(a(0,1)) - log(a(1,1));
}
LOGREMEINY1[i] = log(exp(FRACTION) + 1);
F1[i] = LOGY1[i] + LOGREMEINY1[i] + log(e1(getElement(inputSequence,i)));
}
#ending state
LOGYE = max(F0[No_dna] + log(a0E),F1[No_dna] + log(a1E));
YEMAX = getMaxVariable(F0[No_dna] + log(a0E),F1[No_dna] + log(a1E));
FRACTION = 0;
if(YEMAX==0){
FRACTION = F1[No_dna] - F0[No_dna] + log(a1E) - log(a0E);
}
if(YEMAX==1){
FRACTION = F0[No_dna] - F1[No_dna] + log(a0E) - log(a1E);
}
LOGREMEINYE = log(exp(FRACTION) + 1);
FE = LOGYE + LOGREMEINYE;
