R::Script to find marginal posterior probability of HMM

From Biocourse

Jump to: navigation, search

#################################################
# Marginal Posterior Probability
#
# Description : Marginal Posterior Probability, you can compare the result with hidden paths.
#  (0:non-CpG island, 1: CpG island)
#
# author : arhan@ucla.edu
#################################################
#A example

e0 <- function(dna) {
 value = 0.25;
 value
}


e1 <- function(dna) {
 value = -1
 if(dna == "A") value = 0.1;
 if(dna == "C") value = 0.4;
 if(dna == "G") value = 0.4;
 if(dna == "T") value = 0.1;
        value
}

a <- function(previous, current){
 value = -1
 if(previous == 0 && current == 0) value = 0.99;
 if(previous == 0 && current == 1) value = 0.01;
 if(previous == 1 && current == 0) value = 0.02;
 if(previous == 1 && current == 1) value = 0.98;
 value
}

getElement <- function(data, index){
 substr(data, index, index)
}
getMaxVariable <- function(x, y){
 value = -1;
 value = max(x,y)
 if(value == x) value = 0;
 if(value == y) value = 1;
 value;
}


#initialization
aB0 = 0.5;
aB1 = 0.5;
a0E = 0.5;
a1E = 0.5;

# open an output file connection
#datafile = file("c:/Dataset1.txt", "r") 
#dataline = readLines(datafile)
#inputSequence = dataline[2];
#No_dna = 1000;

#input sequence
inputSequence = "CGCGCGCGTTTTTTAAAAAATATATATACCGGCGCGCGCG";
No_dna = 40;


##############################################
#forward
##############################################
F0 = array();
F1 = array();
LOGREMEINY0 = array();
LOGREMEINY1 = array();
LOGY0 = array();
LOGY1 = array();

F00 = 1;
F01 = 0;

F0[1] = (F00*a(0,0) + F01*a(1,0))*e0(getElement(inputSequence,1));
F1[1] = (F00*a(0,1) + F01*a(1,1))*e1(getElement(inputSequence,1));

print(F0[1]);
print(F1[1]);

F0[1] = log(F0[1]);
F1[1] = log(F1[1]);

#recursive
for(i in 2:No_dna){

 LOGY0[i] = 0;
 LOGY1[i] = 0;

 LOGREMAINY0[i] = 0;
 LOGREMAINY1[i] = 0;
 
 FRACTION = 0;

        #FO, F1 was logged
 LOGY0[i] = max(F0[i-1]+log(a(0,0)), F1[i-1] + log(a(1,0)));
 Y0MAX = getMaxVariable(F0[i-1]+log(a(0,0)),F1[i-1] + log(a(1,0)));

 if(Y0MAX==0){

  FRACTION = F1[i-1] - F0[i-1] + log(a(1,0)) - log(a(0,0));
 }

 if(Y0MAX==1){

  FRACTION = F0[i-1] - F1[i-1] + log(a(0,0)) - log(a(1,0));  
 }

 LOGREMEINY0[i] = log(exp(FRACTION) + 1);
 F0[i] = LOGY0[i] + LOGREMEINY0[i] + log(e0(getElement(inputSequence,i)));

 LOGY1[i] = max(F0[i-1]+log(a(0,1)), F1[i-1] + log(a(1,1)));
 Y1MAX = getMaxVariable(F0[i-1]+log(a(0,1)),F1[i-1] + log(a(1,1)));

 if(Y1MAX==0){

  FRACTION = F1[i-1] - F0[i-1] + log(a(1,1)) - log(a(0,1));

 }
 if(Y1MAX==1){

  FRACTION = F0[i-1] - F1[i-1] + log(a(0,1)) - log(a(1,1));
 }

 LOGREMEINY1[i] = log(exp(FRACTION) + 1);
 F1[i] = LOGY1[i] + LOGREMEINY1[i] + log(e1(getElement(inputSequence,i)));
 
}

#ending state

LOGYE = max(F0[No_dna] + log(a0E),F1[No_dna] + log(a1E));
YEMAX = getMaxVariable(F0[No_dna] + log(a0E),F1[No_dna] + log(a1E));
FRACTION = 0;

if(YEMAX==0){

 FRACTION = F1[No_dna] - F0[No_dna] + log(a1E) - log(a0E);

}
if(YEMAX==1){

 FRACTION = F0[No_dna] - F1[No_dna] + log(a0E) - log(a1E);
}

LOGREMEINYE = log(exp(FRACTION) + 1);
FE = LOGYE + LOGREMEINYE;

##############################################
#backward
##############################################
B0 = array();
B1 = array();

B0[No_dna] = log(a0E);
B1[No_dna] = log(a1E);

LOGY0 = array();
LOGY1 = array();

LOGREMEINY0 = array();
LOGREMEINY1 = array();

#recursive
for(i in (No_dna-1):1){

 LOGY0[i] = 0;
 LOGY1[i] = 0;

 LOGREMEINY0[i] = 0;
 LOGREMEINY1[i] = 0;

 LOGY0[i] = max(B0[i+1]+log(a(0,0))+log(e0(getElement(inputSequence,i+1))), B1[i+1]+log(a(0,1))+log(e1(getElement(inputSequence,i+1))));
 Y0MAX = getMaxVariable((B0[i+1]+log(a(0,0))+log(e0(getElement(inputSequence,i+1)))), (B1[i+1]+log(a(0,1))+log(e1(getElement(inputSequence,i+1)))));
 
 
 FRACTION = 0;

 if(Y0MAX==0){

  FRACTION = B1[i+1]-B0[i+1] + log(a(0,1)) - log(a(0,0)) + log(e1(getElement(inputSequence,i+1))) - log(e0(getElement(inputSequence,i+1)));
 }
 if(Y0MAX==1){
 
  FRACTION = B0[i+1]-B1[i+1] + log(a(0,0)) - log(a(0,1)) + log(e0(getElement(inputSequence,i+1))) - log(e1(getElement(inputSequence,i+1)));
 
 }
 
 LOGREMEINY0[i] = log(1+exp(FRACTION));
 B0[i] = LOGY0[i] + LOGREMEINY0[i];


 LOGY1[i] = max(B0[i+1]+log(a(1,0))+log(e0(getElement(inputSequence,i+1))), B1[i+1]+log(a(1,1))+log(e1(getElement(inputSequence,i+1))));
 Y1MAX = getMaxVariable((B0[i+1]+log(a(1,0))+log(e0(getElement(inputSequence,i+1)))), (B1[i+1]+log(a(1,1))+log(e1(getElement(inputSequence,i+1)))));

 if(Y1MAX==0){

  FRACTION = B1[i+1] - B0[i+1] + log(a(1,1)) - log(a(1,0)) + log(e1(getElement(inputSequence,i+1))) - log(e0(getElement(inputSequence,i+1)));
 }
 if(Y1MAX==1){

  FRACTION = B0[i+1] - B1[i+1] + log(a(1,0)) - log(a(1,1)) + log(e0(getElement(inputSequence,i+1))) - log(e1(getElement(inputSequence,i+1)));

 }

 LOGREMEINY1[i] = log(1+exp(FRACTION));
 B1[i] = LOGY1[i] + LOGREMEINY1[i];
}
##############################################
#mpp
##############################################
M0 = array();
M1 = array();


for(i in 1:No_dna){
 M0[i] = (F0[i]+B0[i])-FE;
  M1[i] = (F1[i]+B1[i])-FE;
}

plot(exp(M0), type = "l");

#marginary posterior probability
plot(exp(M1), type = "l");