/*
 * rfold.hpp
 *
 */
#ifndef RFOLD_HPP
#define RFOLD_HPP
#include "util.hpp"
#include "score.hpp"
#include "tri_matrix_ring.hpp"
#include "array_ring.hpp"
#include "array_file.hpp"
#include "seq_file.hpp"
#include "ct_file.hpp"
#include "energy_model.hpp"
// #include "contrafold_model.hpp"

// Selects the scoring model that Rfold derives from. Only EnergyModel is
// active; the ContrafoldModel alternative below is commented out.
namespace RFOLD {
// #ifdef USE_CONTRAFOLD_MODEL
// typedef ContrafoldModel Model;
// #else
typedef EnergyModel Model;
// #endif
}


namespace RFOLD {
class Rfold : private Model {
public:
  // Categories used for probability scaling (set_prob_scales()), constraint
  // flags (ConstrNode) and probability reporting (print_prob()).
  // ConstrNode maps them to its pair / inner / outer permission bits.
  enum MEAState {
    P_st,
    I_st,
    O_st
  };
  // Number of MEAState enumerators (P_st is the first, O_st the last).
  enum {
    N_MEA_STATE = (O_st - P_st + 1)
  };
  // Input sequence type and structure (CT) type used by this class.
  typedef SeqFile SeqT;
  typedef CTFile StructT;
  // Score containers used by the nested table classes below
  // (Inside, Outside, CYK, MEAProb).
  typedef TriMatrixRing1<ScoreT> ScoreMatrixP;
  typedef Matrix<ScoreT> ScoreMatrixS;
  typedef ArrayFile<ScoreT> ScoreArray;
  typedef ArrayRing<ScoreT> ScoreArrayR;
  // Per-destination, per-state scratch scores (see curr_sc()).
  typedef CArray<CArray<ScoreT, NSTATE0>, NDST> CurrSC;
  // Traceback containers used by Tau and NusTau.
  typedef TriMatrixRing1<TBNode> TracebackMatrix;
  typedef TriMatrixRing1<NusTBNode> NusTracebackMatrix;
  typedef ArrayFile<int> TracebackArray;
  // One scale factor per MEAState (filled by set_prob_scales()).
  typedef CArray<ScoreT, N_MEA_STATE> ProbScales;
  // Score tables sized and filled by the forward pass (compute_forward()).
  class Inside {
  public:
    ScoreMatrixS s, s1;
    ScoreMatrixP p;
    ScoreArray o;

    // Short aliases for the member container types.
    typedef ScoreMatrixS S;
    typedef ScoreMatrixP P;
    typedef ScoreArray O;

    // Overwrites every entry of all four tables with NEG_INF().
    void clean() {
      s.fill(NEG_INF());
      s1.fill(NEG_INF());
      p.fill(NEG_INF());
      o.fill(NEG_INF());
    }
  };
  // Score tables sized by the backward passes (compute_backward_*()).
  class Outside {
  public:
    ScoreMatrixS s;
    ScoreMatrixP p;
    ScoreArray o;

    // Short aliases for the member container types.
    typedef ScoreMatrixS S;
    typedef ScoreMatrixP P;
    typedef ScoreArray O;

    // Overwrites every entry of all three tables with NEG_INF().
    void clean() {
      s.fill(NEG_INF());
      p.fill(NEG_INF());
      o.fill(NEG_INF());
    }
  };
  // Score tables for the best-score (CYK) computation; _cyk.o[0] is read
  // back by compute_viterbi_score().
  class CYK {
  public:
    ScoreMatrixS s;
    ScoreMatrixP p;
    ScoreArray o;

    // Short aliases for the member container types.
    typedef ScoreMatrixS S;
    typedef ScoreMatrixP P;
    typedef ScoreArray O;

    // Overwrites every entry of all three tables with NEG_INF().
    void clean() {
      s.fill(NEG_INF());
      p.fill(NEG_INF());
      o.fill(NEG_INF());
    }
  };
  // Traceback tables holding TBNode entries (p) and int entries (o).
  class Tau {
  public:
    TracebackMatrix p;
    TracebackArray o;

    // Short aliases for the member container types.
    typedef TracebackMatrix P;
    typedef TracebackArray O;

    // Resets p to TBNode::null() and o to -1.
    void clean() {
      p.fill(TBNode::null());
      o.fill(-1);
    }
  };
  // Traceback tables holding NusTBNode entries (p) and int entries (o);
  // used in place of Tau when USE_MEA_NUSSINOV is defined.
  class NusTau {
  public:
    NusTracebackMatrix p;
    TracebackArray o;

    // Short aliases for the member container types.
    typedef NusTracebackMatrix P;
    typedef TracebackArray O;

    // Resets p to NusTBNode::null() and o to -1.
    void clean() {
      p.fill(NusTBNode::null());
      o.fill(-1);
    }
  };
  // Probability tables sized by compute_backward_mea()/compute_backward_prob().
  class MEAProb {
  public:
    ScoreMatrixP p;
    ScoreArrayR o;

    // Short aliases for the member container types.
    typedef ScoreMatrixP P;
    typedef ScoreArrayR O;

    // Overwrites every entry of both tables with NEG_INF().
    void clean() {
      p.fill(NEG_INF());
      o.fill(NEG_INF());
    }
  };
  // Three one-bit permission flags (pair / inner / outer), addressed by
  // MEAState.
  class ConstrNode {
  public:
    // Stores `allow` in the flag selected by `s`; any other value is fatal.
    void set(MEAState s, bool allow) {
      if (s == P_st) {
        _allow_pair = allow;
      } else if (s == I_st) {
        _allow_inner = allow;
      } else if (s == O_st) {
        _allow_outer = allow;
      } else {
        Die("bad type");
      }
    }
    // Returns the flag selected by `s`; any other value is fatal.
    bool get(MEAState s) const {
      if (s == P_st) return _allow_pair;
      if (s == I_st) return _allow_inner;
      if (s == O_st) return _allow_outer;
      Die("bad type");
      return false;
    }
    // Shared node with all three flags cleared.
    static const ConstrNode& null() {
      static const ConstrNode cleared = {false, false, false};
      return cleared;
    }
    unsigned int _allow_pair  :1;
    unsigned int _allow_inner :1;
    unsigned int _allow_outer :1;
  };
  // Constraint table type (see _constr in compute_forward()).
  typedef TriMatrixRing1<ConstrNode> Constr;

  // Returns the enumerator name of an MEAState value ("P_st", "I_st" or
  // "O_st"), or "BAD_MEA_STATE" for any other integer.
  //
  // Written as a switch so that no helper macro is left defined after the
  // function and no table of std::string objects is rebuilt on every call.
  static string mea_state_to_s(int type) {
    switch (type) {
    case P_st: return "P_st";
    case I_st: return "I_st";
    case O_st: return "O_st";
    default:   return "BAD_MEA_STATE";
    }
  }
  // Operation selected via set_command() and dispatched by run().
  // Each enumerator maps to the compute_* method of the same (lower-case) name.
  enum Command {
    COMPUTE_EM_F,
    COMPUTE_CRF_F,
    COMPUTE_MEA_F,
    COMPUTE_EM_FDF,
    COMPUTE_CRF_FDF,
    COMPUTE_MEA_FDF,
    COMPUTE_ML_FOLD,
    COMPUTE_MEA_FOLD,
    COMPUTE_PROB
  };
  // Small tolerance constant (1.0e-13). Its users are outside this part of
  // the file -- presumably a minimum significant score difference; confirm.
  static ScoreT MIN_DIFF() {
    return 1.0e-13;
  }
  // static ScoreT MIN_DIFF() {return 0.0;}
  // Default configuration: command COMPUTE_MEA_FOLD, tag "rfold", no
  // sequence or constraint structure attached, no output streams, and
  // _max_pair_dist = -1 (compute_width() treats a negative value as
  // "no limit"). Geometry fields (_len, _width, _period, _block_size) are
  // placeholders until run() recomputes them.
  Rfold() 
    : _tag("rfold"), 
      _command(COMPUTE_MEA_FOLD), 
      _max_pair_dist(-1), 
      _mea_inner_loop_coeff(1.0), 
      _mea_outer_loop_coeff(1.0),
      _mea_separate_loop_type(true),
      _seq(NULL),
      _len(0), 
      _width(0),
      _period(0),
      _block_size(0),
      _is_triangular(true),
      _partition_coeff(NEG_INF()), 
      _dp_score(NEG_INF()),
      _ct_constr(NULL),
      _has_constraint(false),
      _has_constraint_nus(false),
      _constraint(false),
      _allow_non_canonical(false),
      _print_prob(false),
      _print_loop_prob(false),
      _print_prob_cutoff(0.0),
      _f_prob(0),
      _f_struct(0) {
    _freq.clean();
    _freq0.clean();
  }
  // Nothing to release explicitly; _seq, _ct_constr and the stream pointers
  // are only assigned from caller-provided objects in the code shown here.
  ~Rfold() {}
  // Copy of the count vector stored by the *_fdf commands.
  Array<ScoreT> count_vector() {
    return _count_vector;
  }
  // Score produced by the most recent command.
  ScoreT dp_score() {
    return _dp_score;
  }
  // Structure string of the predicted CT structure.
  string sscons() {
    return _ct_pred.sscons();
  }
  // Selects the command to execute in run() from its enumerator name
  // (e.g. "COMPUTE_MEA_FOLD"). An unknown name is fatal.
  void set_command(const std::string& command) {
    static const struct {std::string name; Command val;} table[] = {
#define Item(x) {#x, x}
      Item(COMPUTE_EM_F),
      Item(COMPUTE_CRF_F),
      Item(COMPUTE_MEA_F),
      Item(COMPUTE_EM_FDF),
      Item(COMPUTE_CRF_FDF),
      Item(COMPUTE_MEA_FDF),
      Item(COMPUTE_ML_FOLD),
      Item(COMPUTE_MEA_FOLD),
      Item(COMPUTE_PROB)
#undef Item
    };
    static const int n_entries = (sizeof(table) / sizeof(table[0]));
    for (int idx = 0; idx < n_entries; idx++) {
      if (table[idx].name != command) {
        continue;
      }
      _command = table[idx].val;
      return;
    }
    Die("unrecognized command: %s", command.c_str());
  }
  // Attaches a sequence (not owned) and drops any constraint structure.
  void set_seq(SeqT& seq) {
    _seq = &seq;
    _ct_constr = NULL;
  }
  // The attached sequence; set_seq() must have been called first.
  SeqT& seq() { return *_seq; }
  // Element i of the attached sequence.
  int seq(int i) { return (*_seq)[i]; }
  Freq& freq() { return _freq; }
  Freq& freq0() { return _freq0; }
  // Attaches a constraint structure (not owned).
  void set_ct_constr(StructT& ct) { _ct_constr = &ct; }
  // The attached constraint structure; asserts that one is attached.
  StructT& ct_constr() {
    Assert(_ct_constr);
    return *_ct_constr;
  }
  // Sets both constraint flags together. They end up true only when a
  // constraint structure is attached and either the global _constraint
  // switch is on or `flag` is true; otherwise both are false.
  void set_has_constraint(bool flag) {
    const bool enabled = _ct_constr && (_constraint || flag);
    _has_constraint = enabled;
    _has_constraint_nus = enabled;
  }
  // --- constraint flags ---
  bool has_constraint() { return _has_constraint; }
  bool has_constraint_nus() { return _has_constraint_nus; }
  void set_constraint(bool flag) { _constraint = flag; }
  // Note: writes _has_constraint_nus only.
  void set_constraint_mea(bool flag) { _has_constraint_nus = flag; }
  void set_allow_non_canonical(bool flag) { _allow_non_canonical = flag; }

  // --- general settings ---
  // Prefix of the scratch-file names built in the compute_* methods.
  void set_tag(const string& s) { _tag = s; }
  // Negative means "no limit" (see compute_width()).
  void set_max_pair_dist(int n) { _max_pair_dist = n; }
  void set_mea_inner_loop_coeff(ScoreT z) { _mea_inner_loop_coeff = z; }
  void set_mea_outer_loop_coeff(ScoreT z) { _mea_outer_loop_coeff = z; }
  void set_mea_separate_loop_type(bool flag) { _mea_separate_loop_type = flag; }

  // --- output settings ---
  void set_print_prob(bool flag) { _print_prob = flag; }
  void set_print_loop_prob(bool flag) { _print_loop_prob = flag; }
  void set_print_prob_cutoff(ScoreT z) { _print_prob_cutoff = z; }
  void set_f_prob(ostream* fo) { _f_prob = fo; }
  void set_f_struct(ostream* fo) { _f_struct = fo; }

  // --- model parameters (forwarded to _param) ---
  void set_param_vector(Array<ScoreT>& pv) { _param.set_param_vector(pv); }
  Array<ScoreT> param_vector() { return _param.param_vector(); }
  void set_prob_scales() {
    //ScoreT fac = 1.0; for debug
    ScoreT fac = 1.0 / (1.0 + _mea_inner_loop_coeff + _mea_outer_loop_coeff);
    _prob_scales[P_st] = 2.0 * fac;
    _prob_scales[I_st] = _mea_inner_loop_coeff * fac;
    _prob_scales[O_st] = _mea_outer_loop_coeff * fac;
  }
  // Constant 10000 used in run() when deriving _block_size.
  int max_block_size() {
    return 10000;
  }
  // Returns the DP band width: min(_max_pair_dist + 2, _len + 1) when a
  // non-negative maximum pair distance is set, otherwise _len + 1.
  //
  // The limit is compared against _len - 1 instead of forming
  // _max_pair_dist + 2 first, so a very large limit (e.g. INT_MAX used as
  // "unlimited") cannot overflow; results are unchanged for all other inputs.
  int compute_width() {
    const int full_width = (_len+1);
    if (_max_pair_dist >= 0 && _max_pair_dist < (_len-1)) {
      return (_max_pair_dist+2);
    }
    return full_width;
  }
  // --- geometry computed by run() ---
  int len() { return _len; }
  int width() { return _width; }
  int period() { return _period; }
  int block_size() { return _block_size; }
  bool is_triangular() { return _is_triangular; }
  ScoreT partition_coeff() { return _partition_coeff; }

  // --- direct access to the working tables ---
  Inside& inside() { return _inside; }
  Outside& outside() { return _outside; }
  CYK& cyk() { return _cyk; }
  Tau& tau() { return _tau; }
  NusTau& nus_tau() { return _nus_tau; }
  CurrSC& curr_sc() { return _curr_sc; }
  CurrCode& curr_code() { return _curr_code; }
  StructT& ct_pred() { return _ct_pred; }
  MEAProb& mea_prob() { return _mea_prob; }

  void run() {
    if (!_ct_constr) {
      set_has_constraint(false);
      set_constraint_mea(false);
    } else {
      if (_constraint) {
	set_has_constraint(true);
      }
    }
    set_prob_scales();
    _len = seq().size();
    _width = compute_width();
    _period = (2*_width-1);
    _block_size = min(max((3*_width-2), max_block_size()), ((_len+1)+_width-1));
    _is_triangular = (_width == (_len+1));
    seq().set_margin(_period);
    seq().set_range_size(_block_size);
    _param.set_seq(seq());
    _param.set_freq(freq());
#ifdef COMPUTE_E_VALUE
    _param.set_e_value_func(_width, 
			    _mea_outer_loop_coeff, _mea_inner_loop_coeff, _len);
#endif

    if (prints_prob() || _command == COMPUTE_PROB) {
      if (_f_prob) {
	(*_f_prob) << "\n#left_pos\tright_pos\tprob\ttype\n";
      }
    }
    if (_command == COMPUTE_ML_FOLD || _command == COMPUTE_MEA_FOLD) {
      if (_f_struct) {
#ifdef COMPUTE_E_VALUE
	(*_f_struct) << "\n#start\tend\tscore\te-value\tstructure\n";
#else
	(*_f_struct) << "\n#start\tend\tscore\tstructure\n";
#endif
      }
    }
    switch (_command) {
    case COMPUTE_EM_F:     compute_em_f();     break;
    case COMPUTE_CRF_F:    compute_crf_f();    break;
    case COMPUTE_MEA_F:    compute_mea_f();    break;
    case COMPUTE_EM_FDF:   compute_em_fdf();   break;
    case COMPUTE_CRF_FDF:  compute_crf_fdf();  break;
    case COMPUTE_MEA_FDF:  compute_mea_fdf();  break;
    case COMPUTE_ML_FOLD:  compute_ml_fold();  break;
    case COMPUTE_MEA_FOLD: compute_mea_fold(); break;
    case COMPUTE_PROB:     compute_prob();     break;
    default: Die("bad command %d", _command);  break;
    }
  }
  // COMPUTE_MEA_FOLD: forward pass, MEA backward pass, then traceback.
  // _dp_score ends up holding the viterbi score.
  void compute_mea_fold() {
    compute_forward();
    _dp_score = _partition_coeff = compute_partition_coeff();
    Check(possible(_dp_score), "could not parse sequence");

    compute_backward_mea();
    _dp_score = compute_viterbi_score();
    Check(possible(_dp_score), "could not parse sequence");

    compute_traceback_mea();
  }
  // COMPUTE_MEA_F: requires constraints, runs the unconstrained forward and
  // MEA backward passes, then aborts -- the command is unfinished, as the
  // Die() message explains.
  void compute_mea_f() {
    Check(has_constraint());
    set_has_constraint(false);

    compute_forward();
    _dp_score = _partition_coeff = compute_partition_coeff();
    Check(possible(_dp_score), "could not parse sequence");

    compute_backward_mea();
    Die("backward should have no constraints while nus viterbi should be constrained");
  }
  // COMPUTE_MEA_FDF: placeholder, always aborts.
  void compute_mea_fdf() { Die("not implemented"); }
  // COMPUTE_ML_FOLD: ML backward pass followed by traceback; no forward
  // pass is run. _dp_score holds the viterbi score.
  void compute_ml_fold() {
    compute_backward_ml();

    _dp_score = compute_viterbi_score();
    Check(possible(_dp_score), "could not parse sequence");

    compute_traceback_ml();
  }
  // COMPUTE_EM_F: forward pass only; _dp_score is the partition coefficient.
  void compute_em_f() {
    compute_forward();
    _partition_coeff = compute_partition_coeff();
    _dp_score = _partition_coeff;
    Check(possible(_dp_score), "could not parse sequence");
  }
  // COMPUTE_EM_FDF: forward + feature backward pass; stores the partition
  // coefficient in _dp_score and the counter's vector in _count_vector.
  void compute_em_fdf() {
    compute_forward_backward_feature();

    _dp_score = _partition_coeff;
    Check(possible(_dp_score), "could not parse sequence");

    _count_vector = _counter.count_vector();
  }
  void compute_crf_f() {
    Check(has_constraint());
    compute_forward();
    ScoreT dp_score0 = compute_partition_coeff();
    Check(possible(dp_score0), "could not parse sequence");
    set_has_constraint(false);
    compute_forward();
    _partition_coeff = compute_partition_coeff();
    ScoreT dp_score1 = _partition_coeff;
    Check(possible(dp_score1), "could not parse sequence");
    _dp_score = (dp_score0 - dp_score1);
    set_has_constraint(true);
  }
  void compute_crf_fdf() {
    Check(has_constraint());
    compute_forward_backward_feature();
    ScoreT dp_score0 = _partition_coeff;
    Check(possible(dp_score0), "could not parse sequence");
    Array<ScoreT> cv0 = _counter.count_vector();
    set_has_constraint(false);
    compute_forward_backward_feature();
    ScoreT dp_score1 = _partition_coeff;
    Check(possible(dp_score1), "could not parse sequence");
    Array<ScoreT> cv1 = _counter.count_vector();
    _dp_score = (dp_score0 - dp_score1);
    _count_vector = cv0;
    for (int i = 0; i < (int)cv1.size(); i++) {
      _count_vector[i] -= cv1[i];
    }
    set_has_constraint(true);
  }
  // COMPUTE_PROB: turns probability printing on, runs the forward pass and
  // then the probability backward pass.
  void compute_prob() {
    set_print_prob(true);

    compute_forward();
    _dp_score = _partition_coeff = compute_partition_coeff();
    Check(possible(_dp_score), "could not parse sequence");

    compute_backward_prob();
  }
  // Central finite-difference derivative of dp_score() with respect to the
  // parameters listed in `idxs` (all parameters when `idxs` is empty).
  // For each listed parameter, run() is executed with it shifted by +eps
  // and by -eps. The original parameter vector is restored before returning.
  // Returns one derivative per entry of `idxs`, in the same order.
  Array<ScoreT> compute_dpv(Array<int> idxs = Array<int>(), ScoreT eps = 0.0001) {
    Array<ScoreT> base = param_vector();
    if (idxs.empty()) {
      idxs.resize(base.size());
      for (int k = 0; k < (int)idxs.size(); k++) {
        idxs[k] = k;
      }
    }

    Array<ScoreT> grad(idxs.size());
    grad.fill(0);

    Array<ScoreT> shifted;
    for (int k = 0; k < (int)idxs.size(); k++) {
      const int target = idxs[k];
      shifted = base;

      shifted[target] = base[target] + eps;
      set_param_vector(shifted);
      run();
      const ScoreT score_plus = dp_score();

      shifted[target] = base[target] - eps;
      set_param_vector(shifted);
      run();
      const ScoreT score_minus = dp_score();

      grad[k] = (score_plus - score_minus) / (2.0 * eps);
    }

    set_param_vector(base);
    return grad;
  }
  // Forward pass. Sizes and clears the inside tables (and the constraint
  // table when constraints are active), then sweeps them with
  // ComputeForwardFuncs via iterate_forward().
  void compute_forward() {
    if (has_constraint() || has_constraint_nus()) {
      _constr.set_size(1, _width, _period, false);
      _constr.fill(ConstrNode::null());
      // The constraint structure must be as long as the sequence.
      Check((int)ct_constr().size() == (int)seq().size());
    }
    _inside.p.set_size(NLAYER_INSIDE, _width, _width, true);
    _inside.s.set_size(NSTATE, _width);
    _inside.s1.set_size(NSTATE, _width);
    // File-backed array; the file name is derived from _tag.
    _inside.o.reset(_tag+"_inside_o.txt", _block_size, _width, NEG_INF());
    _inside.clean();
    ComputeForwardFuncs funcs(*this);
    iterate_forward(funcs);
  }
  // Reads the forward-pass result: entry _len of the inside "o" array.
  ScoreT compute_partition_coeff() { return _inside.o[_len]; }
  // Backward pass for MEA folding. Must follow compute_forward(): it reuses
  // _inside.p, switched to backward order. Sizes and clears the outside,
  // probability, CYK and traceback tables, then sweeps with
  // ComputeBackwardMEAFuncs via iterate_backward().
  void compute_backward_mea() {
    _inside.p.flip_order_backward(_len);
    if (!_is_triangular) {
      if (has_constraint()|| has_constraint_nus()) {
	_constr.flip_order_backward(_len);
      }
      // Banded case: reload _inside.s from _inside.p for start position
      // i = _len-_width+2 (states without an inside layer get NEG_INF()).
      int i = (_len-_width+2);
      if (i <= _len) {
	for (int s = 0; s < NSTATE; s++) {
	  int t = inside_layer(s);
	  for (int j = i; j <= _len; j++) {
	    ScoreT sc = (t >= 0 ? _inside.p.get(t, i, j) : NEG_INF());
	    _inside.s.set(s, (j-i), sc);
	  }
	}
      } else {
	_inside.s.fill(NEG_INF());
      }
    }
    _outside.p.set_size(NLAYER_OUTSIDE, _width, _width, true);
    _outside.s.set_size(NSTATE, _width);
    _outside.o.reset(_tag+"_outside_o.txt", _block_size, _width, NEG_INF());
    _outside.clean();
#ifdef USE_MEA_NUSSINOV
    // Nussinov variant: single-layer probability table, NUS_* CYK layout,
    // and _nus_tau as the traceback table.
    _mea_prob.p.set_size(1, _width, _width, true); //P_st should be 0
    _mea_prob.o.set_size(N_MEA_STATE, _period);
    _mea_prob.clean();
    _cyk.p.set_size(NUS_NLAYER_CYK, _width, _width, false);
    _cyk.s.set_size(NUS_NSTATE, _width);
    _cyk.o.reset(_tag+"_cyk_o.txt", _block_size, _width, NEG_INF());
    _cyk.clean();
    _nus_tau.p.set_size(1, _width, _width, true);
    _nus_tau.o.reset(_tag+"_tau_o.txt", _block_size, _width, (-1));
    _nus_tau.clean();
#else
    // Default variant: one probability layer per MEAState, full CYK layout,
    // and _tau as the traceback table.
    _mea_prob.p.set_size(N_MEA_STATE, _width, _width, true);
    _mea_prob.o.set_size(N_MEA_STATE, _period);
    _mea_prob.clean();
    _cyk.p.set_size(NLAYER_CYK, _width, _width, false);
    _cyk.s.set_size(NSTATE, _width);
    _cyk.o.reset(_tag+"_cyk_o.txt", _block_size, _width, NEG_INF());
    _cyk.clean();
    _tau.p.set_size(1, _width, _width, true);
    _tau.o.reset(_tag+"_tau_o.txt", _block_size, _width, (-1));
    _tau.clean();
#endif
    ComputeBackwardMEAFuncs funcs(*this);
    iterate_backward(funcs);
  }
  // Backward pass for COMPUTE_PROB. Must follow compute_forward(): it reuses
  // _inside.p, switched to backward order. Sizes and clears the outside and
  // probability tables, then sweeps with ComputeBackwardProbFuncs via
  // iterate_backward().
  void compute_backward_prob() {
    _inside.p.flip_order_backward(_len);
    if (!_is_triangular) {
      if (has_constraint() || has_constraint_nus()) {
	_constr.flip_order_backward(_len);
      }
      // Banded case: reload _inside.s from _inside.p for start position
      // i = _len-_width+2 (states without an inside layer get NEG_INF()).
      int i = (_len-_width+2);
      if (i <= _len) {
	for (int s = 0; s < NSTATE; s++) {
	  int t = inside_layer(s);
	  for (int j = i; j <= _len; j++) {
	    ScoreT sc = (t >= 0 ? _inside.p.get(t, i, j) : NEG_INF());
	    _inside.s.set(s, (j-i), sc);
	  }
	}
      } else {
	_inside.s.fill(NEG_INF());
      }
    }
    _outside.p.set_size(NLAYER_OUTSIDE, _width, _width, true);
    _outside.s.set_size(NSTATE, _width);
    _outside.o.reset(_tag+"_outside_o.txt", _block_size, _width, NEG_INF());
    _outside.clean();
#ifdef USE_MEA_NUSSINOV
    // Nussinov variant: single-layer probability table.
    _mea_prob.p.set_size(1, _width, _width, true); //P_st should be 0
    _mea_prob.o.set_size(N_MEA_STATE, _period);
    _mea_prob.clean();
#else
    // Default variant: one probability layer per MEAState.
    _mea_prob.p.set_size(N_MEA_STATE, _width, _width, true);
    _mea_prob.o.set_size(N_MEA_STATE, _period);
    _mea_prob.clean();
#endif
    ComputeBackwardProbFuncs funcs(*this);
    iterate_backward(funcs);
  }
  // Backward pass that accumulates counts in _counter. Must follow
  // compute_forward(): it reuses _inside.p, switched to backward order.
  // Sizes and clears the outside tables, resets _counter, then sweeps with
  // ComputeBackwardFeatureFuncs via iterate_backward().
  void compute_backward_feature() {
    _inside.p.flip_order_backward(_len);
    if (!_is_triangular) {
      if (has_constraint() || has_constraint_nus()) {
	_constr.flip_order_backward(_len);
      }
      // Banded case: reload _inside.s from _inside.p for start position
      // i = _len-_width+2 (states without an inside layer get NEG_INF()).
      int i = (_len-_width+2);
      if (i <= _len) {
	for (int s = 0; s < NSTATE; s++) {
	  int t = inside_layer(s);
	  for (int j = i; j <= _len; j++) {
	    ScoreT sc = (t >= 0 ? _inside.p.get(t, i, j) : NEG_INF());
	    _inside.s.set(s, (j-i), sc);
	  }
	}
      } else {
	_inside.s.fill(NEG_INF());
      }
    }
    _outside.p.set_size(NLAYER_OUTSIDE, _width, _width, true);
    _outside.s.set_size(NSTATE, _width);
    _outside.o.reset(_tag+"_outside_o.txt", _block_size, _width, NEG_INF());
    _outside.clean();
    // The counter works on the same sequence and frequency data as _param.
    _counter.set_seq(seq());
    _counter.set_freq(freq());
    _counter.clean();
    ComputeBackwardFeatureFuncs funcs(*this);
    iterate_backward(funcs);
  }
  // Backward pass for ML folding. Does not depend on a preceding forward
  // pass: it sets up the constraint table itself (when constraints are
  // active), sizes and clears the CYK and traceback tables, then sweeps with
  // ComputeBackwardMLFuncs via iterate_backward().
  void compute_backward_ml() {
    if (has_constraint() || has_constraint_nus()) {
      _constr.set_size(1, _width, _period, true);
      _constr.fill(ConstrNode::null());
      // The constraint structure must be as long as the sequence.
      Check((int)ct_constr().size() == (int)seq().size());
    }
    _cyk.p.set_size(NLAYER_CYK, _width, _width, false);
    _cyk.s.set_size(NSTATE, _width);
    _cyk.o.reset(_tag+"_cyk_o.txt", _block_size, _width, NEG_INF());
    _cyk.clean();
    _tau.p.set_size(1, _width, _width, true);
    _tau.o.reset(_tag+"_tau_o.txt", _block_size, _width, (-1));
    _tau.clean();
    ComputeBackwardMLFuncs funcs(*this);
    iterate_backward(funcs);
  }
  // Reads the backward CYK result: entry 0 of the CYK "o" array.
  ScoreT compute_viterbi_score() { return _cyk.o[0]; }
  // Forward pass, then the feature-counting backward pass. Leaves the
  // partition coefficient in _partition_coeff.
  void compute_forward_backward_feature() {
    compute_forward();
    _partition_coeff = compute_partition_coeff();

    compute_backward_feature();
  }
  // Traceback for MEA folding; runs after compute_backward_mea(). In the
  // banded (non-triangular) case the tables are first switched back to
  // forward order / re-sized and cleared. Then the prediction structure is
  // reset and iterate_traceback() is driven with ComputeTracebackMEAFuncs.
  void compute_traceback_mea() {
    if (!_is_triangular) {
      if (has_constraint() || has_constraint_nus()) {
	_constr.flip_order_forward(0);
      }
      _inside.p.flip_order_forward(0);
      // Reload _inside.s from _inside.p for end position j = _width-2
      // (states without an inside layer get NEG_INF()).
      int j = (0+_width-2);
      if (j < 0) {
	_inside.s.fill(NEG_INF());
      } else {
	for (int s = 0; s < NSTATE; s++) {
	  int t = inside_layer(s);
	  for (int i = j; i >= 0; i--) {
	    ScoreT sc = (t >= 0 ? _inside.p.get(t, i, j) : NEG_INF());
	    _inside.s.set(s, (j-i), sc);
	  }
	}
      }
      _outside.p.set_size(NLAYER_OUTSIDE, _width, _width, false);
      _outside.p.fill(NEG_INF());
      _outside.s.fill(NEG_INF());
#ifdef USE_MEA_NUSSINOV
      _mea_prob.p.set_size(1, _width, _width, false); //P_st should be 0
      _mea_prob.clean();
      _cyk.p.set_size(NUS_NLAYER_CYK, _width, _width, true);
      _cyk.p.fill(NEG_INF());
      _cyk.s.fill(NEG_INF());
      _nus_tau.p.flip_order_forward(0);
#else
      _mea_prob.p.set_size(N_MEA_STATE, _width, _width, false);
      _mea_prob.clean();
      _cyk.p.set_size(NLAYER_CYK, _width, _width, true);
      _cyk.p.fill(NEG_INF());
      _cyk.s.fill(NEG_INF());
      _tau.p.flip_order_forward(0);
#endif
    }
    // Fresh prediction structure of length _len.
    _ct_pred.reset(_tag+"_ct_pred.txt", _block_size, _period);
    _ct_pred.resize(_len);
    _ct_pred.set_default();
    ComputeTracebackMEAFuncs funcs(*this);
    iterate_traceback(funcs);
  }
  // Traceback for ML folding; runs after compute_backward_ml(). In the
  // banded (non-triangular) case the constraint and CYK tables are re-sized
  // and cleared and _tau.p is switched to forward order. Then the prediction
  // structure is reset and iterate_traceback() is driven with
  // ComputeTracebackMLFuncs.
  void compute_traceback_ml() {
    if (!_is_triangular) {
      if (has_constraint() || has_constraint_nus()) {
	_constr.set_size(1, _width, _period, false);
	_constr.fill(ConstrNode::null());
      }
      _cyk.p.set_size(NLAYER_CYK, _width, _width, true);
      _cyk.p.fill(NEG_INF());
      _cyk.s.fill(NEG_INF());
      _tau.p.flip_order_forward(0);
    }
    // Fresh prediction structure of length _len.
    _ct_pred.reset(_tag+"_ct_pred.txt", _block_size, _period);
    _ct_pred.resize(_len);
    _ct_pred.set_default();
    ComputeTracebackMLFuncs funcs(*this);
    iterate_traceback(funcs);
  }
  // Visits positions 0.._len in ascending order, in blocks of at most
  // _block_size positions. For each block: f.initialize_block(start),
  // f.compute_period(j) for every j in the block, f.finalize_block(start).
  template <typename Funcs>
  void iterate_forward(Funcs& f) {
    const int last = _len;
    for (int block_offset = 0; block_offset <= last; ) {// no period
      f.initialize_block(block_offset);
      const int block_last = min((block_offset+_block_size-1), last);// no period
      int j = block_offset;
      while (j <= block_last) {
        f.compute_period(j);
        j++;
      }
      f.finalize_block(block_offset);
      block_offset = (block_last+1);
    }
  }
  // Visits positions _len down to (1 - _width) in descending order, in
  // blocks of at most _block_size positions. For each block:
  // f.initialize_block(start), f.compute_period(i) for every i in the
  // block, f.finalize_block(start).
  template <typename Funcs>
  void iterate_backward(Funcs& f) {
    const int first = (0-_width+1);
    for (int block_offset = _len; first <= block_offset; ) {
      f.initialize_block(block_offset);
      const int block_first = max((block_offset-_block_size+1), first);
      int i = block_offset;
      while (i >= block_first) {
        f.compute_period(i);
        i--;
      }
      f.finalize_block(block_offset);
      block_offset = (block_first-1);
    }
  }
  template <typename Funcs>
  void iterate_traceback(Funcs& f) {
    int last = (_len+_width-1);
    int offset = (0+_width-1);
    int b = 0;
    int e = (-1);
    while (true) {
      std::pair<int, int> range = f.seek_for_outer_bf(b);
      b = range.first;
      e = range.second;
      if (e < 0) return; // no more traceback point

      if ((0+_width-1) < e) {
	int b0 = max(0, (b-_width+1));
	int e0 = min((e+_width-1), last);  
	Assert(e0 <= min((b0+_block_size-1), last));
	f.initialize_block(b0);
	for (int j = max(offset, b0); j <= e0; j++) {
	  f.compute_period(b0, e0, j);
	}
	offset = max(offset, (e0+1));
	f.finalize_block(b0);
      }
      ScoreT score = compute_inner_score(b, e);
      string sscons = f.traceback_range(b, e);
      print_structure(b, e, score, sscons);
      b = e;
      e = (-1);
    }
  }
  // Score of the range [b, e]: the difference of the CYK "o" entries at b
  // and e, after making sure that range of the array is loaded.
  ScoreT compute_inner_score(int b, int e) {
    _cyk.o.load_range_if_needed(b, e);
    return _cyk.o[b] - _cyk.o[e];
  }
// Call-site helper for the *_transitions() templates below: the transition
// type is passed either as a template argument (USE_TMPL_ON_TRANSITION) or
// as the first ordinary argument. #undef'd after cyk_transitions().
#ifdef USE_TMPL_ON_TRANSITION
#define on_transition_(t, n, i, j, k, l) template on_transition<t>(n, i, j, k, l)
#else
#define on_transition_(t, n, i, j, k, l) on_transition(t, n, i, j, k, l)
#endif
  // Enumerates the outer-state transitions for cell (i, j) of the forward
  // sweep; i must be 0 (asserted). Each applicable transition is reported
  // through f.on_transition_, bracketed by f.before_transition() and
  // f.after_transition().
  template<typename Funcs>
  void forward_transitions(Funcs& f, int i, int j) {
    Assert(i == 0);
    f.before_transition(i, j);
    //Outer->
    if (i == j) {
      //->OuterEnd
      f.on_transition_(TR_O_X, 0, i, j, i, j);
    } else {//if i < j
      //->Outer
      if (allow_outer_loop(j-1, j)) {
	f.on_transition_(TR_O_O, 0, i, j, i, j-1);
      }
      //->InnerBeg
      // only when (i, j) lies inside the band of width _width
      if (j <= (i+_width-1)) {
        f.on_transition_(TR_O_IB, 0, i, j, i, j);
      }
      //OuterBF -> (Outer + InnerBeg)
      // last two arguments: range of split points, clipped to the band
      f.on_transition_(TR_O_BF, 0, i, j, max((i+1), (j-_width+1)), (j-1));
    }
    f.after_transition(i, j);
  }
  // Reduced form of forward_transitions(): reports only the Outer->Outer
  // transition (when allowed) for cell (i, j); i must be 0 (asserted).
  template<typename Funcs>
  void mea_prob_forward_transitions(Funcs& f, int i, int j) {// only emission transitions
    Assert(i == 0);
    f.before_transition(i, j);
    //Outer->
    if (i == j) {
      // no-op
    } else {//if i < j
      //->Outer
      if (allow_outer_loop(j-1, j)) {
	f.on_transition_(TR_O_O, 0, i, j, i, j-1);
      }
    }
    f.after_transition(i, j);
  }
  // Enumerates the outer-state transitions for cell (i, j) of the backward
  // sweep; i must be 0 (asserted). Each applicable transition is reported
  // through f.on_transition_, bracketed by f.before_transition() and
  // f.after_transition().
  template<typename Funcs>
  void backward_transitions(Funcs& f, int i, int j) {
    Assert(i == 0);
    f.before_transition(i, j);
    //->Outer
    if (j == _len) {
      //OuterBeg->
      f.on_transition_(TR_X_O, 0, i, j, i, j);
    } else {//if j < _len
      if (allow_outer_loop(j, j+1)) {
	f.on_transition_(TR_O_O, 0, i, j, i, j+1);
      }
      //OuterBFL->
      // last two arguments: range (j+1 .. min(j+_width-1, _len))
      f.on_transition_(TR_O_BFL, 0, i, j, (j+1), min((j+_width-1), _len));
    }
    f.after_transition(i, j);
  }
  // Enumerates the outer-state transitions for cell (i, j) when sweeping
  // from right to left; j must equal _len (asserted). Each applicable
  // transition is reported through f.on_transition_, bracketed by
  // f.before_transition() and f.after_transition().
  template<typename Funcs>
  void viterbi_transitions(Funcs& f, int i, int j) {
    Assert(j == _len);
    f.before_transition(i, j);
    //Outer->
    if (i == j) {
      //->OuterEnd
      f.on_transition_(TR_O_X, 0, i, j, i, j);
    } else { //if i < j
      //be careful right to left transition 
      //->Outer
      if (allow_outer_loop(i, i+1)) {
	f.on_transition_(TR_O_O, 0, i, j, i+1, j);
      }
      //->InnerBeg
      // only when (i, j) lies inside the band of width _width
      if ((j - _width) < i) {
        f.on_transition_(TR_O_IB, 0, i, j, i, j);
      }
      //be careful right to left transition
      //OuterBF -> (InnerBeg + Outer) 
      // last two arguments: range of split points, clipped to the band
      f.on_transition_(TR_O_BF, 0, i, j, (i+1), min((j-1), (i+_width-1)));
    }
    f.after_transition(i, j);
  }
  // Enumerates the inner-state transitions for cell (i, j), in the order
  // Stem, Multi, StemEnd, InnerBeg. Each applicable transition is reported
  // through f.on_transition_, bracketed by f.before_transition() and
  // f.after_transition(). Transitions are gated by allow_pair() and
  // allow_inner_loop(). Also used unchanged by cyk_transitions().
  template<typename Funcs>
  void inside_transitions(Funcs& f, int i, int j) {
    f.before_transition(i, j);
    //Stem->
    int k = (i+1);
    int l = (j-1);
    if (k <= l) {
      if (allow_pair(i, j)) {
	// one transition per stack-count index n in 0..D
	for (int n = 0; n <= D; n++) {
	  //->Stem
	  if (k+2 <= l) {
	    if (allow_pair(k, l)) {
	      f.on_transition_(TR_S_S, n, i, j, k, l);
	    }
	  }
	  if (MIN_NSTACK <= n) {
	    //->StemEnd
	    f.on_transition_(TR_S_E, n, i, j, k, l);
	  }
	}
      }
    }
    //Multi->
    // only for cells strictly inside the sequence ends
    if (0 < i && j < _len) {
      // MultiBF->(Multi1 + Multi2)
      f.on_transition_(TR_M_BF, 0, i, j, (i+1), (j-1));
      //Multi2->
      //->Stem
      if ((i+2 <= j) && allow_pair(i, j)) {
	f.on_transition_(TR_M2_S, 0, i, j, i, j);
      }
      //->Multi2
      if (i <= (j-1)) {
	if (allow_inner_loop(j-1, j)) {
	  f.on_transition_(TR_M2_M2, 0, i, j, i, j-1);
	}
      }
      //Multi1->
      //->Multi2
      f.on_transition_(TR_M1_M2, 0, i, j, i, j);
      //->MultiBF
      f.on_transition_(TR_M1_MBF, 0, i, j, i, j);

      //Multi->
      //->Multi
      if ((i+1) <= j) {
	if (allow_inner_loop(i, i+1)) {
	  f.on_transition_(TR_M_M, 0, i, j, i+1, j);
	}
      }
      //->MultiBF
      f.on_transition_(TR_M_MBF, 0, i, j, i, j);
    }
    //StemEnd->
    if (0 < i && j < _len) {
      // the enclosing pair is (i-1, j+1)
      if (allow_pair(i-1, j+1)) {
	//->Hairpin
	if (allow_inner_loop(i, j)) {
	  f.on_transition_(TR_E_H, 0, i, j, i, j);
	}
	//->Interior Loop (includes Bulge)->Stem
	// li / lj: unpaired lengths on the left / right side, li+lj <= C
	for (int li = 0; li <= min((j-i), (int)C); li++) {
	  int ip = (i+li);
	  if (allow_inner_loop(i, ip)) {
	    for (int lj = max(0, 1-li); lj <= min((j-(ip+2)), (C-li)); lj++) { 
	      // (li+lj) >= 1
	      int jp = (j-lj);
	      if (allow_inner_loop(jp, j)) {
		if (allow_pair(ip, jp)) {
		  f.on_transition_(TR_E_I, 0, i, j, ip, jp);
		}
	      }
	    }
	  }
	}
	//->Multi
	f.on_transition_(TR_E_M, 0, i, j, i, j);
      }
    }
    //InnerBeg->
    //->Stem
    if ((k <= l) && allow_pair(i, j)) {
      f.on_transition_(TR_IB_S, 0, i, j, i, j);
    }
    f.after_transition(i, j);
  }
  // Nussinov-model counterpart of viterbi_transitions(): enumerates the
  // NUS_TR_O_* transitions for cell (i, j); j must equal _len (asserted).
  // Uses the *_nus permission checks.
  template<typename Funcs>
  void viterbi_nussinov_transitions(Funcs& f, int i, int j) {
    Assert(j == _len);
    f.before_transition(i, j);
    if (i == j) {
      f.on_transition_(NUS_TR_O_X, 0, i, j, i, j);
    } else { //if i < j
      //be careful right to left transition 
      if (allow_outer_loop_nus(i, i+1)) {
	f.on_transition_(NUS_TR_O_O, 0, i, j, i+1, j);
      }
      // only when (i, j) lies inside the band of width _width
      if ((j-_width) < i) {
	if (allow_pair_nus(i, j)) {
	  f.on_transition_(NUS_TR_O_P, 0, i, j, i, j);
	}
      }
      //be careful right to left transition
      // last two arguments: range of split points, clipped to the band
      f.on_transition_(NUS_TR_O_B, 0, i, j, (i+1), min((j-1), (i+_width-1)));
    }
    f.after_transition(i, j);
  }
  // Nussinov-model inner transitions for cell (i, j): end (i == j), pair
  // (NUS_TR_P / NUS_TR_I), left and right unpaired extension (NUS_TR_L /
  // NUS_TR_R) and bifurcation (NUS_TR_B). Uses the *_nus permission checks.
  template<typename Funcs>
  void cyk_nussinov_transitions(Funcs& f, int i, int j) {
    f.before_transition(i, j);
    if (i == j) {
      f.on_transition_(NUS_TR_X, 0, i, j, i, j);
    } else {// i < j
      int k = (i+1);
      int l = (j-1);
      if (k <= l) {
	if (allow_pair_nus(i, j)) {
	  f.on_transition_(NUS_TR_P, 0, i, j, k, l);
	  f.on_transition_(NUS_TR_I, 0, i, j, i, j);
	}
      }
      if (allow_inner_loop_nus(i, i+1)) {
	f.on_transition_(NUS_TR_L, 0, i, j, i+1, j);
      }
      if (allow_inner_loop_nus(j-1, j)) {
	f.on_transition_(NUS_TR_R, 0, i, j, i, j-1);
      }
      // last two arguments: range of split points (i+1 .. j-1)
      f.on_transition_(NUS_TR_B, 0, i, j, (i+1), (j-1));
    }
    f.after_transition(i, j);
  }
  // Reduced form of inside_transitions(): reports only the Stem->Stem
  // (TR_S_S) and Stem->StemEnd (TR_S_E) transitions for cell (i, j).
  template<typename Funcs>
  void mea_prob_inside_transitions(Funcs& f, int i, int j) {// only S_S and S_E transitions
    f.before_transition(i, j);
    int k = (i+1);
    int l = (j-1);
    if (k <= l) {
      if (allow_pair(i, j)) {
	//Stem->
	// one transition per stack-count index n in 0..D
	for (int n = 0; n <= D; n++) {
	  //->Stem
	  if ((k+2 <= l) && allow_pair(k, l)) {
	    f.on_transition_(TR_S_S, n, i, j, k, l);
	  }
	  if (MIN_NSTACK <= n) {
	    //->StemEnd
	    f.on_transition_(TR_S_E, n, i, j, k, l);
	  }
	}
      }
    }
    f.after_transition(i, j);
  }
  // Enumerates, for cell (i, j), the transitions that lead INTO each inner
  // state (the reverse direction of inside_transitions()). Each applicable
  // transition is reported through f.on_transition_, bracketed by
  // f.before_transition() and f.after_transition(). Ranges are clipped to
  // the band of width _width and to the sequence bounds [0, _len].
  template<typename Funcs>
  void outside_transitions(Funcs& f, int i, int j) {
    f.before_transition(i, j);
    //->InnerBeg
    if (i == 0) {
      //Outer->
      f.on_transition_(TR_O_IB, 0, i, j, i, j);
    } else if (i < j) {
      //Outer->(Outer + .)
      f.on_transition_(TR_O_BFR, 0, i, j, 0, 0);
    }
    //->StemEnd
    // (k, l) is the pair immediately enclosing (i, j)
    int k = (i-1);
    int l = (j+1);
    if (0 <= k && l <= _len && (l-k) < _width) {
      if (allow_pair(k, l)) {
	//Stem->
	for (int n = MIN_NSTACK; n <= D; n++) {
	  f.on_transition_(TR_S_E, n, i, j, k, l);
	}
      }
    }
    //->Multi
    //StemEnd->
    if (0 < i && j < _len) {
      if (allow_pair(i-1, j+1)) {
	f.on_transition_(TR_E_M, 0, i, j, i, j);
      }
    }
    //Multi->
    if (max(0, (j-_width+1)) < i) {
      if (allow_inner_loop(i-1, i)) {
	f.on_transition_(TR_M_M, 0, i, j, i-1, j);
      }
    }
    //->Multi1
    //MultiBF->(. + Multi2)
    f.on_transition_(TR_M_BFL, 0, i, j, (j+1), (min(_len, (i+_width-1)) - 1));

    //->Multi2
    //Multi2->
    if (j < min(_len, (i+_width-1))) {
      if (allow_inner_loop(j, j+1)) {
	f.on_transition_(TR_M2_M2, 0, i, j, i, j+1);
      }
    }
    //Multi1->
    f.on_transition_(TR_M1_M2, 0, i, j, i, j);
    //MultiBF->(Multi1 + .)
    f.on_transition_(TR_M_BFR, 0, i, j, max(0, (j-_width+1)) + 1, (i-1));
    //->MultiBF
    //Multi1->
    f.on_transition_(TR_M1_MBF,0,  i, j, i, j);
    //Multi->
    f.on_transition_(TR_M_MBF, 0, i, j, i, j);
    //->Stem
    if ((i+2 <= j) && allow_pair(i, j)) {
      for (int n = 0; n <= D; n++) {
	// transitions that do not depend on n are reported once, at n == 0
	if (n == 0) {
	  //->InnerBeg
	  f.on_transition_(TR_IB_S, 0, i, j, i, j);
	  //Stem->Interior->
	  // li / lj: unpaired lengths on the left / right side, li+lj <= C
	  for (int li = 0; li <= min((i-(max(0, (j-_width+1)+1))), (int)C); li++) {
	    int ip = (i-li);
	    if (allow_inner_loop(ip, i)) {
	      for (int lj = max(0, (1-li));lj <= min((min(_len,((ip-1)+_width-1))-1-j),(C-li)); lj++) {
		// (li+lj) > 0
		int jp = (j+lj);
		if (allow_inner_loop(j, jp)) {
		  if (allow_pair(ip-1, jp+1)) {
		    f.on_transition_(TR_E_I, 0, i, j, ip, jp);
		  }
		}
	      }
	    }
	  }
	  //Multi2->
	  f.on_transition_(TR_M2_S, 0, i, j, i, j);
	}
	// shadows the outer k, l with the same values (i-1, j+1)
	int k = (i-1);
	int l = (j+1);
	if (0 <= k && l <= _len && (l-k) < _width) {
	  //->STEM
	  if (allow_pair(k, l)) {
	    f.on_transition_(TR_S_S, n, i, j, k, l);
	  }
	}
      }
    }
    f.after_transition(i, j);
  }
  // The CYK sweep visits exactly the transitions of the inside sweep.
  template<typename Funcs>
  void cyk_transitions(Funcs& f, int i, int j) { inside_transitions(f, i, j); }
#undef on_transition_

  // Whether probability output is enabled (see set_print_prob()).
  bool prints_prob() {
    return _print_prob;
  }
public:
  // Parallel result vectors appended to by print_prob(): entry k of each
  // vector describes one recorded probability (left position, right
  // position, value). Nothing in the code shown here clears them.
  vector<int> left;
  vector<int> right;
  vector<double> score;
  // Records one probability entry in the public left/right/score vectors.
  //
  // Despite the name nothing is written to a stream here (the stream output
  // was disabled in favour of the vectors); _f_prob only acts as an on/off
  // switch: when it is null nothing is recorded.
  //
  //   type  MEAState of the entry; any other value is fatal -- but, as
  //         before, only for entries that pass the cutoff.
  //   i, j  positions; for P_st the left position is stored as i+1
  //         (1-based sequence coordinate), otherwise as i.
  //   w     value; recorded only when strictly greater than
  //         _print_prob_cutoff.
  //
  // I_st entries need both _print_loop_prob and _mea_separate_loop_type;
  // O_st entries need only _print_loop_prob.
  void print_prob(int type, int i, int j, ScoreT w) {
    if (!_f_prob) return;
    if (!(w > _print_prob_cutoff)) return;

    int left_pos = i;
    switch (type) {
    case P_st: // 1 based seq coord
      left_pos = (i+1);
      break;
    case I_st:
      if (!(_print_loop_prob && _mea_separate_loop_type)) return;
      break;
    case O_st:
      if (!_print_loop_prob) return;
      break;
    default:
      Die("bad type %d", type);
      return;
    }
    left.push_back(left_pos);
    right.push_back(j);
    score.push_back(w);
  }
  // Writes one tab-separated line for the traced-back range to the
  // structure stream, if one is set: start (b+1), end (e), score, the
  // e-value when COMPUTE_E_VALUE is defined, and the structure string.
  void print_structure(int b, int e, ScoreT score, const string& sscons) {
    if (_f_struct) {
      (*_f_struct) << (b+1) << '\t' << e << '\t' << score << '\t'
#ifdef COMPUTE_E_VALUE
                   << compute_e_value(score) << '\t'
#endif
                   << sscons << "\n";
    }
  }
  // Maps a score to its E-value by delegating to _param.e_value().
  ScoreT compute_e_value(ScoreT score) {
    const ScoreT e_val = _param.e_value(score);
    return e_val;
  }
  // Maps a score to its bit score by delegating to _param.bit_score().
  ScoreT compute_bit_score(ScoreT score) {
    const ScoreT bits = _param.bit_score(score);
    return bits;
  }

// Helper macros used by the Funcs* callback classes below.
//  - void_on_transition_(t, n, i, j, k, l) declares the on_transition callback.
//    With USE_TMPL_ON_TRANSITION the first argument (e.g. "TrType type") becomes
//    the template parameter list and the rest are the function parameters;
//    otherwise all six are ordinary function parameters.
//  - tsc_(t, n, i, j, k, l) spells the matching call: tsc<t>(n, ...) in the
//    template build, tsc(t, n, ...) otherwise.
// Both macros are #undef'ed again after the callback classes.
#ifdef USE_TMPL_ON_TRANSITION
#define void_on_transition_(t, n, i, j, k, l) template <t> void on_transition(n, i, j, k, l)
#define tsc_(t, n, i, j, k, l) tsc<t>(n, i, j, k, l)
#else
#define void_on_transition_(t, n, i, j, k, l) void on_transition(t, n, i, j, k, l) 
#define tsc_(t, n, i, j, k, l) tsc(t, n, i, j, k, l)
#endif

  template <bool fwd>
  class FuncsInside : private Model {
  private:
    Rfold& _rfold;
    Inside& _inside;
    CurrSC& _curr_sc;
  public:
    FuncsInside(Rfold& rfold) 
      :_rfold(rfold),
       _inside(_rfold.inside()),
       _curr_sc(_rfold.curr_sc()) {
    }
    void before_transition(int i, int j) {
      if (i == j) {
	for (int k = 0; k < (int)_curr_sc.size(); k++) {
	  _curr_sc[k].fill(NEG_INF());
	}
      } else {// i < j
	_curr_sc[DST_C].fill(NEG_INF());
      }
      enum{dst = (fwd ? DST_W : DST_E)};
      for (int s = 0; s < NSTATE; s++) {
	_curr_sc[dst][s] = _inside.s.get(s, (j-i));
      }
      _curr_sc[DST_C][HAIRPIN_END] = 0.0;
    }
    void after_transition(int i, int j) {
      for (int s = 0; s < NSTATE; s++) {
	ScoreT sc = _curr_sc[DST_C][s];
	if (impossible(sc)) {
	  sc = NEG_INF();
	}
	_inside.s.set(s, (j-i), sc);
	int k = inside_layer(s);
	if (k >= 0) {
	  _inside.p.set(k, i, j, sc);
	}
      }
      if (fwd) {
	_rfold.curr_sc_move_nw(_curr_sc);
      } else {
	_rfold.curr_sc_move_ne(_curr_sc);
      }
    }
    void_on_transition_(TrType type, int n, int i, int j, int k, int l) {
      const Transition& t = TRANSITIONS[(type+n)];
      ScoreT sc = NEG_INF();
      if (t.bf) {
	// for (int m = k; m <= l; m++) {
        //   ScoreT sc1 = (_inside.get(t.to, i, m) + _inside.get(t.to1, m, j, false));
	//   LOGADD(sc, sc1);
        // }
	int m = (l - k + 1);
	if (m > 0) {
	  if (fwd) {
	    Inside::P::const_iterator it1 = _inside.p.iter(inside_layer(t.to), i, k);
	    Inside::S::const_iterator it2 = _inside.s.iter(t.to1, (j-k));
	    while (m-- > 0) {
	      const ScoreT& sc1 = ((*it1++) + (*it2--));
	      LOGADD(sc, sc1);
	    }
	  } else {
	    Inside::S::const_iterator it1 = _inside.s.iter(t.to, (k-i));
	    Inside::P::const_iterator it2 = _inside.p.iter(inside_layer(t.to1), k, j);
	    while (m-- > 0) {
	      const ScoreT& sc1 = ((*it1++) + (*it2++));
	      LOGADD(sc, sc1);
	    }
	  }
	}
      } else {
	if (t.dst_in >= 0) {// same for both fwd and bwd
	  sc = _curr_sc[t.dst_in][t.to];
	} else {
	  sc = _inside.p.get(inside_layer(t.to), k, l);
	}
	sc += _rfold.tsc_(type, n, i, j, k, l);
      }
      LOGADD(_curr_sc[DST_C][t.from], sc);  
    }
  };
  class FuncsForward : private Model {// forward direction only
  private:
    Rfold& _rfold;
    int _width;
    Inside& _inside;
    CurrSC& _curr_sc;
  public:
    FuncsForward(Rfold& rfold) 
      :_rfold(rfold),
       _width(_rfold.width()),
       _inside(_rfold.inside()),
       _curr_sc(_rfold.curr_sc()) {
    }
    void before_transition(int i, int j) {
      if ((j-i) < _width) {
        _curr_sc[DST_C][INNER_BEG] = _inside.s.get(INNER_BEG, (j-i));
      } else {
	_curr_sc[DST_C][INNER_BEG] = NEG_INF();
      }
      _curr_sc[DST_C][OUTER]     = NEG_INF();
      _curr_sc[DST_C][OUTER_END] = 0.0;
    }
    void after_transition(int i, int j) {
      ScoreT sc = _curr_sc[DST_C][OUTER];
      if (impossible(sc)) {
	sc = NEG_INF();
      }
      _inside.o[j] = sc;
    }
    void_on_transition_(TrType type, int n, int i, int j, int k, int l) {
      const Transition& t = TRANSITIONS[(type+n)];
      ScoreT sc = NEG_INF();
      if (t.bf) {
	// for (int m = k; m <= l; m++) {
        //   ScoreT sc1 = (_inside.o[m] + _inside.get(t.to1, m, j, false));
        //   LOGADD(sc, sc1);
        // }
	int m = (l - k + 1);
	if (m > 0) {
	  Inside::O::const_iterator it1 = &_inside.o[k];
	  Inside::S::const_iterator it2 = _inside.s.iter(t.to1, (j-k));
	  while (m-- > 0) {
	    const ScoreT& sc1 = ((*it1++) + (*it2--));
	    LOGADD(sc, sc1);
	  }
	}
      } else {
	if (t.dst_in >= 0) {
	  sc = _curr_sc[t.dst_in][t.to];
	} else {
	  sc = _inside.o[l];
	}
	sc += _rfold.tsc_(type, n, i, j, k, l);
      }
      LOGADD(_curr_sc[DST_C][OUTER], sc);
    }
  };
  // Callback set for the outside recursion on cell (i, j).
  // `fwd` selects the sweep direction: it decides which neighbour slot
  // (DST_W vs DST_E) is preloaded from outside.s, which boundary case applies
  // in before_transition, and whether curr_sc is shifted with
  // curr_sc_move_sw or curr_sc_move_se after the cell is finished.
  template <bool fwd>
  class FuncsOutside : private Model {
  private:
    Rfold& _rfold;
    int _width;
    Inside& _inside;
    Outside& _outside;
    CurrSC& _curr_sc;
  public:
    FuncsOutside(Rfold& rfold) 
      :_rfold(rfold),
       _width(_rfold.width()),
       _inside(_rfold.inside()),
       _outside(_rfold.outside()),
       _curr_sc(_rfold.curr_sc()) {
    }
    // Prepares the score slots before the transitions of cell (i, j) fire.
    // The N/NW/NE slots are only (re)initialised in the three boundary cases
    // below; in every other case they keep whatever the previous
    // curr_sc_move_* call left in them.
    void before_transition(int i, int j) {
      if ((j-i+1) == _width) {
	// cell spans the full window width: all three upper slots are reset
	_curr_sc[DST_N].fill(NEG_INF());
	_curr_sc[DST_NW].fill(NEG_INF());
	_curr_sc[DST_NE].fill(NEG_INF());
      } else if (fwd && (j == _rfold.len())) {
	// forward sweep at the last column: only NW is loaded, from the
	// outside.s entry at span j-(i-1)
	_curr_sc[DST_N].fill(NEG_INF());
	_curr_sc[DST_NE].fill(NEG_INF());
	for (int s = 0; s < NSTATE; s++) {
	  _curr_sc[DST_NW][s] = _outside.s.get(s, (j-(i-1)));
	}
      } else if ((!fwd) && (i == 0)) {
	// reverse sweep at the first row: only NE is loaded, from the
	// outside.s entry at span (j+1)-i
	_curr_sc[DST_N].fill(NEG_INF());
	_curr_sc[DST_NW].fill(NEG_INF());
	for (int s = 0; s < NSTATE; s++) {
	  _curr_sc[DST_NE][s] = _outside.s.get(s, ((j+1)-i));
	}
      }
      _curr_sc[DST_C].fill(NEG_INF());
      if (i == 0) {
	// the OUTER score of the centre slot is seeded from outside.o[j]
        _curr_sc[DST_C][OUTER] = _outside.o[j];
      }
      enum {dst = (fwd ? DST_W : DST_E)};
      for (int s = 0; s < NSTATE; s++) {
	_curr_sc[dst][s] = _outside.s.get(s, (j-i));
      }	  
    }
    // Stores the finished centre scores into outside.s (and outside.p for the
    // states that have a layer), then shifts the slot window.
    void after_transition(int i, int j) {
      for (int s = 0; s < NSTATE; s++) {
	ScoreT sc = _curr_sc[DST_C][s];
	if (impossible(sc)) {
	  sc = NEG_INF();
	}
	_outside.s.set(s, (j-i), sc);
	int k = outside_layer(s);
	if (k >= 0) {
	  _outside.p.set(k, i, j, sc);
	}
      }
      if (fwd) {
	_rfold.curr_sc_move_sw(_curr_sc);
      } else {
	_rfold.curr_sc_move_se(_curr_sc);
      }
    }
    // Log-adds the contribution of transition (type+n) into centre slot s,
    // where s is t.to or t.to1 depending on the branch taken below.
    // Note that tsc_ is called with the coordinate pairs swapped, (k, l, i, j),
    // compared with the inside callbacks.
    void_on_transition_(TrType type, int n, int i, int j, int k, int l) { 
      const Transition& t = TRANSITIONS[(type+n)];
      int s = (-1);
      ScoreT sc = NEG_INF();
      if (t.from == OUTER) {
        if (t.bf) {
	  // outer bifurcation: combines the stored outer inside and outside
	  // scores directly, no transition score is added
          Assert((type+n) == TR_O_BFR && t.to == OUTER && t.to1 == INNER_BEG);
          Assert(k == 0 && l == 0);
          s = t.to1;
          sc = (_inside.o[i] + _outside.o[j]);
        } else {
          Assert((type+n) == TR_O_IB && t.to == INNER_BEG);
          s = t.to;
	  if (t.dst_out >= 0) {
	    // reads the centre slot (seeded in before_transition), not
	    // _curr_sc[t.dst_out]
	    sc = _curr_sc[DST_C][t.from];
	  } else {
	    sc = _outside.o[l];
	  }
          sc += _rfold.tsc_(type, n, k, l, i, j);
        }
      } else {
	int m = 0;
        switch (t.bf) {
        case BF_LEFT:
	  // destination is t.to; log-sum over (l - k + 1) terms pairing an
	  // outside score of t.from with an inside score of t.to1
          s = t.to;
          // for (int m = k; m <= l; m++) {
          //   ScoreT sc1 = _rfold.tsc_((type+n), i, m, i, j);
          //   sc1 += (_outside.get(t.from, i, m) + _inside.get(t.to1, j, m));
          //   LOGADD(sc, sc1);
          // }
	  m = (l - k + 1);
	  if (m > 0) {
	    if (fwd) {
	      Outside::S::const_iterator it1 = _outside.s.iter(t.from, (k-i));
	      Inside::P::const_iterator  it2 = _inside.p.iter(inside_layer(t.to1), j, k);
	      while (m-- > 0) {
		const ScoreT& sc1 = ((*it1++) + (*it2++));
		LOGADD(sc, sc1);
	      }
	    } else {
	      // inside.s1 should be maintained
	      Outside::P::const_iterator it1 = _outside.p.iter(outside_layer(t.from), i, k);
	      Inside::S::const_iterator  it2 = _inside.s1.iter(t.to1, (k-j));
	      while (m-- > 0) {
		const ScoreT& sc1 = ((*it1++) + (*it2++));
		LOGADD(sc, sc1);
	      }
	    }
	  }
	  break;
	case BF_RIGHT:
	  // destination is t.to1; log-sum over (l - k + 1) terms pairing an
	  // inside score of t.to with an outside score of t.from
          s = t.to1;
          // for (int m = k; m <= l; m++) {
          //   ScoreT sc1 = _rfold.tsc_((type+n), m, j, i, j);
          //   sc1 += (_inside.get(t.to, m, i, false) + _outside.get(t.from, m, j, false));
          //   LOGADD(sc, sc1);
          // }
	  m = (l - k + 1);
	  if (m > 0) {
	    if (fwd) {
	      // inside.s1 should be maintained
	      Inside::S::const_iterator  it1 = _inside.s1.iter(t.to, (i-k));
	      Outside::P::const_iterator it2 = _outside.p.iter(outside_layer(t.from), k, j);
	      while (m-- > 0) {
		const ScoreT& sc1 = ((*it1--) + (*it2++));
		LOGADD(sc, sc1);
	      }
	    } else {
	      Inside::P::const_iterator  it1 = _inside.p.iter(inside_layer(t.to), k, i);
	      Outside::S::const_iterator it2 = _outside.s.iter(t.from, (j-k));
	      while (m-- > 0) {
		const ScoreT& sc1 = ((*it1++) + (*it2--));
		LOGADD(sc, sc1);
	      }
	    }
	  }
	  break;
        default:
	  // non-bifurcating transition: outside score of t.from taken from a
	  // slot or from the stored matrix, plus the transition score
          s = t.to;
	  if (t.dst_out >= 0) {
	    sc = _curr_sc[t.dst_out][t.from];
	  } else {
	    sc = _outside.p.get(outside_layer(t.from), k, l);
	  }
          sc += _rfold.tsc_(type, n, k, l, i, j);
	  break;
        }
      }
      LOGADD(_curr_sc[DST_C][s], sc);      
    }
  };
  class FuncsBackward : private Model {// only backward direction is considered
  private:
    Rfold& _rfold;
    Inside& _inside;
    Outside& _outside;
    CurrSC& _curr_sc;
  public:
    FuncsBackward(Rfold& rfold) 
      :_rfold(rfold),
       _inside(_rfold.inside()),
       _outside(_rfold.outside()),
       _curr_sc(_rfold.curr_sc()) {
    }
    void before_transition(int i, int j) {
      _curr_sc[DST_C][OUTER] = NEG_INF();
      _curr_sc[DST_C][OUTER_BEG] = 0.0;
    }
    void after_transition(int i, int j) {
      ScoreT sc = _curr_sc[DST_C][OUTER];
      if (impossible(sc)) {
	sc = NEG_INF();
      }
      _outside.o[j] = sc;
    }
    void_on_transition_(TrType type, int n, int i, int j, int k, int l) {
      const Transition& t = TRANSITIONS[(type+n)];
      int s = (-1);
      ScoreT sc = NEG_INF();
      if (t.bf) { //TR_O_BFL
        Assert((type+n) == TR_O_BFL && t.to1 == INNER_BEG && t.to == OUTER);
        s = t.to;
        // for (int m = k; m <= l; m++) {
        //   ScoreT sc1 = (_inside.get(t.to1, j, m) + _backward[m]);
        //   LOGADD(sc, sc1);
        // }
	int m = (l - k + 1);
	if (m > 0) {
	  Inside::S::const_iterator  it1 = _inside.s1.iter(t.to1, (k-j));
	  Outside::O::const_iterator it2 = &_outside.o[k];
	  while (m-- > 0) {
	    const ScoreT& sc1 = ((*it1++) + (*it2++));
	    LOGADD(sc, sc1);
	  }
	}
      } else {
        s = t.to;
	if (t.dst_out >= 0) {
	  sc = _curr_sc[t.dst_out][t.from];
	} else {
	  sc = _outside.o[l];
	}
        sc += _rfold.tsc_(type, n, k, l, i, j);
      }
      LOGADD(_curr_sc[DST_C][s], sc); 
    }
  };
  // Callback set for the CYK (best-score) recursion on cell (i, j), plus the
  // traceback that turns the stored back-pointers into a structure string.
  //   fwd           : sweep direction; selects the preloaded neighbour slot
  //                   (DST_W vs DST_E) and the curr_sc shift (nw vs ne).
  //   use_mea_score : when true, transitions are scored with tsc_mea instead
  //                   of tsc.
  // Scores go to cyk.s / cyk.p; one TBNode per cell goes to tau.p layer 0.
  template <bool fwd, bool use_mea_score>
  class FuncsCYK : private Model {
  private:
    Rfold& _rfold;
    CYK& _cyk;
    Tau& _tau;
    CurrSC& _curr_sc;
    // back-pointer node being assembled for the current cell
    TBNode _curr_tr;
  public:
    FuncsCYK(Rfold& rfold) 
      :_rfold(rfold),
       _cyk(_rfold.cyk()),
       _tau(_rfold.tau()),
       _curr_sc(_rfold.curr_sc()),
       _curr_tr(TBNode::null()) {
    }
    // Prepares the score slots and clears the back-pointer node before the
    // transitions of cell (i, j) fire.
    void before_transition(int i, int j) {
      if (i == j) {
	for (int k = 0; k < (int)_curr_sc.size(); k++) {
	  _curr_sc[k].fill(NEG_INF());
	}
      } else {// i < j
	_curr_sc[DST_C].fill(NEG_INF());
      }
      enum{dst = (fwd ? DST_W : DST_E)};
      for (int s = 0; s < NSTATE; s++) {
	_curr_sc[dst][s] = _cyk.s.get(s, (j-i));
      }
      _curr_sc[DST_C][HAIRPIN_END] = 0.0;
      _curr_tr.clean();
    }
    // Stores the finished centre scores and the back-pointer node of cell
    // (i, j), then shifts the slot window.
    void after_transition(int i, int j) {
      for (int s = 0; s < NSTATE; s++) {
	ScoreT sc = _curr_sc[DST_C][s];
	if (impossible(sc)) {
	  sc = NEG_INF();
	}
	_cyk.s.set(s, (j-i), sc);
	int k = cyk_layer(s);
	if (k >= 0) {
	  _cyk.p.set(k, i, j, sc);
	}
      }
      _tau.p.set(0, i, j, _curr_tr);
      if (fwd) {
	_rfold.curr_sc_move_nw(_curr_sc);
      } else {
	_rfold.curr_sc_move_ne(_curr_sc);
      }
    }
    // Scores transition (type+n); if it beats the current best for state
    // t.from by more than MIN_DIFF(), it replaces the score and the
    // back-pointer (best_k, best_l) is recorded in _curr_tr.
    void_on_transition_(TrType type, int n, int i, int j, int k, int l) { 
      const Transition& t = TRANSITIONS[(type+n)];
      ScoreT sc = NEG_INF();
      int best_k = (-1);
      int best_l = (-1);
      if (t.bf) {
	// bifurcation: maximise over the split points k..l
        // for (int m = k; m <= l; m++) {
        //   ScoreT sc1 = (_cyk.get(t.to, i, m) + _cyk.get(t.to1, m, j, false));
        //   if ((sc + MIN_DIFF()) < sc1) {
        //     sc = sc1;
	//     tr.set(&t, m, m);
        //   }
        // }
	int m = (l - k + 1);
	if (m > 0) {
	  int best_m = (-1);
	  if (fwd) {
	    CYK::P::const_iterator it1 = _cyk.p.iter(cyk_layer(t.to), i, k);
	    CYK::S::const_iterator it2 = _cyk.s.iter(t.to1, (j-k));
	    while (m-- > 0) {
	      const ScoreT& sc1 = ((*it1++) + (*it2--));
	      if ((sc + MIN_DIFF()) < sc1) {
		sc = sc1;
		// m was already decremented by the loop test, so (l - m) is
		// the split point of the term just examined
		best_m = m;
	      }
	    }
	  } else {
	    CYK::S::const_iterator it1 = _cyk.s.iter(t.to, (k-i));
	    CYK::P::const_iterator it2 = _cyk.p.iter(cyk_layer(t.to1), k, j);
	    while (m-- > 0) {
	      const ScoreT& sc1 = ((*it1++) + (*it2++));
	      if ((sc + MIN_DIFF()) < sc1) {
		sc = sc1;
		best_m = m;
	      }
	    }
	  }
	  // both coordinates hold the same split point (or -1 if none won)
	  best_k = (best_m == (-1) ? (-1) : (l-best_m));
	  best_l = (best_m == (-1) ? (-1) : (l-best_m));
	}
      } else {
	if (t.dst_in >= 0) {// same for both fwd and bwd
	  sc = _curr_sc[t.dst_in][t.to];
	} else {
	  sc = _cyk.p.get(cyk_layer(t.to), k, l);
	}
	sc += (use_mea_score
	       ? _rfold.tsc_mea(type, n, i, j, k, l) 
	       : _rfold.tsc_(type, n, i, j, k, l));
	best_k = k;
	best_l = l;
      }
      if ((_curr_sc[DST_C][t.from] + MIN_DIFF()) < sc) {
        _curr_sc[DST_C][t.from] = sc;
	_curr_tr.set((TrType)(type+n), i, j, best_k, best_l);
      }
    }
    // One pending traceback step: back-pointer node, state and cell (i, j).
    struct TBInfo {
      TBNode nd;
      State s;
      int i;
      int j;
      TBInfo(const TBNode& nd1, State s1, int i1, int j1)
	: nd(nd1), s(s1), i(i1), j(j1) {}
    };
    // Scans tau.o from `offset` up to and including len() for the first entry
    // whose value is >= 0. Returns (index, value), or (len()+1, -1) if there
    // is none.
    std::pair<int, int> seek_for_outer_bf(int offset) {
      for (int i = offset; i <= _rfold.len(); i++) {
	_tau.o.load_range_if_needed(i, i);
	int val = _tau.o[i];
	if (val >= 0) {
	  return std::make_pair(i, val);
	}
      }
      return std::make_pair((_rfold.len()+1), (-1));
    }
    // Follows the back-pointers in tau.p starting at cell (b, e) in state
    // INNER_BEG, using an explicit stack. Every pair-emitting transition adds
    // a pair to ct_pred() and marks '<' / '>' in the returned string, which
    // has (e-b) characters and is '.' everywhere else.
    string traceback_range(int b, int e) {
      string sscons((e-b), '.');
      Vector<TBInfo> arr;
      arr.push_back(TBInfo(_tau.p.get(0, b, e), INNER_BEG, b, e));
      while (!arr.empty()) {
	// NOTE(review): tb refers to the element removed by pop_back() below,
	// and t is still read afterwards. That is only safe if transition()
	// returns something that does not live inside the popped element -- confirm.
	const TBInfo& tb = arr.back();
	const Transition& t = tb.nd.transition(tb.s);
	int i = tb.i;
	int j = tb.j;
	const std::pair<int, int>& pos = tb.nd.pos(tb.s, tb.i, tb.j);
	int k = pos.first;
	int l = pos.second;
	arr.pop_back();// tb deleted
	if (t.bf == BF_PARENT) {
	  // bifurcation: push the right part first so the left part (i, k) is
	  // processed next
	  arr.push_back(TBInfo(_tau.p.get(0, l, j), (State)t.to1, l, j));
	  arr.push_back(TBInfo(_tau.p.get(0, i, k), (State)t.to, i, k));
	} else {
	  if ((TR_S_S <= t.type && t.type <= (TR_S_S+D))
	      || (TR_S_E <= t.type && t.type <= (TR_S_E+D))) {// if emit pair
	    _rfold.ct_pred().add_pair((i+1), j); // dp cell coord -> seq index
	    //absolute coord -> relative coord (offset = b)
	    //1 based indexing -> 0 based indexing
	    sscons[(i-b)+1-1] = '<';// 1->0 based
	    sscons[(j-b)-1]   = '>';// 1->0 based
	  }
	  // continue into cell (k, l) only for target states below INNER_BEG
	  if (t.to < INNER_BEG) {
	    // Check(k < l, "i:%d,j:%d,k:%d,l:%d", i, j, k, l);
	    arr.push_back(TBInfo(_tau.p.get(0, k, l), (State)t.to, k, l));
	  }
	}
      }
      return sscons;
    }
  };
  // Callback set for the best-score recursion over the OUTER state.
  // The best OUTER score is written to cyk.o[i] and the chosen back-pointer
  // (a single int) to tau.o[i]. With use_mea_score the transitions are scored
  // with tsc_mea instead of tsc.
  template <bool use_mea_score>
  class FuncsViterbi : private Model {// only backward direction is considered
  private:
    Rfold& _rfold;
    int _width;
    CYK& _cyk;
    Tau& _tau;
    CurrSC& _curr_sc;
    // back-pointer of the best transition seen so far for the current
    // position; -1 when none is set
    int _curr_tr;
  public:
    FuncsViterbi(Rfold& rfold) 
      :_rfold(rfold),
       _width(_rfold.width()),
       _cyk(_rfold.cyk()),
       _tau(_rfold.tau()),
       _curr_sc(_rfold.curr_sc()),
       _curr_tr(-1) {
    }
    // Seeds the centre slot: INNER_BEG comes from cyk.s when the span fits
    // inside the window, otherwise it is impossible. Resets OUTER and the
    // back-pointer.
    void before_transition(int i, int j) {
      if ((j-i) < _width) {
        _curr_sc[DST_C][INNER_BEG] = _cyk.s.get(INNER_BEG, (j-i));
      } else {
	_curr_sc[DST_C][INNER_BEG] = NEG_INF();
      }
      _curr_sc[DST_C][OUTER] = NEG_INF();
      _curr_sc[DST_C][OUTER_END] = 0.0;
      _curr_tr = (-1);
    }
    // Persists the best OUTER score and its back-pointer, both indexed by i.
    void after_transition(int i, int j) {
      ScoreT sc = _curr_sc[DST_C][OUTER];
      if (impossible(sc)) {
	sc = NEG_INF();
      }
      _cyk.o[i] =  sc;
      _tau.o[i] = _curr_tr;
    }
    // Scores transition (type+n); if it beats the current best for state
    // t.from by more than MIN_DIFF(), the score and back-pointer are replaced.
    void_on_transition_(TrType type, int n, int i, int j, int k, int l) {
      const Transition& t = TRANSITIONS[(type+n)];
      ScoreT sc = NEG_INF();
      int best_k = (-1);
      if (t.bf) {
        Assert(t.to1 == INNER_BEG);
        // for (int m = k; m <= l; m++) {
        //   // left <-> right
        //   ScoreT sc1 = (_cyk.p.get(t.to1, i, m) + _cyk.o[m];
        //          sc1 += (use_mea_score ? 
        //                  _rfold.tsc_mea((type+n), n, i, j, i, m) 
	//                  : _rfold.tsc_(type, n, i, j, i, m));
        //   if ((sc + MIN_DIFF()) < sc1) {
	//     sc = sc1;
	//     tr.set(&t, m, m);
        //   }
        // }
	int m = (l - k + 1);
	if (m > 0) {
	  int best_m = (-1);
	  // left <-> right
	  CYK::S::const_iterator it1 = _cyk.s.iter(t.to1, (k-i));
	  CYK::O::const_iterator it2 = &_cyk.o[k];
	  while (m-- > 0) {
	    const ScoreT& sc1 = ((*it1++) + (*it2++));
	    if ((sc + MIN_DIFF()) < sc1) {
	      sc = sc1;
	      // m was already decremented by the loop test, so (l - m) is the
	      // split point of the term just examined
	      best_m = m;
	    }
	  }
	  best_k = (best_m == (-1) ? (-1) : (l-best_m));
	}
      } else {
	if (t.dst_in >= 0) {
	  sc = _curr_sc[t.dst_in][t.to];
	} else {
	  sc = _cyk.o[k];
	}
	// m becomes the back-pointer: j for TR_O_IB, -1 for everything else
	int m = (-1);
	switch ((type+n)) {
	case TR_O_IB:
	  m = j;
	  sc += (use_mea_score 
		 ? _rfold.tsc_mea(type, n, i, j, i, j) 
		 : _rfold.tsc_(type, n, i, j, i, j));
	  break;
	default:
	  // be careful. left to right transition. affects O_X, O_O
	  // since O_X scores are independent of coordinates,
	  // O_O is only the problem
	  // (i, _len, (i+1), _len) is passed to this function
	  // -> (_len, (i+1), _len, i) is passed to tsc_mea, and tsc
	  // -> loop ((i+1)-1, (i+1)) = (i, i+1) is scored
	  sc += (use_mea_score 
		 ? _rfold.tsc_mea(type, n, l, k, j, i) 
		 : _rfold.tsc_(type, n, l, k, j, i));
	  break;
	}
	best_k = m;
      }
      if ((_curr_sc[DST_C][t.from] + MIN_DIFF()) < sc) {
        _curr_sc[DST_C][t.from] = sc;
	_curr_tr = best_k;
      }    
    }
  };
  // Nussinov-grammar counterpart of the CYK callbacks: best-score recursion on
  // cell (i, j) over the NUS_* states, plus the traceback that turns the
  // stored back-pointers into a structure string.
  // `fwd` selects the preloaded neighbour slot (DST_W vs DST_E) and the
  // curr_sc shift (nw vs ne). Every transition is scored with
  // tsc_mea_nussinov. Back-pointers go to nus_tau().p layer 0.
  template <bool fwd>
  class FuncsCYKNussinov : private Model {
  private:
    Rfold& _rfold;
    CYK& _cyk;
    NusTau& _tau;
    CurrSC& _curr_sc;
    // back-pointer node being assembled for the current cell
    NusTBNode _curr_tr;
  public:
    FuncsCYKNussinov(Rfold& rfold) 
      :_rfold(rfold),
       _cyk(_rfold.cyk()),
       _tau(_rfold.nus_tau()),
       _curr_sc(_rfold.curr_sc()),
       _curr_tr(NusTBNode::null()) {
    }
    // Prepares the score slots and clears the back-pointer node before the
    // transitions of cell (i, j) fire.
    void before_transition(int i, int j) {
      if (i == j) {
	for (int k = 0; k < (int)_curr_sc.size(); k++) {
	  _curr_sc[k].fill(NEG_INF());
	}
      } else {// i < j
	_curr_sc[DST_C].fill(NEG_INF());
      }
      enum{dst = (fwd ? DST_W : DST_E)};
      for (int s = 0; s < NUS_NSTATE; s++) {
	_curr_sc[dst][s] = _cyk.s.get(s, (j-i));
      }
      _curr_sc[DST_C][NUS_INNER_END] = 0.0;
      _curr_tr.clean();
    }
    // Stores the finished centre scores and the back-pointer node of cell
    // (i, j), then shifts the slot window.
    void after_transition(int i, int j) {
      for (int s = 0; s < NUS_NSTATE; s++) {
	ScoreT sc = _curr_sc[DST_C][s];
	if (impossible(sc)) {
	  sc = NEG_INF();
	}
	_cyk.s.set(s, (j-i), sc);
	int k = nus_cyk_layer(s);
	if (k >= 0) {
	  _cyk.p.set(k, i, j, sc);
	}
      }
      _tau.p.set(0, i, j, _curr_tr);
      if (fwd) {
	_rfold.curr_sc_move_nw(_curr_sc);
      } else {
	_rfold.curr_sc_move_ne(_curr_sc);
      }
    }
    // Scores transition (type+n); if it beats the current best for state
    // t.from by more than MIN_DIFF(), it replaces the score and the
    // back-pointer is recorded in _curr_tr. Unknown transition types are fatal.
    void_on_transition_(NusTrType type, int n, int i, int j, int k, int l) { 
      const Transition& t = NUS_TRANSITIONS[(type+n)];
      ScoreT sc = NEG_INF();
      // default back-pointer is (k, l); only the bifurcation case overrides it
      int best_k = k;
      int best_l = l;
      switch (type+n) {
      case NUS_TR_X:
	sc = _curr_sc[DST_C][NUS_INNER_END];
	break;
      case NUS_TR_P: case NUS_TR_L: case NUS_TR_R:
	sc = _curr_sc[t.dst_in][NUS_INNER];
	break;
      case NUS_TR_I:
	sc = _curr_sc[DST_C][NUS_PAIR];
	break;
      case NUS_TR_B:
	{
	  // for (int m = k; m <= l; m++) {
	  //   ScoreT sc1 = (_cyk.get(NUS_INNER, i, m) + _cyk.get(NUS_INNER, m, j, false));
	  //   if ((sc + MIN_DIFF()) < sc1) {
	  //     sc = sc1;
	  //     tr.set(&t, m, m);
	  //   }
	  // }
	  int m = (l - k + 1);
	  if (m > 0) {
	    int best_m = (-1);
	    if (fwd) {
	      CYK::P::const_iterator it1 = _cyk.p.iter(nus_cyk_layer(NUS_INNER), i, k);
	      CYK::S::const_iterator it2 = _cyk.s.iter(NUS_INNER, (j-k));
	      while (m-- > 0) {
		const ScoreT& sc1 = ((*it1++) + (*it2--));
		if ((sc + MIN_DIFF()) < sc1) {
		  sc = sc1;
		  // m was already decremented by the loop test, so (l - m)
		  // is the split point of the term just examined
		  best_m = m;
		}
	      }
	    } else {
	      CYK::S::const_iterator it1 = _cyk.s.iter(NUS_INNER, (k-i));
	      CYK::P::const_iterator it2 = _cyk.p.iter(nus_cyk_layer(NUS_INNER), k, j);
	      while (m-- > 0) {
		const ScoreT& sc1 = ((*it1++) + (*it2++));
		if ((sc + MIN_DIFF()) < sc1) {
		  sc = sc1;
		  best_m = m;
		}
	      }
	    }
	    // both coordinates hold the same split point (or -1 if none won)
	    best_k = (best_m == (-1) ? (-1) : (l-best_m));
	    best_l = (best_m == (-1) ? (-1) : (l-best_m));
	  }
	} 
	break;
      default:
	Die("bad type %d", (type+n));
	break;
      }
      // the transition score is added in every case, bifurcation included
      sc += _rfold.tsc_mea_nussinov(type, n, i, j, k, l);
      if ((_curr_sc[DST_C][t.from] + MIN_DIFF()) < sc) {
        _curr_sc[DST_C][t.from] = sc;
	_curr_tr.set((NusTrType)(type+n), i, j, best_k, best_l);
      }
    }
    // One pending traceback step: back-pointer node, state and cell (i, j).
    struct TBInfo {
      NusTBNode nd;
      NusState s;
      int i;
      int j;
      TBInfo(const NusTBNode& nd1, NusState s1, int i1, int j1)
	: nd(nd1), s(s1), i(i1), j(j1) {}
    };
    // Scans tau.o from `offset` up to and including len() for the first entry
    // whose value is >= 0. Returns (index, value), or (len()+1, -1) if there
    // is none.
    std::pair<int, int> seek_for_outer_bf(int offset) {
      for (int i = offset; i <= _rfold.len(); i++) {
	_tau.o.load_range_if_needed(i, i);
	int val = _tau.o[i];
	if (val >= 0) {
	  return std::make_pair(i, val);
	}
      }
      return std::make_pair((_rfold.len()+1), (-1));
    }
    // Follows the back-pointers in tau.p starting at cell (b, e) in state
    // NUS_PAIR, using an explicit stack. Every transition leaving NUS_PAIR
    // adds a pair to ct_pred() and marks '<' / '>' in the returned string,
    // which has (e-b) characters and is '.' everywhere else.
    string traceback_range(int b, int e) {
      string sscons((e-b), '.');
      Vector<TBInfo> arr;
      arr.push_back(TBInfo(_tau.p.get(0, b, e), NUS_PAIR, b, e));
      while (!arr.empty()) {
	// NOTE(review): tb refers to the element removed by pop_back() below,
	// and t is still read afterwards. That is only safe if transition()
	// returns something that does not live inside the popped element -- confirm.
	const TBInfo& tb = arr.back();
	const Transition& t = tb.nd.transition(tb.s);
	int i = tb.i;
	int j = tb.j;
	const std::pair<int, int>& pos = tb.nd.pos(tb.s, tb.i, tb.j);
	int k = pos.first;
	int l = pos.second;
	arr.pop_back();// tb deleted
	if (t.bf == BF_PARENT) {
	  // bifurcation: push the right part first so the left part (i, k) is
	  // processed next
	  arr.push_back(TBInfo(_tau.p.get(0, l, j), (NusState)t.to1, l, j));
	  arr.push_back(TBInfo(_tau.p.get(0, i, k), (NusState)t.to, i, k));
	} else {
	  if (t.from == NUS_PAIR) {
	    _rfold.ct_pred().add_pair((i+1), j); // dp cell coord -> seq index
	    //absolute coord -> relative coord (offset = b)
	    //1 based indexing -> 0 based indexing
	    sscons[(i-b)+1-1] = '<';// 1->0 based
	    sscons[(j-b)-1]   = '>';// 1->0 based
	  }
	  // continue only while the child cell is non-empty
	  if (k < l) {
	    arr.push_back(TBInfo(_tau.p.get(0, k, l), (NusState)t.to, k, l));
	  }
	}
      }
      return sscons;
    }
  };
  // Nussinov-grammar counterpart of the outer best-score callbacks.
  // The best NUS_OUTER score is written to cyk.o[i] and the chosen
  // back-pointer (a single int) to nus_tau().o[i]. Every transition is scored
  // with tsc_mea_nussinov.
  class FuncsViterbiNussinov : private Model {
  private:
    Rfold& _rfold;
    int _width;
    CYK& _cyk;
    NusTau& _tau;
    CurrSC& _curr_sc;
    // back-pointer of the best transition seen so far for the current
    // position; -1 when none is set
    int _curr_tr;
  public:
    FuncsViterbiNussinov(Rfold& rfold) 
      :_rfold(rfold),
       _width(_rfold.width()),
       _cyk(_rfold.cyk()),
       _tau(_rfold.nus_tau()),
       _curr_sc(_rfold.curr_sc()),
       _curr_tr(-1) {
    }
    // Seeds the centre slot: NUS_PAIR comes from cyk.s when the span fits
    // inside the window, otherwise it is impossible. Resets NUS_OUTER and the
    // back-pointer.
    void before_transition(int i, int j) {
      if ((j-i) < _width) {
        _curr_sc[DST_C][NUS_PAIR] = _cyk.s.get(NUS_PAIR, (j-i));
      } else {
	_curr_sc[DST_C][NUS_PAIR] = NEG_INF();
      }
      _curr_sc[DST_C][NUS_OUTER] = NEG_INF();
      _curr_sc[DST_C][NUS_OUTER_END] = 0.0;
      _curr_tr = (-1);
    }
    // Persists the best NUS_OUTER score and its back-pointer, both indexed
    // by i.
    void after_transition(int i, int j) {
      ScoreT sc = _curr_sc[DST_C][NUS_OUTER];
      if (impossible(sc)) {
	sc = NEG_INF();
      }
      _cyk.o[i] = sc;
      _tau.o[i] = _curr_tr;
    }
    // Scores transition (type+n); if it beats the current best NUS_OUTER score
    // by more than MIN_DIFF(), the score and back-pointer are replaced.
    // Unknown transition types are fatal.
    void_on_transition_(NusTrType type, int n, int i, int j, int k, int l) {
      // const Transition& t = NUS_TRANSITIONS[(type+n)];
      ScoreT sc = NEG_INF();
      // stays -1 for NUS_TR_O_X and NUS_TR_O_O
      int best_k = (-1);
      switch ((type+n)) {
      case NUS_TR_O_X:
	sc = _curr_sc[DST_C][NUS_OUTER_END];
	break;
      case NUS_TR_O_O:
	sc = _cyk.o[k];
	break;
      case NUS_TR_O_P:
	sc = _curr_sc[DST_C][NUS_PAIR];
	best_k = j;
	break;
      case NUS_TR_O_B:
	{
	  // for (int m = k; m <= l; m++) {
	  //   // left <-> right
	  //   ScoreT sc1 = (_cyk.get(NUS_PAIR, i, m) + _cyk.o[m]);
	  //   if ((sc + MIN_DIFF()) < sc1) {
	  //     sc = sc1;
	  //     tr.set(&t, m, m);
	  //   }
	  // }
	  int m = (l - k + 1);
	  if (m > 0) {
	    int best_m = (-1);
	    // left <-> right
	    CYK::S::const_iterator it1 = _cyk.s.iter(NUS_PAIR, (k-i));
	    CYK::O::const_iterator it2 = &_cyk.o[k];
	    while (m-- > 0) {
	      const ScoreT& sc1 = ((*it1++) + (*it2++));
	      if ((sc + MIN_DIFF()) < sc1) {
		sc = sc1;
		// m was already decremented by the loop test, so (l - m) is
		// the split point of the term just examined
		best_m = m;
	      }
	    }
	    best_k = (best_m == (-1) ? (-1) : (l-best_m));
	  }
	}
	break;
      default:
	Die("bad transition type %d", (type+n));
	break;
      }
      // be careful. left to right transition. affects O_X, O_O, O_I O_B 
      // since O_X and O_I and O_B scores are independent of coordinates,
      // O_O is only the problem
      // (i, _len, (i+1), _len) is passed to this function
      // -> (_len, (i+1), _len, i) is passed to tsc_mea, and tsc
      // -> loop ((i+1)-1, (i+1)) = (i, i+1) is scored
      sc += _rfold.tsc_mea_nussinov(type, n, l, k, j, i);
      if ((_curr_sc[DST_C][NUS_OUTER] + MIN_DIFF()) < sc) {
        _curr_sc[DST_C][NUS_OUTER] = sc;
	_curr_tr = best_k;
      }    
    }
  };
  class FuncsMEAProbForward : private Model {
  private:
    Rfold& _rfold;
    int _width;
    Inside& _inside;
    Outside& _outside;
    CurrSC& _curr_sc;
  public:
    FuncsMEAProbForward(Rfold& rfold) 
      : _rfold(rfold),
	_width(_rfold.width()),
	_inside(_rfold.inside()),
	_outside(_rfold.outside()),
	_curr_sc(_rfold.curr_sc()) {
    }
    void before_transition(int i, int j) {
      // do not modify curr_sc other than curr_sc[DST_C]
      _curr_sc[DST_C][OUTER] = _outside.o[j];
      _curr_sc[DST_C][OUTER_BEG] = 0.0;
    }
    void after_transition(int i, int j) {
      //no-op
    }
    void_on_transition_(TrType type, int n, int i, int j, int k, int l) {
      const Transition& t = TRANSITIONS[(type+n)];
      Assert((type+n) == TR_O_O);
      ScoreT sc_in = _inside.o[l];
      ScoreT dsc = _rfold.tsc_(type, n, i, j, k, l);
      ScoreT sc_out = _curr_sc[DST_C][t.from];
      ScoreT w = ((sc_in + sc_out - _rfold.partition_coeff()) + dsc);
      Assert(w < 1.0e-5, "type:%s,n:%d,[%d,%d,%d,%d],w:%f", 
	     t.name.c_str(), n, i, j, k, l, w);
      _rfold.tsc_add_prob(type, n, i, j, k, l, w);
    }
  };
  class FuncsMEAProbInside: private Model {
  private:
    Rfold& _rfold;
    Inside& _inside;
    Outside& _outside;
    MEAProb& _mea_prob;
    CurrSC& _curr_sc;
  public:
    FuncsMEAProbInside(Rfold& rfold) 
      :_rfold(rfold),
       _inside(_rfold.inside()),
       _outside(_rfold.outside()),
       _mea_prob(_rfold.mea_prob()),
       _curr_sc(_rfold.curr_sc()) {
    }
    void before_transition(int i, int j) {
      // do not modify curr_sc other than curr_sc[DST_C]
      for (int s = 0; s < NSTATE; s++) {
        _curr_sc[DST_C][s] = _outside.s.get(s, (j-i));
      }
      for (int s = 0; s < _mea_prob.p.nlayer(); s++) {
	_mea_prob.p.set(s, i, j, NEG_INF());
      }
    }
    void after_transition(int i, int j) {
      //no-op
    }
    void_on_transition_(TrType type, int n, int i, int j, int k, int l) {
      const Transition& t = TRANSITIONS[(type+n)];
      Assert(!t.bf);
      Assert(t.to > NSTATE1 || inside_layer(t.to) >= 0);
      ScoreT sc_in = _inside.p.get(inside_layer(t.to), k, l);
      ScoreT dsc = _rfold.tsc_(type, n, i, j, k, l);
      ScoreT sc_out = _curr_sc[DST_C][t.from]; 
      ScoreT w = (sc_in + dsc + (sc_out - _rfold.partition_coeff()));
      Assert(w < 1.0e-5, "type:%s,n:%d,[%d,%d,%d,%d],w:%f", 
	     t.name.c_str(), n, i, j, k, l, w);
      _rfold.tsc_add_prob(type, n, i, j, k, l, w);
    }
  };
  class FuncsFeatureForward : private Model {
  private:
    Rfold& _rfold;
    int _width;
    Inside& _inside;
    Outside& _outside;
    CurrSC& _curr_sc;
  public:
    FuncsFeatureForward(Rfold& rfold) 
      :_rfold(rfold),
       _width(_rfold.width()),
       _inside(_rfold.inside()),
       _outside(_rfold.outside()),
       _curr_sc(_rfold.curr_sc()) {
    }
    void before_transition(int i, int j) {
      // do not modify curr_sc other than curr_sc[DST_C]
      if ((j-i) < _width) {
        _curr_sc[DST_C][INNER_BEG] = _outside.s.get(INNER_BEG, (j-i));
      } else {
	_curr_sc[DST_C][INNER_BEG] = NEG_INF();
      }
      _curr_sc[DST_C][OUTER] = _outside.o[j];
      _curr_sc[DST_C][OUTER_BEG] = 0.0;
    }
    void after_transition(int i, int j) {
      //no-op
    }
    void_on_transition_(TrType type, int n, int i, int j, int k, int l) {
      const Transition& t = TRANSITIONS[(type+n)];
      if (t.bf) {

      } else if (t.to == OUTER) {
	//left to right transition
	ScoreT sc = _inside.o[l];
	sc += _rfold.tsc_(type, n, i, j, k, l);
	ScoreT sc_in = sc;
	ScoreT sc_out = _curr_sc[DST_C][t.from];
	ScoreT w = (sc_in + sc_out - _rfold.partition_coeff());
	Assert(w < 1.0e-5, "type:%s,n:%d,[%d,%d,%d,%d],w:%f", 
	       t.name.c_str(), n, i, j, k, l, w);
	_rfold.tsc_count(type, n, i, j, k, l, w);
      }
    }
  };
  class FuncsFeatureInside : private Model {
  private:
    Rfold& _rfold;
    Inside& _inside;
    Outside& _outside;
    CurrSC& _curr_sc;
  public:
    FuncsFeatureInside(Rfold& rfold) 
      :_rfold(rfold),
       _inside(_rfold.inside()),
       _outside(_rfold.outside()),
       _curr_sc(_rfold.curr_sc()) {
    }
    void before_transition(int i, int j) {
      // do not modify curr_sc other than curr_sc[DST_C]
      for (int s = 0; s < NSTATE; s++) {
        _curr_sc[DST_C][s] = _outside.s.get(s, (j-i));
      }
    }
    void after_transition(int i, int j) {
      //no-op
    }
    void_on_transition_(TrType type, int n, int i, int j, int k, int l) {
      const Transition& t = TRANSITIONS[(type+n)];
      if (t.bf) return;
      if (t.to < NSTATE && inside_layer(t.to) < 0) return;

      ScoreT sc = (t.to < NSTATE ? _inside.p.get(inside_layer(t.to), k, l) : 0.0);
      sc += _rfold.tsc_(type, n, i, j, k, l);
      ScoreT sc_in = sc;
      ScoreT sc_out = _curr_sc[DST_C][t.from]; 
      ScoreT w = (sc_in + sc_out - _rfold.partition_coeff());
      Assert(w < 1.0e-5, "type:%s,n:%d,[%d,%d,%d,%d],w:%f", 
	     t.name.c_str(), n, i, j, k, l, w);
      _rfold.tsc_count(type, n, i, j, k, l, w);
    }
  };
#undef void_on_transition_
#undef tsc_

  // Driver for the forward pass: owns the inside and forward callback objects
  // and exposes the three hooks initialize_block / finalize_block /
  // compute_period.
  class ComputeForwardFuncs {
  private:
    Rfold& _rfold;
    FuncsInside<true> _funcs_inside;
    FuncsForward _funcs_forward;
  public:
    ComputeForwardFuncs(Rfold& rfold)
      : _rfold(rfold), _funcs_inside(_rfold), _funcs_forward(_rfold) {}

    // Loads the sequence, the constraints (if any) and inside.o for the range
    // needed by the block starting at `offset`. The range is
    // [offset - period + 1, offset + block_size], clamped to [0, len] for the
    // lower and [0, len + 1] for the upper bound.
    void initialize_block(int offset) {
      int lo_raw = (offset - _rfold.period() + 1);
      int hi_raw = (offset + _rfold.block_size());
      int lo = min(max(0, lo_raw), _rfold.len());
      int hi = min(max(0, hi_raw), (_rfold.len()+1));
      Assert(lo < hi);
      _rfold.seq().load_range((lo+1), hi);
      const bool constrained = (_rfold.has_constraint() || _rfold.has_constraint_nus());
      if (constrained) {
        _rfold.ct_constr().load_range((lo+1), hi);
      }
      _rfold.inside().o.load_range(lo, hi);
    }

    // Flushes inside.o once the block is done.
    void finalize_block(int offset) {
      _rfold.inside().o.flush();
    }

    // Processes column j: inside transitions for i = j down to
    // max(0, j - width + 1), followed by the forward transitions for (0, j).
    void compute_period(int j) {
#ifdef USE_TSC_FREQ
      _rfold.freq().clean();
#endif
      const int lowest = max(0, (j-_rfold.width()+1));
      int i = j;
      while (i >= lowest) {
        _rfold.set_constr(i, j);
#ifdef USE_TSC_FREQ
        _rfold.freq_move_nw(_rfold.freq(), i, j);
#endif
        _rfold.inside_transitions(_funcs_inside, i, j);
        i--;
      }
      _rfold.forward_transitions(_funcs_forward, 0, j);
    }
  };
  // Block/period driver for the backward pass that also produces MEA
  // probabilities and the CYK/Viterbi tables. It owns one functor object per
  // transition family and calls the matching Rfold routines in a fixed order.
  // USE_MEA_NUSSINOV selects the Nussinov CYK/Viterbi functors and the
  // nus_tau() traceback array instead of the default ones.
  // NOTE(review): when and with which arguments the three callbacks are
  // invoked is decided by a caller that is not visible in this part of the file.
  class ComputeBackwardMEAFuncs {
  public:
    // Stores the Rfold reference and constructs every functor from it.
    ComputeBackwardMEAFuncs(Rfold& rfold)
      :_rfold(rfold),
       _funcs_inside(_rfold),
       _funcs_outside(_rfold),
       _funcs_backward(_rfold),
       _funcs_mea_prob_inside(_rfold),
       _funcs_mea_prob_forward(_rfold),
       _funcs_cyk(_rfold),
       _funcs_viterbi(_rfold) {
    }
    // Computes the clamped index window for the block at `offset`
    // ((block_size - 1) positions back, period positions forward) and calls
    // load_range on the sequence, the optional constraint structure, and the
    // inside/outside/cyk/traceback `o` arrays. Sequence and constraint data
    // are loaded from b + 1, the arrays from b.
    void initialize_block(int offset) {
      int b0 = (offset - _rfold.block_size() + 1);
      int e0 = (offset + _rfold.period());
      int b = min(max(0, b0), _rfold.len());
      int e = min(max(0, e0), (_rfold.len()+1));
      Assert(b < e);
      _rfold.seq().load_range((b+1), e);
      if (_rfold.has_constraint() || _rfold.has_constraint_nus()) {
	_rfold.ct_constr().load_range((b+1), e);
      }
      _rfold.inside().o.load_range(b, e);
      _rfold.outside().o.load_range(b, e);
      _rfold.cyk().o.load_range(b, e);
#ifdef USE_MEA_NUSSINOV
      _rfold.nus_tau().o.load_range(b, e);
#else
      _rfold.tau().o.load_range(b, e);
#endif
    }
    // Flushes the outside, cyk and traceback `o` arrays. inside().o is loaded
    // by initialize_block but is not flushed here. `offset` is unused.
    void finalize_block(int offset) {
      _rfold.outside().o.flush();
      _rfold.cyk().o.flush();
#ifdef USE_MEA_NUSSINOV
      _rfold.nus_tau().o.flush();
#else
      _rfold.tau().o.flush();
#endif
    }
    // Handles one period for row i. Two rows are touched:
    //   1. row i: inside cells (i, j) are (re)computed, unless the layout is
    //      triangular or i lies outside [0, len - width + 1];
    //   2. row/column k = i + width - 1: outside, MEA probability, CYK and
    //      Viterbi steps, provided 0 <= k <= len.
    void compute_period(int i) {//i <= _rfold.len is ensured
      if (!_rfold.is_triangular()) {
	if (0 <= i && i <= (_rfold.len()-_rfold.width()+1)) {
#ifdef USE_TSC_FREQ
	  _rfold.freq().clean();
#endif
	  int je = min((i+_rfold.width()-1), _rfold.len());
	  for (int j = i; j <= je; j++) {
	    _rfold.set_constr(i, j);
#ifdef USE_TSC_FREQ
	    _rfold.freq_move_ne(_rfold.freq(), i, j);
#endif
	    _rfold.init_mea_prob(i, j);// ok since nullified
	    _rfold.inside_transitions(_funcs_inside, i, j);
	  }
	}
      }
#ifdef USE_TSC_FREQ
      // Snapshot the frequency table of row 0 for later reuse via freq0().
      if (i == 0) {
	_rfold.freq0() = _rfold.freq();
      }
#endif
      int k = (i+_rfold.width()-1);
      if (0 <= k && k <= _rfold.len()) {
	int je = min((k+_rfold.width()-1), _rfold.len());
	// Copy the inside values of row k into the s1 matrix before the
	// outside steps below.
	for (int j1 = k; j1 <= je; j1++) {
	  _rfold.set_inside_s1_for_outside(k, j1);
	}
        _rfold.backward_transitions(_funcs_backward, 0, k);
        int ib = max(0, i);
#ifdef USE_TSC_FREQ
	if (ib == 0) {
	  _rfold.freq_move_sw(_rfold.freq0(), ib, k);
	  _rfold.freq() = _rfold.freq0();
	}
#endif
	// Column k: outside value and MEA probability accumulation for every
	// row i1 in [ib, k].
	for (int i1 = ib; i1 <= k; i1++) {
#ifdef USE_TSC_FREQ
	  _rfold.freq_move_se(_rfold.freq(), i1, k);
#endif
          _rfold.outside_transitions(_funcs_outside, i1, k);
	  // do not modify curr_sc other than curr_sc[DST_C]
	  _rfold.mea_prob_inside_transitions(_funcs_mea_prob_inside, i1, k);
        }
	_rfold.mea_prob_forward_transitions(_funcs_mea_prob_forward, 0, k);
	// Row k: finalize the MEA probabilities and run the CYK step for each
	// (k, j1).
	for (int j1 = k; j1 <= je; j1++) {
#ifdef USE_TSC_FREQ
	  _rfold.freq_move_ne(_rfold.freq(), k, j1);
#endif
	  _rfold.set_mea_prob(k, j1);
#ifdef USE_MEA_NUSSINOV
	  _rfold.cyk_nussinov_transitions(_funcs_cyk, k, j1);
#else
          _rfold.cyk_transitions(_funcs_cyk, k, j1);
#endif
        }
        //right to left forward transitions
#ifdef USE_MEA_NUSSINOV
	_rfold.viterbi_nussinov_transitions(_funcs_viterbi, k, _rfold.len());
#else
        _rfold.viterbi_transitions(_funcs_viterbi, k, _rfold.len());
#endif
      }
    }
  private:
    // Declared first so it is initialized before the functors that use it.
    Rfold& _rfold;
    FuncsInside<false> _funcs_inside;
    FuncsOutside<false> _funcs_outside;
    FuncsBackward _funcs_backward;
    FuncsMEAProbInside _funcs_mea_prob_inside;
    FuncsMEAProbForward _funcs_mea_prob_forward;
#ifdef USE_MEA_NUSSINOV
    FuncsCYKNussinov<false> _funcs_cyk;
    FuncsViterbiNussinov _funcs_viterbi;
#else
    FuncsCYK<false, true> _funcs_cyk;
    FuncsViterbi<true> _funcs_viterbi;
#endif
  };
    // Block/period driver for the backward pass that computes MEA
    // probabilities only: it performs the inside, outside, backward and
    // MEA-probability steps but no CYK or Viterbi step.
    // NOTE(review): the scheduling of the callbacks is decided by a caller
    // that is not visible in this part of the file.
    class ComputeBackwardProbFuncs {
  public:
    // Stores the Rfold reference and constructs every functor from it.
    ComputeBackwardProbFuncs(Rfold& rfold)
      :_rfold(rfold),
       _funcs_inside(_rfold),
       _funcs_outside(_rfold),
       _funcs_backward(_rfold),
       _funcs_mea_prob_inside(_rfold),
       _funcs_mea_prob_forward(_rfold) {
    }
    // Computes the clamped index window for the block at `offset`
    // ((block_size - 1) positions back, period positions forward) and loads
    // the sequence, the optional constraint structure, and the inside and
    // outside `o` arrays for it.
    void initialize_block(int offset) {
      int b0 = (offset - _rfold.block_size() + 1);
      int e0 = (offset + _rfold.period());
      int b = min(max(0, b0), _rfold.len());
      int e = min(max(0, e0), (_rfold.len()+1));
      Assert(b < e);
      _rfold.seq().load_range((b+1), e);
      if (_rfold.has_constraint()  || _rfold.has_constraint_nus()) {
	_rfold.ct_constr().load_range((b+1), e);
      }
      _rfold.inside().o.load_range(b, e);
      _rfold.outside().o.load_range(b, e);
    }
    // Flushes outside().o only. `offset` is unused.
    void finalize_block(int offset) {
      _rfold.outside().o.flush();
    }
    // Handles one period for row i: first the inside cells of row i (skipped
    // for triangular layouts or when i is outside [0, len - width + 1]), then
    // the outside and MEA-probability steps for k = i + width - 1 when
    // 0 <= k <= len.
    void compute_period(int i) {//i <= _rfold.len is ensured
      if (!_rfold.is_triangular()) {
	if (0 <= i && i <= (_rfold.len()-_rfold.width()+1)) {
#ifdef USE_TSC_FREQ
	  _rfold.freq().clean();
#endif
	  int je = min((i+_rfold.width()-1), _rfold.len());
	  for (int j = i; j <= je; j++) {
	    _rfold.set_constr(i, j);
#ifdef USE_TSC_FREQ
	    _rfold.freq_move_ne(_rfold.freq(), i, j);
#endif
	    _rfold.init_mea_prob(i, j);// ok since nullified
	    _rfold.inside_transitions(_funcs_inside, i, j);
	  }
	}
      }
#ifdef USE_TSC_FREQ
      // Snapshot the frequency table of row 0 for later reuse via freq0().
      if (i == 0) {
	_rfold.freq0() = _rfold.freq();
      }
#endif
      int k = (i+_rfold.width()-1);
      if (0 <= k && k <= _rfold.len()) {
	int je = min((k+_rfold.width()-1), _rfold.len());
	// Copy the inside values of row k into the s1 matrix before the
	// outside steps below.
	for (int j1 = k; j1 <= je; j1++) {
	  _rfold.set_inside_s1_for_outside(k, j1);
	}
        _rfold.backward_transitions(_funcs_backward, 0, k);
        int ib = max(0, i);
#ifdef USE_TSC_FREQ
	if (ib == 0) {
	  _rfold.freq_move_sw(_rfold.freq0(), ib, k);
	  _rfold.freq() = _rfold.freq0();
	}
#endif
	// Column k: outside value and MEA probability accumulation for every
	// row i1 in [ib, k].
	for (int i1 = ib; i1 <= k; i1++) {
#ifdef USE_TSC_FREQ
	  _rfold.freq_move_se(_rfold.freq(), i1, k);
#endif
          _rfold.outside_transitions(_funcs_outside, i1, k);
	  // do not modify curr_sc other than curr_sc[DST_C]
	  _rfold.mea_prob_inside_transitions(_funcs_mea_prob_inside, i1, k);
        }
	_rfold.mea_prob_forward_transitions(_funcs_mea_prob_forward, 0, k);
	// Row k: finalize the MEA probabilities of each (k, j1).
	for (int j1 = k; j1 <= je; j1++) {
#ifdef USE_TSC_FREQ
	  _rfold.freq_move_ne(_rfold.freq(), k, j1);
#endif
	  _rfold.set_mea_prob(k, j1);
        }
      }
    }
  private:
    // Declared first so it is initialized before the functors that use it.
    Rfold& _rfold;
    FuncsInside<false> _funcs_inside;
    FuncsOutside<false> _funcs_outside;
    FuncsBackward _funcs_backward;
    FuncsMEAProbInside _funcs_mea_prob_inside;
    FuncsMEAProbForward _funcs_mea_prob_forward;
  };
  // Fills column (j - i) of the inside `s1` matrix for every state: a state
  // that has an inside layer (inside_layer() >= 0) receives the inside `p`
  // value of that layer at (i, j); every other state receives NEG_INF().
  void set_inside_s1_for_outside(int i, int j) {
    const int dist = (j - i);
    for (int state = 0; state < NSTATE; ++state) {
      const int layer = inside_layer(state);
      if (layer < 0) {
        _inside.s1.set(state, dist, NEG_INF());
        continue;
      }
      _inside.s1.set(state, dist, _inside.p.get(layer, i, j));
    }
  }
  // Block/period driver for the backward pass that runs only the CYK and
  // Viterbi steps (no inside/outside or MEA probability work). It always
  // uses tau() as the traceback array.
  // NOTE(review): the scheduling of the callbacks is decided by a caller
  // that is not visible in this part of the file.
  class ComputeBackwardMLFuncs {
  public:
    // Stores the Rfold reference and constructs both functors from it.
    ComputeBackwardMLFuncs(Rfold& rfold)
      :_rfold(rfold),
       _funcs_cyk(_rfold),
       _funcs_viterbi(_rfold) {
    }
    // Computes the clamped index window for the block at `offset`
    // ((block_size - 1) positions back, period positions forward) and loads
    // the sequence, the optional constraint structure, and the cyk and tau
    // `o` arrays for it.
    void initialize_block(int offset) {
      int b0 = (offset - _rfold.block_size() + 1);
      int e0 = (offset + _rfold.period());
      int b = min(max(0, b0), _rfold.len());
      int e = min(max(0, e0), (_rfold.len()+1));
      Assert(b < e);
      _rfold.seq().load_range((b+1), e); // b < e should be satisfied
      if (_rfold.has_constraint()  || _rfold.has_constraint_nus()) {
	_rfold.ct_constr().load_range((b+1), e);
      }
      _rfold.cyk().o.load_range(b, e);
      _rfold.tau().o.load_range(b, e);
    }
    // Flushes the cyk and tau `o` arrays. `offset` is unused.
    void finalize_block(int offset) {
      _rfold.cyk().o.flush();
      _rfold.tau().o.flush();
    }
    // Handles one period for row i: refreshes the constraint cells of row i
    // (when i >= 0), then runs the CYK steps for row k = i + width - 1 and
    // the Viterbi step for (k, len) when 0 <= k <= len.
    void compute_period(int i) {
      if (0 <= i) {
	int je = min((i+_rfold.width()-1), _rfold.len());
	for (int j = i; j <= je; j++) {
	  _rfold.set_constr(i, j);
	}
      }
      int k = (i+_rfold.width()-1);
      if (0 <= k && k <= _rfold.len()) {
#ifdef USE_TSC_FREQ
	_rfold.freq().clean();
#endif
	int je = min((k+_rfold.width()-1), _rfold.len());
	for (int j = k; j <= je; j++) {
#ifdef USE_TSC_FREQ
	  _rfold.freq_move_ne(_rfold.freq(), k, j);
#endif
          _rfold.cyk_transitions(_funcs_cyk, k, j);
        }
        //right to left forward transitions
        _rfold.viterbi_transitions(_funcs_viterbi, k, _rfold.len());
      }
    }
  private:
    // Declared first so it is initialized before the functors that use it.
    Rfold& _rfold;
    FuncsCYK<false, false> _funcs_cyk;
    FuncsViterbi<false>    _funcs_viterbi;
  };
  // Block/period driver for the backward pass used with the feature functors:
  // after the usual inside and outside steps it re-runs inside_transitions and
  // forward_transitions with FuncsFeatureInside / FuncsFeatureForward.
  // NOTE(review): presumably this accumulates expected feature counts, but
  // the functor bodies are not visible here - confirm.
  class ComputeBackwardFeatureFuncs {
  public:
    // Stores the Rfold reference and constructs every functor from it.
    ComputeBackwardFeatureFuncs(Rfold& rfold)
      :_rfold(rfold),
       _funcs_inside(_rfold),
       _funcs_outside(_rfold),
       _funcs_backward(_rfold),
       _funcs_feature_inside(_rfold),
       _funcs_feature_forward(_rfold) {
    }
    // Computes the clamped index window for the block at `offset`
    // ((block_size - 1) positions back, period positions forward) and loads
    // the sequence, the optional constraint structure, and the inside and
    // outside `o` arrays for it.
    void initialize_block(int offset) {
      int b0 = (offset - _rfold.block_size() + 1);
      int e0 = (offset + _rfold.period());
      int b = min(max(0, b0), _rfold.len());
      int e = min(max(0, e0), (_rfold.len()+1));
      Assert(b < e);
      _rfold.seq().load_range((b+1), e); // b < e should be satisfied
      if (_rfold.has_constraint() || _rfold.has_constraint_nus()) {
	_rfold.ct_constr().load_range((b+1), e);
      }
      _rfold.inside().o.load_range(b, e);
      _rfold.outside().o.load_range(b, e);
    }
    // Flushes outside().o only. `offset` is unused.
    void finalize_block(int offset) {
      _rfold.outside().o.flush();
    }
    // Handles one period for row i: first the inside cells of row i (skipped
    // for triangular layouts or when i is outside [0, len - width + 1]), then
    // the outside and feature steps for k = i + width - 1 when 0 <= k <= len.
    void compute_period(int i) {//i <= _rfold.len is ensured
      if (!_rfold.is_triangular()) {
	if (0 <= i && i <= (_rfold.len()-_rfold.width()+1)) {
#ifdef USE_TSC_FREQ
	  _rfold.freq().clean();
#endif
	  int je = min((i+_rfold.width()-1), _rfold.len());
	  for (int j = i; j <= je; j++) {
	    _rfold.set_constr(i, j);
#ifdef USE_TSC_FREQ
	    _rfold.freq_move_ne(_rfold.freq(), i, j);
#endif
	    _rfold.inside_transitions(_funcs_inside, i, j);
	  }
	}
      }
#ifdef USE_TSC_FREQ
      // Snapshot the frequency table of row 0 for later reuse via freq0().
      if (i == 0) {
	_rfold.freq0() = _rfold.freq();
      }
#endif
      int k = (i+_rfold.width()-1);
      if (0 <= k && k <= _rfold.len()) {
	int je = min((k+_rfold.width()-1), _rfold.len());
	// Copy the inside values of row k into the s1 matrix before the
	// outside steps below.
	for (int j1 = k; j1 <= je; j1++) {
	  _rfold.set_inside_s1_for_outside(k, j1);
	}
	_rfold.backward_transitions(_funcs_backward, 0, k);
        int ib = max(0, i);
#ifdef USE_TSC_FREQ
	if (ib == 0) {
	  _rfold.freq_move_sw(_rfold.freq0(), ib, k);
	  _rfold.freq() = _rfold.freq0();
	}
#endif
	// Column k: outside value followed by the feature pass over the same
	// cell, for every row i1 in [ib, k].
	for (int i1 = ib; i1 <= k; i1++) {
#ifdef USE_TSC_FREQ
	  _rfold.freq_move_se(_rfold.freq(), i1, k);
#endif
          _rfold.outside_transitions(_funcs_outside, i1, k);
	  // do not modify curr_sc other than curr_sc[DST_C]
	  _rfold.inside_transitions(_funcs_feature_inside, i1, k);
        }
	_rfold.forward_transitions(_funcs_feature_forward, 0, k);
      }
    }
  private:
    // Declared first so it is initialized before the functors that use it.
    Rfold& _rfold;
    FuncsInside<false> _funcs_inside;
    FuncsOutside<false> _funcs_outside;
    FuncsBackward _funcs_backward;
    FuncsFeatureInside _funcs_feature_inside;
    FuncsFeatureForward _funcs_feature_forward;
  };
  // Block/period driver for the MEA traceback pass. Unlike the backward
  // drivers it sweeps columns (index j, rows walked downward), recomputing
  // inside, outside, MEA probability and CYK values inside the range starting
  // at b0, and forwards the traceback queries to the CYK functor.
  // USE_MEA_NUSSINOV selects the Nussinov CYK functor and nus_tau().
  // NOTE(review): the scheduling of the callbacks is decided by a caller
  // that is not visible in this part of the file.
  class ComputeTracebackMEAFuncs {
  public:
    // Stores the Rfold reference and constructs every functor from it.
    ComputeTracebackMEAFuncs(Rfold& rfold)
      :_rfold(rfold),
       _funcs_inside(_rfold),
       _funcs_outside(_rfold),
       _funcs_mea_prob_inside(_rfold),
       _funcs_mea_prob_forward(_rfold),
       _funcs_cyk(_rfold) {
    }
    // Computes the clamped index window for the block at `offset`
    // ((period - 1) positions back, block_size positions forward) and loads
    // the sequence, the optional constraint structure, the inside/outside/
    // cyk/traceback `o` arrays and the predicted structure for it.
    void initialize_block(int offset) {
      int b0 = (offset - _rfold.period() + 1);
      int e0 = (offset + _rfold.block_size());
      int b = min(max(0, b0), _rfold.len());
      int e = min(max(0, e0), (_rfold.len()+1));
      Assert(b < e);
      _rfold.seq().load_range((b+1), e);
      if (_rfold.has_constraint() || _rfold.has_constraint_nus()) {
	_rfold.ct_constr().load_range((b+1), e);
      }
      _rfold.inside().o.load_range(b, e);
      _rfold.outside().o.load_range(b, e);
      _rfold.cyk().o.load_range(b, e);
#ifdef USE_MEA_NUSSINOV
      _rfold.nus_tau().o.load_range(b, e);
#else
      _rfold.tau().o.load_range(b, e);
#endif
      _rfold.ct_pred().load_range((b+1), e);
    }
    // Nothing is flushed by this driver.
    void finalize_block(int offset){
      // no-op
    }
    // Handles one period for column j within the range starting at b0
    // (`e0` is not used in the body):
    //   1. column j: inside cells (i, j) for i from j down to
    //      max(b0, j - width + 1), when j <= len;
    //   2. k = j - width + 1: outside, MEA probability and CYK steps, when
    //      b0 <= k.
    void compute_period(int b0, int e0, int j) {
      if (j <= _rfold.len()) {
#ifdef USE_TSC_FREQ
	_rfold.freq().clean();
#endif
	int ib = max(b0, (j-_rfold.width()+1));
	for (int i = j; i >= ib; i--) {
	  _rfold.set_constr(i, j);
#ifdef USE_TSC_FREQ
	  _rfold.freq_move_nw(_rfold.freq(), i, j);
#endif
	  _rfold.init_mea_prob(i, j);
	  _rfold.inside_transitions(_funcs_inside, i, j);
	}
      }
#ifdef USE_TSC_FREQ
      // Snapshot the frequency table of the last column for reuse via freq0().
      if (j == _rfold.len()) {
	_rfold.freq0() = _rfold.freq();
      }
#endif
      int k = (j-_rfold.width()+1);
      if (b0 <= k) {
	// NOTE(review): this lower bound is clamped at 0 whereas ib1 below is
	// clamped at b0 - confirm the difference is intended.
	int ib = max(0, (k-_rfold.width()+1));
	for (int i1 = k; i1 >= ib; i1--) {
	  _rfold.set_inside_s1_for_outside(i1, k);
	}
	int je = min(j, _rfold.len());
#ifdef USE_TSC_FREQ
	if (je == _rfold.len()) {
	  _rfold.freq_move_se(_rfold.freq0(), k, je);
	  _rfold.freq() = _rfold.freq0();
	}
#endif
	// Row k: outside value and MEA probability accumulation for every
	// column j1 from je down to k.
	for (int j1 = je; j1 >= k; j1--) {
#ifdef USE_TSC_FREQ
	  _rfold.freq_move_sw(_rfold.freq(), k, j1);
#endif
	  _rfold.outside_transitions(_funcs_outside, k, j1);
	  // do not modify curr_sc other than curr_sc[DST_C]
	  _rfold.mea_prob_inside_transitions(_funcs_mea_prob_inside, k, j1);
	}
	// NOTE(review): called with j here, while the backward drivers pass k
	// to the same routine - confirm.
	_rfold.mea_prob_forward_transitions(_funcs_mea_prob_forward, 0, j);

	// Column k: finalize the MEA probabilities and run the CYK step for
	// each (i1, k).
	int ib1 = max(b0, (k-_rfold.width()+1));
	for (int i1 = k; i1 >= ib1; i1--) {
#ifdef USE_TSC_FREQ
	  _rfold.freq_move_nw(_rfold.freq(), i1, k);
#endif
	  _rfold.set_mea_prob(i1, k);
#ifdef USE_MEA_NUSSINOV
	  _rfold.cyk_nussinov_transitions(_funcs_cyk, i1, k);
#else
	  _rfold.cyk_transitions(_funcs_cyk, i1, k);
#endif
	}
      }
    }
    // Both queries below are forwarded unchanged to the CYK functor.
    std::pair<int,int> seek_for_outer_bf(int offset) {
      return _funcs_cyk.seek_for_outer_bf(offset);}
    string traceback_range(int b, int e) {return _funcs_cyk.traceback_range(b, e);}
  private:
    // Declared first so it is initialized before the functors that use it.
    Rfold& _rfold;
    FuncsInside<true> _funcs_inside;
    FuncsOutside<true> _funcs_outside;
    FuncsMEAProbInside _funcs_mea_prob_inside;
    FuncsMEAProbForward _funcs_mea_prob_forward;
#ifdef USE_MEA_NUSSINOV
    FuncsCYKNussinov<true> _funcs_cyk;
#else
    FuncsCYK<true> _funcs_cyk;
#endif
  };
  // Block/period driver for the ML traceback pass: recomputes only the CYK
  // values column by column inside the range starting at b0 and forwards the
  // traceback queries to the CYK functor. It always uses tau() as the
  // traceback array.
  // NOTE(review): the scheduling of the callbacks is decided by a caller
  // that is not visible in this part of the file.
  class ComputeTracebackMLFuncs {
  public:
    // Stores the Rfold reference and constructs the CYK functor from it.
    ComputeTracebackMLFuncs(Rfold& rfold)
      :_rfold(rfold),
       _funcs_cyk(_rfold) {
    }
    // Computes the clamped index window for the block at `offset`
    // ((period - 1) positions back, block_size positions forward) and loads
    // the sequence, the optional constraint structure, the cyk and tau `o`
    // arrays and the predicted structure for it.
    void initialize_block(int offset) {
      int b0 = (offset - _rfold.period() + 1);
      int e0 = (offset + _rfold.block_size());
      int b = min(max(0, b0), _rfold.len());
      int e = min(max(0, e0), (_rfold.len()+1));
      Assert(b < e);
      _rfold.seq().load_range((b+1), e);
      if (_rfold.has_constraint() || _rfold.has_constraint_nus()) {
	_rfold.ct_constr().load_range((b+1), e);
      }
      _rfold.cyk().o.load_range(b, e);
      _rfold.tau().o.load_range(b, e);
      _rfold.ct_pred().load_range((b+1), e);
    }
    // Nothing is flushed by this driver.
    void finalize_block(int offset){
      // no-op
    }
    // Handles one period for column j within the range starting at b0
    // (`e0` is not used in the body): refreshes the constraint cells of
    // column j when j <= len, then runs the CYK steps for column
    // k = j - width + 1 when b0 <= k.
    void compute_period(int b0, int e0, int j) {
      if (j <= _rfold.len()) {
	int ib = max(b0, (j-_rfold.width()+1));
	for (int i = j; i >= ib; i--) {
	  _rfold.set_constr(i, j);
	}
      }
      int k = (j-_rfold.width()+1);
      if (b0 <= k) {
#ifdef USE_TSC_FREQ
	_rfold.freq().clean();
#endif
	int ib = max(b0, (k-_rfold.width()+1));
	for (int i = k; i >= ib; i--) {
#ifdef USE_TSC_FREQ
	  _rfold.freq_move_nw(_rfold.freq(), i, k);
#endif
	  _rfold.cyk_transitions(_funcs_cyk, i, k);
	}
      }
    }
    // Both queries below are forwarded unchanged to the CYK functor.
    std::pair<int,int> seek_for_outer_bf(int offset) {
      return _funcs_cyk.seek_for_outer_bf(offset);}
    string traceback_range(int b, int e) {return _funcs_cyk.traceback_range(b, e);}
  private:
    // Declared first so it is initialized before the functor that uses it.
    Rfold& _rfold;
    FuncsCYK<true, false> _funcs_cyk;
  };
#ifdef USE_TMPL_ON_TRANSITION
  template <TrType t> ScoreT tsc(int n, int i, int j, int k, int l)
#else
  ScoreT tsc(TrType t, int n, int i, int j, int k, int l)
#endif
  {
    // Score of transition type `t`, looked up in _param. With
    // USE_TMPL_ON_TRANSITION the type is a template argument, otherwise a
    // run-time argument; the body is shared. (i, j) is the current cell,
    // (k, l) the second coordinate pair used by interior loops and outer
    // branches, and `n` is forwarded to the stem scores. Any transition type
    // without an entry below (bifurcations, M1 transitions, O_X, X_O, O_IB,
    // ...) scores 0.0.
    switch (t) {
    // Stem transitions.
    case TR_S_S:
      return _param.score_stack(i, j, n);
    case TR_S_E:
      return _param.score_stem_close(i, j, n);

    // Multi-loop transitions; both extension types share one score.
    case TR_M2_S:
      return _param.score_multi_open(i, j);
    case TR_M2_M2:
    case TR_M_M:
      return _param.score_multi_extend();

    // Loop-closing transitions.
    case TR_E_H:
      return _param.score_hairpin(i, j);
    case TR_E_I:
      return _param.score_interior(i, j, k, l);
    case TR_E_M:
      return _param.score_multi_close(i, j);

    // Outer transitions.
    case TR_IB_S:
      return _param.score_outer_branch(k, l, 0, _len);
    case TR_O_O:
      return _param.score_outer_extend(j);

    default:
      break;
    }
    return 0.0;
  }
  // Adds EXP(w) to the _counter entry that corresponds to transition type
  // `t`. The argument layout mirrors the scoring switch: (i, j) is the current
  // cell, (k, l) the second coordinate pair, `n` goes to the stem counters.
  // Transition types without an entry below are not counted.
  void tsc_count(TrType t, int n, int i, int j, int k, int l, ScoreT w) {
    switch (t) {
    // Stem transitions.
    case TR_S_S:
      _counter.count_stack(i, j, n, EXP(w));
      break;
    case TR_S_E:
      _counter.count_stem_close(i, j, n, EXP(w));
      break;

    // Multi-loop transitions; both extension types feed one counter.
    case TR_M2_S:
      _counter.count_multi_open(i, j, EXP(w));
      break;
    case TR_M2_M2:
    case TR_M_M:
      _counter.count_multi_extend(EXP(w));
      break;

    // Loop-closing transitions.
    case TR_E_H:
      _counter.count_hairpin(i, j, EXP(w));
      break;
    case TR_E_I:
      _counter.count_interior(i, j, k, l, EXP(w));
      break;
    case TR_E_M:
      _counter.count_multi_close(i, j, EXP(w));
      break;

    // Outer transitions.
    case TR_IB_S:
      _counter.count_outer_branch(k, l, 0, _len, EXP(w));
      break;
    case TR_O_O:
      _counter.count_outer_extend(j, EXP(w));
      break;

    default:
      break;
    }
  }
  // Log-adds the weight `w` of a transition into the MEA probability tables.
  //  - Stem transitions (TR_S_S, TR_S_E) contribute to the pair entry
  //    (P_st, i, j) and to the per-position P_st entries at i + 1 and j.
  //  - TR_O_O contributes to the per-position O_st entry at j.
  // Every other transition type leaves the tables untouched. `n`, `k` and
  // `l` are not used.
  void tsc_add_prob(TrType t, int n, int i, int j, int k, int l, ScoreT w) {
    switch (t) {
    case TR_S_S:
    case TR_S_E:
      _mea_prob.p.logadd(P_st, i, j, w);
      _mea_prob.o.logadd(P_st, i+1, w);
      _mea_prob.o.logadd(P_st, j, w);
      break;
    case TR_O_O:
      _mea_prob.o.logadd(O_st, j, w);
      break;
    default:
      break;
    }
  }
  // Gain of transition type `t` in the MEA recursion: the matching entry of
  // _mea_prob multiplied by the scale of its MEA state (_prob_scales).
  // Transition types without an entry below contribute 0.0.
  ScoreT tsc_mea(TrType t, int n, int i, int j, int k, int l) {
    switch (t) {
    case TR_S_S:
    case TR_S_E: {
      const ScoreT& pair_prob = _mea_prob.p.get(P_st, i, j);
#ifdef MEA_PROB_CUTOFF
      // tentative: pair entries below the print cutoff contribute nothing
      if (!(_print_prob_cutoff <= pair_prob)) {
        return 0.0;
      }
#endif
      return _prob_scales[P_st] * pair_prob;
    }

    // Multi-loop extensions use the I_st entry of the single step they add.
    case TR_M2_M2:
      return _prob_scales[I_st] * _mea_prob.p.get(I_st, j-1, j);
    case TR_M_M:
      return _prob_scales[I_st] * _mea_prob.p.get(I_st, i, i+1);

    // Hairpin: I_st entry of the whole span (i, j).
    case TR_E_H:
      return _prob_scales[I_st] * _mea_prob.p.get(I_st, i, j);

    // Interior loop: I_st entries of both flanks, (i, k) and (l, j).
    case TR_E_I:
      return _prob_scales[I_st] * (_mea_prob.p.get(I_st, i, k)
                                   + _mea_prob.p.get(I_st, l, j));

    // Outer extension: per-position O_st entry at j.
    // (original note: should subtract from P_st and I_st)
    case TR_O_O:
      return _prob_scales[O_st] * _mea_prob.o.get(O_st, j);

    default:
      break;
    }
    return 0.0;
  }
  // Gain of Nussinov-style transition type `t` in the MEA recursion.
  //  - NUS_TR_P: scaled pair entry (P_st, i, j); with MEA_PROB_CUTOFF an
  //    entry below _print_prob_cutoff counts as 0.0.
  //  - NUS_TR_L / NUS_TR_R: scaled per-position I_st entry at i + 1 / j.
  //  - NUS_TR_O_O: scaled per-position O_st entry at j; this case exists only
  //    when MEA_NUSSINOV_SUBTRACT_OUTER is NOT defined.
  // With MEA_NUSSINOV_SUBTRACT_OUTER the scaled O_st entries of the positions
  // involved are subtracted from the P/L/R gains instead.
  // Every other transition type contributes 0.0. `n`, `k` and `l` are unused.
  ScoreT tsc_mea_nussinov(NusTrType t, int n, int i, int j, int k, int l) {
    switch (t) {
      // case NUS_TR_X:   return 0.0;
    case NUS_TR_P: 
#ifdef MEA_PROB_CUTOFF
      {// tentative
	const ScoreT& prob = _mea_prob.p.get(P_st, i, j);
	return ((_print_prob_cutoff <= prob ? (_prob_scales[P_st] * prob) : 0.0)
#ifdef MEA_NUSSINOV_SUBTRACT_OUTER
		- _prob_scales[O_st] * (_mea_prob.o.get(O_st, i+1)
					+ _mea_prob.o.get(O_st, j))
#endif
		);
      }
#else
      return (_prob_scales[P_st] * _mea_prob.p.get(P_st, i, j)
#ifdef MEA_NUSSINOV_SUBTRACT_OUTER
			   - _prob_scales[O_st] * (_mea_prob.o.get(O_st, i+1)
						   + _mea_prob.o.get(O_st, j))
#endif
			   );
#endif
    case NUS_TR_L: return (_prob_scales[I_st] * _mea_prob.o.get(I_st, i+1)
#ifdef MEA_NUSSINOV_SUBTRACT_OUTER
			   - _prob_scales[O_st] * _mea_prob.o.get(O_st, i+1)
#endif
			   );
    case NUS_TR_R: return (_prob_scales[I_st] * _mea_prob.o.get(I_st, j)
#ifdef MEA_NUSSINOV_SUBTRACT_OUTER
			   - _prob_scales[O_st] * _mea_prob.o.get(O_st, j)
#endif
			   );
      // case NUS_TR_B:   return 0.0;
      // case NUS_TR_I:   return 0.0;
      // case NUS_TR_O_X: return 0.0;
#ifdef MEA_NUSSINOV_SUBTRACT_OUTER	     
#else
    case NUS_TR_O_O: return _prob_scales[O_st] * _mea_prob.o.get(O_st, j);
#endif
      // should subtract from P_st and I_st
      // case NUS_TR_O_P: return 0.0;
      // case NUS_TR_O_B: return 0.0;
    default: return 0.0;
    }
  }
  // Tells whether cell (i, j) may form a pair, i.e. sequence positions i + 1
  // and j. (tentative) The pair must be canonical unless non-canonical pairs
  // are allowed, and - when a constraint is active - the constraint structure
  // must pair (i + 1, j).
  bool allow_pair(int i, int j) {
    const bool base_ok = (_allow_non_canonical
                          || Alpha::is_canonical(seq(i+1), seq(j)));
    if (!base_ok) {
      return false;
    }
    if (!has_constraint()) {
      return true;
    }
    return _ct_constr->pairs((i+1), j) ? true : false;
  }
  // Without a constraint every cell may be an inner loop; otherwise the I_st
  // flag stored in the constraint table for (i, j) decides.
  bool allow_inner_loop(int i, int j) {
    if (!has_constraint()) {
      return true;
    }
    return _constr.get(0, i, j).get(I_st);
  }
  // Without a constraint every cell may be an outer loop; otherwise the O_st
  // flag stored in the constraint table for (i, j) decides.
  bool allow_outer_loop(int i, int j) {
    if (!has_constraint()) {
      return true;
    }
    return _constr.get(0, i, j).get(O_st);
  }
  // Nussinov-mode counterpart of the pair check: same canonical-pair test,
  // but the constraint structure is consulted only when the Nussinov
  // constraint flag is set. (tentative)
  bool allow_pair_nus(int i, int j) {
    const bool base_ok = (_allow_non_canonical
                          || Alpha::is_canonical(seq(i+1), seq(j)));
    if (!base_ok) {
      return false;
    }
    if (!has_constraint_nus()) {
      return true;
    }
    return _ct_constr->pairs((i+1), j) ? true : false;
  }
  // Without a Nussinov constraint every cell may be an inner loop; otherwise
  // the I_st flag stored in the constraint table for (i, j) decides.
  bool allow_inner_loop_nus(int i, int j) {
    if (!has_constraint_nus()) {
      return true;
    }
    return _constr.get(0, i, j).get(I_st);
  }
  // Without a Nussinov constraint every cell may be an outer loop; otherwise
  // the O_st flag stored in the constraint table for (i, j) decides.
  bool allow_outer_loop_nus(int i, int j) {
    if (!has_constraint_nus()) {
      return true;
    }
    return _constr.get(0, i, j).get(O_st);
  }
  void set_constr(int i, int j) {
    if (!(has_constraint() || has_constraint_nus())) return;

    ConstrNode nd = _constr.get(0, i, j);
    if (i == j) {
      nd.set(I_st, true);
      nd.set(O_st, true);
      _constr.set(0, i, j, nd);
    } else {// i < j
      bool allow_inner = false;
      bool allow_outer = false;
      if (_mea_separate_loop_type) {
	allow_inner = (_constr.get(0, i, (j-1)).get(I_st) && _ct_constr->is_inner_loop(j));
	allow_outer = (_constr.get(0, i, (j-1)).get(O_st) && _ct_constr->is_outer_loop(j));
      } else {
	allow_inner = (_constr.get(0, i, (j-1)).get(I_st) && _ct_constr->is_loop(j));
	allow_outer = (_constr.get(0, i, (j-1)).get(O_st) && _ct_constr->is_loop(j));
      }
      nd.set(I_st, allow_inner);
      nd.set(O_st, allow_outer);
      _constr.set(0, i, j, nd);
    }
  }
  // Resets the per-position MEA entries of position j to NEG_INF() for every
  // MEA state. Only diagonal cells (i == j) trigger the reset.
  void init_mea_prob(int i, int j) {
    if (i != j) {
      return;
    }
    for (int state = 0; state < N_MEA_STATE; ++state) {
      _mea_prob.o.set(state, j, NEG_INF());
    }
  }
  void set_mea_prob(int i, int j) {
    if (i == j) {
#ifdef USE_MEA_NUSSINOV
      ;// no-op do not use _mea_prob.p.get(I_st, i, j)
#else
      _mea_prob.p.set(I_st, i, j, 0.0);
#endif
      if (j == 0) return;

      if (_mea_separate_loop_type) {
	ScoreT prob0 = EXP(_mea_prob.o.get(P_st, j));
#ifdef DEBUG
	if (_len+1 <= _width) {
	  ScoreT w = NEG_INF();
	  for (int k = 0; k < j; k++) {
	    LOGADD(w, _mea_prob.p.get(P_st, k, j));
	  }
	  for (int k = j; k <= _len; k++) {
	    LOGADD(w, _mea_prob.p.get(P_st, j-1, k));
	  }
	  Assert(abs(EXP(w) - prob0) < 0.001);
	}
#endif
	ScoreT prob1 = EXP(_mea_prob.o.get(O_st, j));
	ScoreT prob = (1.0 - prob0 - prob1);
	Assert(-1.0e-4 <= prob && prob <= (1.0 + 1.0e-4));
	_mea_prob.o.set(P_st, j, prob0);
	_mea_prob.o.set(O_st, j, prob1);
	_mea_prob.o.set(I_st, j, prob);

      } else {
	ScoreT prob0 = EXP(_mea_prob.o.get(P_st, j));
#ifdef DEBUG
	if (_len+1 <= _width) {
	  ScoreT w = NEG_INF();
	  for (int k = 0; k < j; k++) {
	    LOGADD(w, _mea_prob.p.get(P_st, k, j));
	  }
	  for (int k = j; k <= _len; k++) {
	    LOGADD(w, _mea_prob.p.get(P_st, j-1, k));
	  }
	  Assert(abs(EXP(w) - prob0) < 0.001);
	}
#endif
	ScoreT prob = (1.0 - prob0);
	Assert(-1.0e-4 <= prob && prob <= (1.0 + 1.0e-4));
	_mea_prob.o.set(P_st, j, prob0);
	_mea_prob.o.set(I_st, j, prob);
	_mea_prob.o.set(O_st, j, prob);
      }
      if (prints_prob()) {
	print_prob(I_st, i, j, _mea_prob.o.get(I_st, j));
	print_prob(O_st, i, j, _mea_prob.o.get(O_st, j));
      }
    } else {// i < j
      ScoreT prob0 = EXP(_mea_prob.p.get(P_st, i, j));
      _mea_prob.p.set(P_st, i, j, prob0);
      if (prints_prob()) {
	print_prob(P_st, i, j, prob0);
      }
#ifdef USE_MEA_NUSSINOV
      ;// no-op do not use _mea_prob.p.get(I_st, i, j)
#else
      ScoreT prob = (_mea_prob.p.get(I_st, i, j-1) + _mea_prob.o.get(I_st, j));
      _mea_prob.p.set(I_st, i, j, prob);
#endif
    }
  }
  // Incremental update of `freq` for the window (i, j) after its lower index
  // moved to i: adds the single count of position i + 1 and, if the window
  // holds at least two positions, the pair count of (i + 1, i + 2). An empty
  // window (i >= j) changes nothing. In DEBUG builds the result is compared
  // with a full recount and the program dies on a mismatch.
  void freq_move_nw(Freq& freq, int i, int j) {
    if (i < j) {
      Alpha::CodeT head = seq(i+1);
      if (i+2 <= j) {
        Alpha::CodeT next = seq(i+2);
        Alpha::CodeT pair_code = Alpha::pcode(head, next);
        freq.p[pair_code] += 1;
      }
      freq.s[head] += 1;
    }
#ifdef DEBUG
    const Freq& recount = compute_freq_range(i, j);
    if (!freq.equal(recount)) {
      cout << "[ " << i << ", " << j << " ]\n";
      cout << "freq: \n" << freq.to_s()
           << "freq1: \n" << recount.to_s() << endl;
      Die("freq is inconsistent");
    }
#endif
  }
  // Incremental update of `freq` for the window (i, j) after its upper index
  // moved to j: adds the single count of position j and, if the window holds
  // at least two positions, the pair count of (j - 1, j). An empty window
  // (i >= j) changes nothing. In DEBUG builds the result is compared with a
  // full recount and the program dies on a mismatch.
  void freq_move_ne(Freq& freq, int i, int j) {
    if (i < j) {
      if (i <= j-2) {
        Alpha::CodeT prev = seq(j-1);
        Alpha::CodeT tail = seq(j);
        Alpha::CodeT pair_code = Alpha::pcode(prev, tail);
        freq.p[pair_code] += 1;
        freq.s[tail] += 1;
      } else {
        Alpha::CodeT tail = seq(j);
        freq.s[tail] += 1;
      }
    }
#ifdef DEBUG
    const Freq& recount = compute_freq_range(i, j);
    if (!freq.equal(recount)) {
      cout << "[ " << i << ", " << j << " ]\n";
      cout << "freq: \n" << freq.to_s()
           << "freq1: \n" << recount.to_s() << endl;
      Die("freq is inconsistent");
    }
#endif
  }
  // Incremental update of `freq` for the window (i, j) after its upper index
  // dropped to j: removes the single count of position j + 1 and, if the
  // window is non-empty, the pair count of (j, j + 1). Nothing is removed
  // when j is already the last column of row i, i.e. min(i + width - 1, len).
  // In DEBUG builds the result is compared with a full recount and the
  // program dies on a mismatch.
  void freq_move_sw(Freq& freq, int i, int j) {
    const bool has_dropped_column = (j < min(i+_width-1, _len));
    if (has_dropped_column && i < j) {
      Alpha::CodeT kept = seq(j);
      Alpha::CodeT dropped = seq(j+1);
      Alpha::CodeT pair_code = Alpha::pcode(kept, dropped);
      freq.p[pair_code] -= 1;
      freq.s[dropped] -= 1;
    } else if (has_dropped_column) {
      Alpha::CodeT dropped = seq(j+1);
      freq.s[dropped] -= 1;
    }
#ifdef DEBUG
    const Freq& recount = compute_freq_range(i, j);
    if (!freq.equal(recount)) {
      cout << "[ " << i << ", " << j << " ]\n";
      cout << "freq: \n" << freq.to_s()
           << "freq1: \n" << recount.to_s() << endl;
      Die("freq is inconsistent");
    }
#endif
  }
  // Incremental update of `freq` for the window (i, j) after its lower index
  // rose to i: removes the single count of position i and, if the window is
  // non-empty, the pair count of (i, i + 1). Nothing is removed when i is
  // already the first row of column j, i.e. max(0, j - width + 1).
  // In DEBUG builds the result is compared with a full recount and the
  // program dies on a mismatch.
  void freq_move_se(Freq& freq, int i, int j) {
    const bool has_dropped_row = (max(0, j-_width+1) < i);
    if (has_dropped_row && i < j) {
      Alpha::CodeT dropped = seq(i);
      Alpha::CodeT kept = seq(i+1);
      Alpha::CodeT pair_code = Alpha::pcode(dropped, kept);
      freq.p[pair_code] -= 1;
      freq.s[dropped] -= 1;
    } else if (has_dropped_row) {
      Alpha::CodeT dropped = seq(i);
      freq.s[dropped] -= 1;
    }
#ifdef DEBUG
    const Freq& recount = compute_freq_range(i, j);
    if (!freq.equal(recount)) {
      cout << "[ " << i << ", " << j << " ]\n";
      cout << "freq: \n" << freq.to_s()
           << "freq1: \n" << recount.to_s() << endl;
      Die("freq is inconsistent");
    }
#endif
  }
  // Builds the frequency table of the window (i, j) from scratch: one single
  // count for every position i + 1 .. j and one pair count for every
  // adjacent pair of positions inside that range. Returns an all-clean table
  // when the window is empty.
  Freq compute_freq_range(int i, int j) {
    Freq counts;
    counts.clean();
    const int first = (i+1);
    int pos = first;
    while (pos <= j) {
      Alpha::CodeT code = seq(pos);
      counts.s[code] += 1;
      // The first position has no predecessor inside the window.
      if (first < pos) {
        Alpha::CodeT prev_code = seq(pos-1);
        Alpha::CodeT pair_code = Alpha::pcode(prev_code, code);
        counts.p[pair_code] += 1;
      }
      ++pos;
    }
    return counts;
  }
  // Re-labels the cached neighbour scores in `curr_sc` for a move of the
  // current cell in the NW direction: the old centre becomes SE, the old SW
  // becomes S and the old W becomes SW. The remaining slots are left as they
  // are (their moves are kept below, commented out).
  // Statement order matters: S must take the old SW before SW is overwritten.
  // NOTE(review): presumably the untouched slots are refilled or not read by
  // the caller after this move - confirm.
  void curr_sc_move_nw(CurrSC& curr_sc) {
    // curr_sc[DST_E] = curr_sc[DST_NE];
    // curr_sc[DST_NE] = curr_sc[DST_N];
    // curr_sc[DST_N].fill(NEG_INF());

    curr_sc[DST_SE] = curr_sc[DST_C];
    // curr_sc[DST_C] = curr_sc[DST_NW];
    // curr_sc[DST_NW].fill(NEG_INF());

    curr_sc[DST_S] = curr_sc[DST_SW];
    curr_sc[DST_SW] = curr_sc[DST_W];
    // curr_sc[DST_W].fill(NEG_INF());
  }
  // Re-labels the cached neighbour scores in `curr_sc` for a move of the
  // current cell in the NE direction: the old centre becomes SW, the old SE
  // becomes S and the old E becomes SE. The remaining slots are left as they
  // are (their moves are kept below, commented out).
  // Statement order matters: S must take the old SE before SE is overwritten.
  // NOTE(review): presumably the untouched slots are refilled or not read by
  // the caller after this move - confirm.
  void curr_sc_move_ne(CurrSC& curr_sc) {
    // curr_sc[DST_W] = curr_sc[DST_NW];
    // curr_sc[DST_NW] = curr_sc[DST_N];
    // curr_sc[DST_N].fill(NEG_INF());

    curr_sc[DST_SW] = curr_sc[DST_C];
    // curr_sc[DST_C] = curr_sc[DST_NE];
    // curr_sc[DST_NE].fill(NEG_INF());

    curr_sc[DST_S] = curr_sc[DST_SE];
    curr_sc[DST_SE] = curr_sc[DST_E];
    // curr_sc[DST_E].fill(NEG_INF());
  }
  // Re-labels the cached neighbour scores in `curr_sc` for a move of the
  // current cell in the SW direction: the old NW becomes N, the old W becomes
  // NW and the old centre becomes NE. The remaining slots are left as they
  // are (their moves are kept below, commented out).
  // Statement order matters: N must take the old NW before NW is overwritten.
  // NOTE(review): presumably the untouched slots are refilled or not read by
  // the caller after this move - confirm.
  void curr_sc_move_sw(CurrSC& curr_sc) {
    curr_sc[DST_N] = curr_sc[DST_NW];
    curr_sc[DST_NW] = curr_sc[DST_W];
    // curr_sc[DST_W].fill(NEG_INF());

    curr_sc[DST_NE] = curr_sc[DST_C];
    // curr_sc[DST_C] = curr_sc[DST_SW];
    // curr_sc[DST_SW].fill(NEG_INF());

    // curr_sc[DST_E] = curr_sc[DST_SE];
    // curr_sc[DST_SE] = curr_sc[DST_S];
    // curr_sc[DST_S].fill(NEG_INF());
  }
  // Re-labels the cached neighbour scores in `curr_sc` for a move of the
  // current cell in the SE direction: the old NE becomes N, the old E becomes
  // NE and the old centre becomes NW. The remaining slots are left as they
  // are (their moves are kept below, commented out).
  // Statement order matters: N must take the old NE before NE is overwritten.
  // NOTE(review): presumably the untouched slots are refilled or not read by
  // the caller after this move - confirm.
  void curr_sc_move_se(CurrSC& curr_sc) {
    curr_sc[DST_N] = curr_sc[DST_NE];
    curr_sc[DST_NE] = curr_sc[DST_E];
    // curr_sc[DST_E].fill(NEG_INF());

    curr_sc[DST_NW] = curr_sc[DST_C];
    // curr_sc[DST_C] = curr_sc[DST_SE];
    // curr_sc[DST_SE].fill(NEG_INF());

    // curr_sc[DST_W] = curr_sc[DST_SW];
    // curr_sc[DST_SW] = curr_sc[DST_S];
    // curr_sc[DST_S].fill(NEG_INF());
  }
private:
  string _tag;
  Command _command;
  int _max_pair_dist;
  ScoreT _mea_inner_loop_coeff;
  ScoreT _mea_outer_loop_coeff;
  bool _mea_separate_loop_type;
  SeqT* _seq;
  int _len;
  int _width;
  int _period;
  int _block_size;
  bool _is_triangular;

  ScoreT _partition_coeff;
  ScoreT _dp_score;

  CurrSC _curr_sc;
  CurrCode _curr_code;

  StructT _ct_pred;
  StructT* _ct_constr;
  bool _has_constraint;
  bool _has_constraint_nus;
  bool _constraint;
  Constr _constr;
  bool _allow_non_canonical;
  bool _print_prob;
  bool _print_loop_prob;
  ScoreT _print_prob_cutoff;
  ostream* _f_prob;
  ostream* _f_struct;
  
  Inside _inside;
  Outside _outside;
  CYK _cyk;
  Tau _tau;
  NusTau _nus_tau;
  MEAProb _mea_prob;
  ProbScales _prob_scales;
  Freq _freq0;
  Freq _freq;
  
  FeatureCounter _param;
  FeatureCounter _counter;
  Array<ScoreT> _count_vector;
};
}
#endif
