/*
 * seq_file.hpp
 */
#ifndef SEQ_FILE_HPP
#define SEQ_FILE_HPP
#include "./array_file.hpp"
#include "./alpha.hpp"
#include "./ct_file.hpp"

namespace RFOLD {
class SeqFile {// 1 based indexing, read only
public:
  typedef char T;
  typedef T              value_type;
  typedef T*             iterator;
  typedef const T*       const_iterator;
  typedef T&             reference;
  typedef const T&       const_reference;
  typedef std::size_t    size_type;
  typedef std::ptrdiff_t difference_type;
  enum {DEFAULT_NCODE = Alpha::N_N};
  typedef CTFile CT;

  SeqFile(const std::string& tag) 
    : _data(tag + ".seq", 1000, 10, DEFAULT_NCODE) , _ct(tag + ".ct") {}
  ~SeqFile() {}
  bool empty() const {return (_data.size() <= 1);} // Alpha::N_N is padded at index = 0
  std::size_t size() const {return (_data.size() - 1);} // Alpha::N_N is padded at index = 0
  std::string name() const {return _name;}
  bool has_ct() const {return !_ct.empty();}
  CT& ct() {Check(has_ct()); return _ct;}
  void set_tag(const std::string& tag) {
    _data.set_file(tag + ".seq");
    _data.set_file(tag + ".ct");
  }
  void set_margin(difference_type margin) {
    _data.set_margin(margin);
    _ct.set_margin(margin);
  }
  void set_range_size(difference_type range_size) {
    _data.set_range_size(range_size);
    _ct.set_range_size(range_size);
  }
  reference operator[](difference_type i) {return _data[i];}
  const_reference operator[](difference_type i) const {return _data[i];}
  std::string to_fasta() {
    if (empty()) return "";

    std::ostringstream oss;
    oss << ">" << _name << "\n";
    difference_type n = size();
    difference_type width = 70;
    difference_type range_size = _data.range_size();
    for (difference_type i = 1; i <= n; i++) {// 1 based
      if (i % range_size == 1) {
	difference_type e = std::min((i + range_size), (n + 1));
	load_range(i, e);
      }
      oss << Alpha::ncode_to_str((*this)[i]);
      if ((i % width == 0) || i == n) {
	oss << "\n";
      }
    }
    return oss.str();
  }
  string to_s() {return to_fasta();}
  void print() {std::cout << to_s() << std::flush;}
  void get_range(difference_type& b, difference_type& e) {
    return _data.get_range(b, e);}
  difference_type range_begin() const {return _data.range_begin();}
  difference_type range_end() const {return _data.range_end();}
  void load_range(difference_type b, difference_type e) {
    Assert(1 <= b); return _data.load_range(b, e);}
  void remove_non_canonical_pairs() {
    if (!has_ct()) return;

    for (difference_type i = 1; i <= (difference_type)size(); i++) {
      _ct.load_range_if_needed(i, i+1);
      if (_ct.is_left_stem(i)) {
	difference_type j = _ct.pair_pos(i);
	_data.load_range_if_needed(i, i+1);
	Alpha::CodeT ci = _data[i];
	_data.load_range_if_needed(j, j+1);
	Alpha::CodeT cj = _data[j];
	if (!Alpha::is_canonical(ci, cj)) {
	  _ct.unpair(i);
	}
      }
    }
  }
  bool read_fasta(std::istream& fi) {
    _name = "";
    _data.clear();
    _ct.clear();
    bool has_more = false;
    bool prev_is_nl = true;
    char c = 0;
    while (fi.get(c)) {
      if (c == '>' && prev_is_nl) {
        has_more = true;
        break;
      }
      prev_is_nl = (c == '\n');
    }
    if (!has_more) return false;

    _name = read_name(fi);
    
    has_more = false;
    prev_is_nl = true;
    _data.push_back(DEFAULT_NCODE); // padding. 0 based -> 1 based
    while (fi.get(c)) {
      if (c == '>' && prev_is_nl) {
	has_more = true;
        break;
      }
      prev_is_nl = (c == '\n');
      if (SPACE_CHARS().find(c) != std::string::npos) continue;

      char c1 = Alpha::ncode(c);
      Check(!Alpha::is_bad_ncode(c1), "bad char: %c in sequence: %s", c, _name.c_str());
      _data.push_back(c1);
    }
    _data.flush();
    Check(!empty(), "no sequence for %s", _name.c_str());

    if (has_more) {
      fi.unget();
      std::istream::pos_type pos = fi.tellg();
      fi.get(c);
      std::string next_name = read_name(fi);
      if (next_name != ss_tag()) {
	fi.seekg(pos);
      } else {// read secondary structure
	_ct.resize(size());
	_ct.read_structure(fi);
	// cout << _ct.sscons() << endl;
      }
    }
    return true;
  }

private:
  ArrayFile<T> _data;
  CTFile _ct;
  std::string _name;

  SeqFile();
  SeqFile(const SeqFile& other);
  static const string& SPACE_CHARS() {static const string s(" \f\n\r\t\v"); return s;}

  std::string read_name(std::istream& fi) {
    std::string buf = "";
    char c = 0;
    while (fi.get(c)) {
      if (c == '\n') break;
      buf.push_back(c);
    }
    std::string::size_type b = buf.find_first_not_of(SPACE_CHARS());
    Check(b != std::string::npos, "no name");
    std::string::size_type e = buf.find_last_not_of(SPACE_CHARS());
    return buf.substr(b, (e - b + 1));
  }
  std::string ss_tag() const {return "structure";}
};
}
#endif
