# Copyright (c) 2014,
# Mathias Kuhring, KuhringM@rki.de, Robert Koch Institute, Germany, 
# All rights reserved. For details, please note the license.txt.

# Import of a quality or coverage file in FASTA-like style, where the single
# quality/coverage values are separated by whitespace.
# Code inspired by the function "read.fasta" from the R package "seqinr"
#
# Coverage example:
# >Contig00001
# 31 31 31 37 35 37 37 37 39 39 39 32 35 10 34 36 38 36 25 31 32 37 33 32 36 36 
#
# Args:
#   file: path of the file to read.
#
# Returns:
#   A list with one numeric vector per ">" record, named by the first word of
#   the record's header line (without the leading ">"). Every value is
#   increased by 1 (pseudo count). A record without any value lines yields an
#   empty vector. If the file does not exist, a message is printed and NULL is
#   returned invisibly.
#
# Stops with an error if the file contains no line starting with ">".
# Reads the global flag DEBUGGING to decide whether to print the file name.
read.coverage <- function(file){

  if (!file.exists(file)){
    cat(paste("can not read file:", file, "\n"))
    return(invisible(NULL))
  }

  if (DEBUGGING){ cat(paste("read file:", file, "\n")) }

  lines <- readLines(file)

  # header lines mark the beginning of each record
  ind <- which(substr(lines, 1L, 1L) == ">")
  nseq <- length(ind)
  if (nseq == 0) {
    stop("no line starting with a > character found")
  }

  # value lines of record i are lines[start[i]:end[i]]; a record ends right
  # before the next header or at the end of the file
  start <- ind + 1
  end <- c(ind[-1] - 1, length(lines))

  coverage <- lapply(seq_len(nseq), function(i) {
    # a header without value lines gives start > end; start:end would then
    # count downwards and pick up header lines, so return an empty record
    if (start[i] > end[i]) {
      return(numeric(0))
    }
    # split on any run of whitespace and drop empty tokens, so that leading,
    # repeated or tab separators do not turn into NA values
    values <- unlist(strsplit(lines[start[i]:end[i]], "[[:space:]]+"))
    values <- values[nzchar(values)]
    # workaround for zero coverage positions:
    # pseudo count, increase all by 1
    as.integer(values) + 1
  })

  # record title: first space-separated word of the header, without the ">"
  firstwords <- vapply(strsplit(lines[ind], " "),
                       function(words) words[1], character(1))
  names(coverage) <- substr(firstwords, 2, nchar(firstwords))

  return(coverage)
}


# Import of files with SuRankCo features, as exported from the feature module.
#
# Args:
#   files: character vector with the paths of tab-separated files that have a
#          header line.
#
# Returns:
#   A list with one data.frame per file, in the order of 'files'. Column
#   classes are taken from get.colClasses(file, 3). An empty 'files' vector
#   yields an empty list.
readSurankcoFeatures <- function(files){
  features <- vector(mode="list", length=length(files))
  # seq_along instead of 1:length(files), which would iterate over c(1, 0)
  # for empty input and try to read a non-existing file
  for (i in seq_along(files)){
    features[[i]] <- read.table(file=files[i], sep="\t", dec = ".", header=TRUE,
                                colClasses=get.colClasses(files[i], 3))
  }
  return(features)
}


# Import of files with SuRankCo scores, as exported from the score module.
#
# Args:
#   files: character vector with the paths of tab-separated files that have a
#          header line.
#
# Returns:
#   A list with one data.frame per file, in the order of 'files'. Column
#   classes are taken from get.colClasses(file, 3). An empty 'files' vector
#   yields an empty list.
readSurankcoScores <- function(files){
  scores <- vector(mode="list", length=length(files))
  # seq_along instead of 1:length(files), which would iterate over c(1, 0)
  # for empty input and try to read a non-existing file
  for (i in seq_along(files)){
    scores[[i]] <- read.table(file=files[i], sep="\t", dec = ".", header=TRUE,
                              colClasses=get.colClasses(files[i], 3))
  }
  return(scores)
}


# Predefine the column classes for read.table imports.
# The first char.num columns will be character and the rest numeric.
#
# Args:
#   filename: path of the file whose first line holds the column names.
#   char.num: number of leading columns to declare as character (default 1).
#
# Returns:
#   A character vector of column classes, named by the column names read from
#   the first line of the file. If the header has fewer than char.num columns,
#   all of them are declared character.
#
# Reads the global flag DEBUGGING to decide whether to print the result.
get.colClasses <- function(filename, char.num=1){
  feature.names <- scan(filename, what="character", nlines=1, quiet=TRUE)
  n.cols <- length(feature.names)
  # clamp to the available columns; otherwise rep() below would be called
  # with a negative count and fail with "invalid 'times' argument"
  n.char <- max(0, min(char.num, n.cols))
  colClasses <- c(rep("character", n.char),
                  rep("numeric", n.cols - n.char))
  names(colClasses) <- feature.names
  if (DEBUGGING){ print(colClasses) }
  return(colClasses)
}