#' Process .blk files from CodonW
#'
#' Reads every \code{.blk} file found in \code{folder_path}, parses the codon
#' usage table of each file, and stacks the per-file tables into a single
#' data frame.
#'
#' The last line of each file is discarded before parsing, rows whose amino
#' acid is \code{"TER"} are removed, and the species name is the file name
#' without its extension. A file that yields no rows contributes nothing to
#' the result instead of raising an error.
#'
#' @param folder_path The path to the folder containing .blk files to process.
#'
#' @return A combined data frame of all processed .blk files with columns:
#'   \item{AA}{Amino acid abbreviation}
#'   \item{Codon}{DNA codon sequence}
#'   \item{RSCU}{Relative Synonymous Codon Usage value}
#'   \item{Fill}{Position index within amino acid group}
#'   \item{Species}{Species name derived from file name}
#'
#' @examples
#' # Using example data
#' example_dir <- system.file("extdata", "codonw", package = "ggmRSCU")
#' result <- read_codonw(example_dir)
#' head(result)
#'
#' @export
read_codonw <- function(folder_path) {
  # dplyr is the only package this function actually calls; fail with a
  # clear message instead of ignoring requireNamespace()'s FALSE result.
  if (!requireNamespace("dplyr", quietly = TRUE)) {
    stop("Package 'dplyr' is required but is not installed.", call. = FALSE)
  }

  # Errors when the folder holds no .blk files.
  blk_files <- list_files_with_extension(folder_path, ".blk")

  all_results <- lapply(blk_files, function(file_path) {
    raw_lines <- readLines(file_path, warn = FALSE)
    result_cleaned <- prepare_result(clean_data_from_txt(raw_lines))

    species_name <- tools::file_path_sans_ext(basename(file_path))

    # rep() keeps the assignment valid for zero-row results; assigning a
    # length-one value to a zero-row data frame is an error.
    result_cleaned$Species <- rep(species_name, nrow(result_cleaned))
    result_cleaned
  })

  dplyr::bind_rows(all_results)
}

# Return the full paths of the entries directly inside `folder_path` whose
# names end with `extension`.
#
# `extension` is compared as a literal suffix (e.g. ".blk"), not as a regular
# expression, so characters such as "." or "+" carry no special meaning.
# Stops with an error when nothing matches.
list_files_with_extension <- function(folder_path, extension) {
  candidates <- list.files(path = folder_path, full.names = TRUE)
  files <- candidates[endsWith(basename(candidates), extension)]

  if (length(files) == 0) {
    stop(paste("No", extension, "files found in the specified folder."))
  }

  files
}

# Turn the raw lines of one .blk file into a 16-column character matrix.
#
# Steps: drop the final line, collapse whitespace runs, split each line on
# single spaces, pad/truncate every line to 16 cells, discard lines that
# produced no cells at all, then mark blanks with "*" and realign the
# amino-acid columns via the helper functions.
#
# `data` is a character vector of lines (as returned by readLines()).
# Returns a character matrix with 16 columns; it has zero rows when no line
# carries any content.
clean_data_from_txt <- function(data) {
  max_columns <- 16
  no_rows <- matrix(character(0), nrow = 0, ncol = max_columns)

  # The last line is always discarded, so fewer than two lines leaves
  # nothing to parse.
  if (length(data) < 2) {
    return(no_rows)
  }

  data_clean <- gsub("\\s+", " ", data[-length(data)])
  data_split <- strsplit(data_clean, " ")

  # Setting length() pads short rows with NA and truncates long ones.
  data_matrix <- do.call(rbind, lapply(data_split, function(x) {
    length(x) <- max_columns
    x
  }))

  # Empty lines split into zero tokens and therefore become all-NA rows.
  # drop = FALSE keeps a matrix even when only one (or no) row survives.
  has_content <- rowSums(!is.na(data_matrix)) > 0
  data_matrix_clean <- data_matrix[has_content, , drop = FALSE]

  # Stop here for zero rows: the row-wise helpers below are built on
  # apply(), which does not preserve the shape of zero-row input.
  if (nrow(data_matrix_clean) == 0) {
    return(no_rows)
  }

  data_filled <- fill_empty_cells(data_matrix_clean)

  data_filled_moved <- move_stars_to_aa_columns(data_filled)
  fill_stars_with_previous_aa(data_filled_moved)
}

# Replace every blank ("") or missing (NA) cell of `data_matrix` with "*"
# and return the modified matrix.
fill_empty_cells <- function(data_matrix) {
  needs_star <- is.na(data_matrix) | data_matrix == ""
  data_matrix[needs_star] <- "*"
  data_matrix
}

# Re-align rows whose amino-acid label is missing in one or more of the
# four column groups.
#
# Columns 1, 5, 9 and 13 are the amino-acid slots. For every "*" found
# outside those slots, the first slot that holds a non-"*" value directly
# followed by an all-digit cell is treated as a group that lost its label:
# everything from that slot onwards is shifted one cell to the right (the
# last cell falls off) and "*" is written into the slot.
#
# `data_filled` is a character matrix (16 columns in this file's usage).
# Returns a matrix of the same dimensions.
move_stars_to_aa_columns <- function(data_filled) {
  # apply() over a zero-row matrix returns a zero-length vector, which t()
  # would turn into a 1 x 0 matrix; hand zero-row input back unchanged.
  if (nrow(data_filled) == 0) {
    return(data_filled)
  }

  target_columns <- c(1, 5, 9, 13)

  realign_row <- function(row) {
    n <- length(row)
    # Star positions are taken from the row as it arrives; each star outside
    # an amino-acid slot triggers at most one shift.
    misplaced <- setdiff(which(row == "*"), target_columns)

    for (k in seq_along(misplaced)) {
      for (i in target_columns) {
        # Bounds are checked first so short rows never index past the end.
        if (i + 1 <= n && row[i] != "*" &&
              grepl("^\\d+$", row[i + 1])) {
          # The right-hand side is evaluated in full before assignment, so
          # this is a plain one-cell right shift starting at slot i.
          row[(i + 1):n] <- row[i:(n - 1)]
          row[i] <- "*"
          break
        }
      }
    }
    row
  }

  t(apply(data_filled, 1, realign_row))
}

# Forward-fill "*" placeholders in every fourth column (1, 5, 9, ...) with
# the value found in the row above. A "*" in the first row has nothing above
# it and is left as is. Matrices with fewer than two rows are returned
# unchanged.
fill_stars_with_previous_aa <- function(data_filled) {
  row_count <- nrow(data_filled)
  if (row_count < 2) {
    return(data_filled)
  }

  # Columns are independent of each other, so each one is filled top-down
  # on its own and then written back.
  for (aa_col in seq(1, ncol(data_filled), by = 4)) {
    labels <- data_filled[, aa_col]
    for (r in 2:row_count) {
      if (labels[r] == "*") {
        labels[r] <- labels[r - 1]
      }
    }
    data_filled[, aa_col] <- labels
  }

  data_filled
}

# Convert the aligned 16-column matrix into a long data frame with columns
# AA, Codon, RSCU and Fill.
#
# The matrix is read as four side-by-side groups of (AA, Codon, Count, RSCU);
# the Count columns are dropped, the groups are stacked by combine_rows(),
# and rows whose AA is "TER" are removed. Zero-row input returns an empty
# data frame with the same four columns.
prepare_result <- function(data_filled_moved) {
  if (nrow(data_filled_moved) == 0) {
    empty <- data.frame(AA = character(), Codon = character(),
                        RSCU = numeric(), Fill = integer())
    return(empty)
  }

  fields <- c("AA", "Codon", "Count", "RSCU")
  suffixes <- c("", ".1", ".2", ".3")

  wide <- as.data.frame(data_filled_moved, stringsAsFactors = FALSE)
  # outer() varies the field fastest, giving AA, Codon, Count, RSCU, AA.1, ...
  colnames(wide) <- as.vector(outer(fields, suffixes, paste0))

  is_count <- grepl("Count", colnames(wide))
  long <- combine_rows(wide[, !is_count])

  is_stop <- long$AA %in% "TER"
  long[!is_stop, ]
}

# Stack the side-by-side (AA, Codon, RSCU) column groups of a wide data
# frame into one long data frame.
#
# Groups are identified by the suffixes "", ".1", ".2" and ".3"; a group is
# used only when all three of its columns are present. Output rows follow
# the input row order and, within one input row, the suffix order. Entries
# with a missing AA, a missing Codon, or an RSCU that is not numeric are
# dropped (non-numeric RSCU cells raise the usual coercion warning from
# as.numeric()).
#
# Returns a data frame with columns AA (character), Codon (character),
# RSCU (numeric) and Fill (integer running index within each AA value).
combine_rows <- function(data_moved_df_cleaned) {
  available <- names(data_moved_df_cleaned)
  suffixes <- Filter(
    function(s) all(paste0(c("AA", "Codon", "RSCU"), s) %in% available),
    c("", ".1", ".2", ".3")
  )

  # Transposing the rows-by-groups matrix before flattening yields the
  # values in row-major order: row 1 for every group, then row 2, and so on.
  interleave <- function(prefix) {
    block <- as.matrix(data_moved_df_cleaned[paste0(prefix, suffixes)])
    as.vector(t(block))
  }

  aa <- as.character(interleave("AA"))
  codon <- as.character(interleave("Codon"))
  rscu <- as.numeric(interleave("RSCU"))

  keep <- !is.na(aa) & !is.na(codon) & !is.na(rscu)

  # Built in one step instead of rbind()-ing single rows in a loop; row
  # names come out as the default 1..n sequence.
  result <- data.frame(
    AA = aa[keep],
    Codon = codon[keep],
    RSCU = rscu[keep],
    stringsAsFactors = FALSE
  )

  if (nrow(result) > 0) {
    result$Fill <- ave(seq_along(result$AA), result$AA, FUN = seq_along)
  } else {
    result$Fill <- integer(0)
  }

  result
}
