import {groupBy, capitalize, sum, isEmpty} from "lodash"
import {stemmer} from "./stemmer";
// Stop words loaded from ./stopWords.json; naiveTitleCase leaves these
// lowercase unless they are the first word of a line.
const stopWords = new Set(require("./stopWords.json"))
// Known acronyms from ./acronyms.json, lowercased so that membership checks
// can be made against lowercased text.
const acronyms = new Set(require("./acronyms.json").map(txt=>txt.toLowerCase()))
// NOTE(review): presumably acronyms that are also ordinary words ("us", "ice").
// Not referenced anywhere in this part of the file — confirm where it is used.
const ambiguousAcronyms = new Set(["US", "ICE"])

// Spellings with fixed casing/punctuation: a few hand-picked names plus a
// dotted form of every entry in `acronyms` (letters joined by "." with a
// trailing ".", e.g. "abc" -> "a.b.c.").
// NOTE(review): `acronyms` holds lowercased strings, so the generated dotted
// forms are lowercase, unlike the hand-written "U.S." — confirm this is intended.
const overrides = new Set([
    "U.S.", // most instances are probably "us"?
    "Jay-Z", "McConnell",
    "DeVito",
    ...[...acronyms].map(txt=>(txt.split("").join(".") + "."))
])
// Groups the overrides by a normalized key: lowercased, with "." and "-"
// removed (e.g. "U.S." -> "us", "Jay-Z" -> "jayz"). Each key maps to an array
// of the override spellings that share it.
const overridesLookup = groupBy([...overrides], txt=>txt.toLowerCase().replace(/[\.\-]/g, ""))


/**
 * Applies a simple title-casing scheme to text, one line at a time.
 *
 * The whole input is lowercased first. Within each line, words are separated
 * on runs of whitespace and re-joined with single spaces, so the original
 * spacing inside a line is not preserved. Per word:
 *   - acronyms (per isAcronym) are uppercased, wherever they appear;
 *   - the first word of a line is capitalized;
 *   - stop words (per isStopWord) elsewhere in the line stay lowercase;
 *   - every other word is capitalized.
 *
 * @param {string} str - Text to convert; may contain several "\n"-separated lines.
 * @returns {string} The converted text with the same number of lines.
 */
export const naiveTitleCase = (str) => {
    // Decide the casing of one word from its position within the line.
    const caseWord = (token, position) => {
        if (isAcronym(token)) return token.toUpperCase()

        const leadsLine = position === 0
        if (!leadsLine && isStopWord(token)) return token.toLowerCase()

        return capitalize(token)
    }

    // Split a line on whitespace, case each word, glue back with single spaces.
    const caseLine = (row) => row
        .split(/\s+/)
        .map((token, position) => caseWord(token, position))
        .join(" ")

    return str
        .toLowerCase()
        .split(/\n/)
        .map((row) => caseLine(row))
        .join("\n")
}

/**
 * Breaks text into lowercase word tokens.
 *
 * Periods are deleted before tokenizing so a dotted acronym stays in one
 * piece ("U.S.A." becomes "usa"). Every other character outside a-z acts as
 * a separator, which means digits, apostrophes and non-ASCII letters split
 * or drop parts of words.
 *
 * @param {string} txt - Text to tokenize.
 * @returns {string[]} Tokens in order of appearance; empty array if none.
 */
export const tokenize = (txt) => {
    const lowered = txt.toLowerCase()
    const withoutDots = lowered.replace(/\./g, "") // acronyms
    // Maximal runs of a-z are exactly the non-empty pieces between separators.
    const runs = withoutDots.match(/[a-z]+/g)
    return runs === null ? [] : runs
}

/**
 * Tells whether a word is in the stop-word list.
 *
 * The lookup is an exact, case-sensitive match against `stopWords`.
 *
 * @param {string} word - Word to look up.
 * @returns {boolean} True when the word is a stop word.
 */
export const isStopWord = (word) => stopWords.has(word)

/**
 * Reports whether a word should be treated as an acronym.
 *
 * A word qualifies when it is an entry of the lowercased `acronyms` set, or
 * when it is a dotted initialism: one or more single lowercase letters, each
 * followed by a period (e.g. "u.s.").
 *
 * NOTE: despite the name, both checks only recognise lowercase input —
 * "U.S." or "NASA" return false — so callers are expected to lowercase first.
 *
 * @param {string} word - Word to test, already lowercased.
 * @returns {boolean} True when the word is a known acronym or dotted initialism.
 */
export const isAcronym = (word) => {
    // RegExp#test keeps the result a strict boolean; String#match would hand
    // callers a match array or null instead.
    return acronyms.has(word) || /^([a-z]\.)+$/.test(word)
}

// Re-exports the stemmer imported from ./stemmer under the shorter name `stem`.
export const stem = stemmer

// Per-character width table used by stringWidth.
// Keys are single characters: the printable ASCII range (letters, digits,
// punctuation and space) except the double quote, which has no entry.
// NOTE(review): values are presumably widths measured for one specific
// font and size; the unit is not stated here — confirm before reusing.
const charWidth = {
    "0": 6.361,
    "1": 3.633,
    "2": 5.512,
    "3": 5.564,
    "4": 5.37,
    "5": 5.505,
    "6": 5.849,
    "7": 4.53,
    "8": 5.662,
    "9": 5.915,
    "_": 7.289,
    "t": 3.877,
    "l": 2.75,
    "W": 9.917,
    "s": 4.601,
    "c": 5.335,
    ">": 5.39,
    "h": 6.022,
    "j": 2.172,
    "]": 3.48,
    "f": 3.951,
    "o": 5.845,
    "J": 3.759,
    "V": 6.643,
    "-": 4.8,
    "\`": 2.073,
    "C": 7.057,
    "E": 6.147,
    ")": 2.914,
    "M": 9.227,
    "r": 4.395,
    "b": 5.939,
    "|": 2.401,
    "Z": 6.109,
    "D": 7.39,
    "u": 5.977,
    "m": 8.949,
    "{": 3.294,
    "p": 6.07,
    "k": 5.501,
    "a": 5.146,
    "F": 5.484,
    "w": 8.057,
    "#": 8.32,
    "g": 5.702,
    "'": 2.141,
    "n": 6.13,
    "L": 5.642,
    "z": 5.15,
    "q": 5.75,
    "@": 7.939,
    "y": 5.298,
    "S": 5.64,
    "i": 2.867,
    "x": 5.345,
    "d": 6.02,
    "T": 6.243,
    "[": 3.623,
    "K": 6.725,
    "<": 5.275,
    "H": 7.877,
    ":": 2.547,
    "=": 5.317,
    "N": 7.498,
    "P": 5.989,
    "!": 2.728,
    "O": 7.835,
    "A": 6.402,
    ",": 2.416,
    "Y": 6.318,
    "v": 5.062,
    "$": 5.413,
    "e": 5.397,
    ";": 2.737,
    "&": 7.176,
    "+": 5.307,
    "G": 7.534,
    "X": 6.559,
    "B": 6.132,
    "(": 3.012,
    "}": 3.392,
    "I": 3.259,
    "R": 6.199,
    "Q": 7.974,
    "/": 6.525,
    "?": 4.792,
    "*": 5.183,
    "~": 5.367,
    ".": 2.377,
    "^": 8.229,
    "U": 7.403,
    "%": 8.652,
    "\\": 3.405,
    " ": 4.8
}

/**
 * Estimates the width of a string by adding up the per-character widths
 * listed in `charWidth`.
 *
 * Characters with no table entry (for example the double quote or any
 * non-ASCII character) count as 0. The result is always a number: the empty
 * string and strings made only of unknown characters both give 0.
 *
 * @param {string} txt - Text to measure.
 * @returns {number} Total width, in the same unit as the `charWidth` values.
 */
export const stringWidth = (txt) => {
    // lodash `sum` skips undefined entries and returns undefined when nothing
    // is left to add, so unknown characters are mapped to 0 explicitly.
    return sum(txt.split("").map(c => charWidth[c] || 0))
}