/*
 * Decompiled with CFR 0.152.
 */
package org.languagetool.rules.ngrams;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.Optional;
import java.util.function.Predicate;
import java.util.stream.Collectors;
import org.languagetool.AnalyzedSentence;
import org.languagetool.Language;
import org.languagetool.languagemodel.LanguageModel;
import org.languagetool.rules.ngrams.GoogleToken;
import org.languagetool.rules.ngrams.Probability;
import org.languagetool.tokenizers.Tokenizer;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Static helpers for building n-gram contexts around a token and for turning
 * the pseudo-probabilities returned by a {@link LanguageModel} into a single
 * 3-gram or 4-gram score for a candidate replacement.
 */
public final class LanguageModelUtils {
    private static final Logger logger = LoggerFactory.getLogger(LanguageModelUtils.class);

    /**
     * Token text that is skipped when looking up a token by its start position.
     * Presumably the sentence-start marker emitted by
     * {@code GoogleToken.getGoogleTokens} - confirm against that class.
     */
    private static final String SENTENCE_START = "_START_";

    /** If every n-gram of a candidate has a coverage below this value, the score is 0. */
    private static final float MIN_COVERAGE = 0.5f;

    private LanguageModelUtils() {
    }

    /**
     * Returns the tokenizer used to split text for n-gram lookups, which is
     * simply the language's own word tokenizer.
     *
     * @param language language whose word tokenizer is requested
     * @return {@code language.getWordTokenizer()}
     */
    static Tokenizer getGoogleStyleWordTokenizer(Language language) {
        return language.getWordTokenizer();
    }

    /**
     * Convenience overload for a single replacement given as plain text. The
     * text is wrapped in a {@code GoogleToken} built with the arguments
     * {@code (newToken, 0, newToken.length())} and the call is delegated to the
     * list-based overload.
     *
     * @param token    token (taken from {@code tokens}) around which the context is built
     * @param tokens   tokens of the sentence
     * @param newToken text that takes the place of {@code token} in the result
     * @param toLeft   number of non-whitespace tokens wanted to the left
     * @param toRight  number of non-whitespace tokens wanted to the right
     * @return the context as token texts
     * @throws RuntimeException if {@code token} is not contained in {@code tokens}
     */
    static List<String> getContext(GoogleToken token, List<GoogleToken> tokens, String newToken, int toLeft, int toRight) {
        GoogleToken replacement = new GoogleToken(newToken, 0, newToken.length());
        return LanguageModelUtils.getContext(token, tokens, Collections.singletonList(replacement), toLeft, toRight);
    }

    /**
     * Builds the context for {@code GoogleToken}s and returns it as token texts.
     * Whitespace is detected with {@code GoogleToken::isWhitespace} and a token
     * with the text {@code "."} is used as padding on the right.
     *
     * @param token     token (taken from {@code tokens}) around which the context is built
     * @param tokens    tokens of the sentence
     * @param newTokens tokens that take the place of {@code token} in the result
     * @param toLeft    number of non-whitespace tokens wanted to the left
     * @param toRight   number of non-whitespace tokens wanted to the right
     * @return the {@code token} field of every context element, in order
     * @throws RuntimeException if {@code token} is not contained in {@code tokens}
     */
    static List<String> getContext(GoogleToken token, List<GoogleToken> tokens, List<GoogleToken> newTokens, int toLeft, int toRight) {
        GoogleToken sentenceEnd = new GoogleToken(".", 0, 0);
        List<GoogleToken> context = LanguageModelUtils.getContext(token, tokens, newTokens, toLeft, toRight, GoogleToken::isWhitespace, sentenceEnd);
        return context.stream().map(t -> t.token).collect(Collectors.toList());
    }

    /**
     * Builds {@code [left context] + newTokens + [right context]} around the
     * first occurrence of {@code token} in {@code tokens}.
     *
     * <p>Elements for which {@code isWhitespace} returns {@code true} are skipped
     * and do not count towards {@code toLeft} / {@code toRight}. When the right
     * side runs past the end of {@code tokens}, {@code endToken} is appended
     * until {@code toRight} elements have been added.
     *
     * <p>When the left side runs past the start of {@code tokens} before
     * {@code toLeft} elements were collected, the method returns early with
     * <em>all</em> elements left of {@code token} (whitespace included) followed
     * by {@code newTokens}; no right context is added in that case.
     * NOTE(review): this fallback ignores {@code toRight} and does not filter
     * whitespace - confirm that this is intended.
     *
     * @param token        element to locate in {@code tokens} (via {@code List.indexOf})
     * @param tokens       the full sequence
     * @param newTokens    elements inserted in place of {@code token}
     * @param toLeft       number of non-whitespace elements wanted to the left
     * @param toRight      number of non-whitespace elements wanted to the right
     * @param isWhitespace decides which elements are skipped
     * @param endToken     padding used when the right side is exhausted
     * @param <T>          element type
     * @return a new mutable list holding the context
     * @throws RuntimeException if {@code token} is not contained in {@code tokens}
     */
    public static <T> List<T> getContext(T token, List<T> tokens, List<T> newTokens, int toLeft, int toRight, Predicate<T> isWhitespace, T endToken) {
        int pos = tokens.indexOf(token);
        if (pos == -1) {
            throw new RuntimeException(String.format("Token not found: '%s' in tokens %s", token, tokens));
        }
        List<T> result = new ArrayList<>();
        int added = 0;
        for (int offset = 1; added < toLeft; offset++) {
            int index = pos - offset;
            if (index < 0) {
                // Not enough elements on the left: return everything before pos, unfiltered,
                // followed by the replacement, and skip the right context entirely.
                List<T> fallback = new ArrayList<>(tokens.subList(0, pos));
                fallback.addAll(newTokens);
                return fallback;
            }
            T candidate = tokens.get(index);
            if (!isWhitespace.test(candidate)) {
                // Walking right-to-left, so prepend to keep the original order.
                result.add(0, candidate);
                added++;
            }
        }
        result.addAll(newTokens);
        added = 0;
        for (int offset = 1; added < toRight; offset++) {
            int index = pos + offset;
            if (index >= tokens.size()) {
                result.add(endToken);
                added++;
            } else {
                T candidate = tokens.get(index);
                if (!isWhitespace.test(candidate)) {
                    result.add(candidate);
                    added++;
                }
            }
        }
        return result;
    }

    /**
     * Computes the 3-gram score of {@code candidate} when it replaces the token
     * that starts at {@code position} in {@code sentence}.
     *
     * @param lang      language providing the tokenizer
     * @param lm        language model queried for pseudo-probabilities
     * @param position  start position of the token to replace
     * @param sentence  sentence containing the token
     * @param candidate replacement text
     * @return the score, or {@code 0.0} if no token starts at {@code position}
     *         (a warning is logged in that case)
     */
    public static double get3gramProbabilityFor(Language lang, LanguageModel lm, int position, AnalyzedSentence sentence, String candidate) {
        Tokenizer tokenizer = LanguageModelUtils.getGoogleStyleWordTokenizer(lang);
        List<GoogleToken> tokens = GoogleToken.getGoogleTokens(sentence, true, tokenizer);
        Optional<GoogleToken> token = LanguageModelUtils.findTokenAt(tokens, position, sentence);
        if (!token.isPresent()) {
            return 0.0;
        }
        return LanguageModelUtils.get3gramProbabilityFor(lang, lm, token.get(), tokens, candidate);
    }

    /**
     * Computes the 4-gram score of {@code candidate} when it replaces the token
     * that starts at {@code position} in {@code sentence}.
     *
     * @param lang      language providing the tokenizer
     * @param lm        language model queried for pseudo-probabilities
     * @param position  start position of the token to replace
     * @param sentence  sentence containing the token
     * @param candidate replacement text
     * @return the score, or {@code 0.0} if no token starts at {@code position}
     *         (a warning is logged in that case)
     */
    public static double get4gramProbabilityFor(Language lang, LanguageModel lm, int position, AnalyzedSentence sentence, String candidate) {
        Tokenizer tokenizer = LanguageModelUtils.getGoogleStyleWordTokenizer(lang);
        List<GoogleToken> tokens = GoogleToken.getGoogleTokens(sentence, true, tokenizer);
        Optional<GoogleToken> token = LanguageModelUtils.findTokenAt(tokens, position, sentence);
        if (!token.isPresent()) {
            return 0.0;
        }
        return LanguageModelUtils.get4gramProbabilityFor(lang, lm, token.get(), tokens, candidate);
    }

    /**
     * Computes a 3-gram score for {@code term} in place of {@code token}.
     *
     * <p>If {@code term} tokenizes to one token, three contexts are scored (term
     * at the left, in the middle and at the right of a 3-gram). If it tokenizes
     * to two tokens, the left and right 3-grams are scored and their mean
     * probability (with coverage 1) stands in for the middle one. Any other
     * token count - including zero - is unsupported and yields {@code 0.0}.
     *
     * @param lang   language providing the tokenizer
     * @param lm     language model queried for pseudo-probabilities
     * @param token  token to be replaced; must be contained in {@code tokens}
     * @param tokens tokens of the sentence
     * @param term   replacement text
     * @return the product of the three probabilities, or {@code 0.0} if the
     *         token count is unsupported or all three coverages are below
     *         the minimum coverage
     */
    static double get3gramProbabilityFor(Language lang, LanguageModel lm, GoogleToken token, List<GoogleToken> tokens, String term) {
        Tokenizer tokenizer = LanguageModelUtils.getGoogleStyleWordTokenizer(lang);
        List<GoogleToken> newTokens = GoogleToken.getGoogleTokens(term, false, tokenizer);
        Probability ngram3Left;
        Probability ngram3Middle;
        Probability ngram3Right;
        if (newTokens.size() == 1) {
            List<String> leftContext = LanguageModelUtils.getContext(token, tokens, term, 0, 2);
            ngram3Left = lm.getPseudoProbability(leftContext);
            List<String> middleContext = LanguageModelUtils.getContext(token, tokens, term, 1, 1);
            ngram3Middle = lm.getPseudoProbability(middleContext);
            List<String> rightContext = LanguageModelUtils.getContext(token, tokens, term, 2, 0);
            ngram3Right = lm.getPseudoProbability(rightContext);
            // Formatting with 90 decimals is costly; only do it when trace output is wanted.
            if (logger.isTraceEnabled()) {
                logger.trace(String.format("Left  : %.90f %s\n", ngram3Left.getProb(), leftContext));
                logger.trace(String.format("Middle: %.90f %s\n", ngram3Middle.getProb(), middleContext));
                logger.trace(String.format("Right : %.90f %s\n", ngram3Right.getProb(), rightContext));
            }
        } else if (newTokens.size() == 2) {
            ngram3Left = lm.getPseudoProbability(LanguageModelUtils.getContext(token, tokens, newTokens, 0, 1));
            ngram3Right = lm.getPseudoProbability(LanguageModelUtils.getContext(token, tokens, newTokens, 1, 0));
            // Two replacement tokens already fill two of three slots, so there is no
            // separate middle 3-gram; the mean of left and right is used instead.
            double meanProb = (ngram3Left.getProb() + ngram3Right.getProb()) / 2.0;
            ngram3Middle = new Probability(meanProb, 1.0f);
        } else {
            LanguageModelUtils.warnUnsupportedTokenCount(term, newTokens);
            return 0.0;
        }
        boolean coverageTooLow = ngram3Left.getCoverage() < MIN_COVERAGE
                && ngram3Middle.getCoverage() < MIN_COVERAGE
                && ngram3Right.getCoverage() < MIN_COVERAGE;
        if (coverageTooLow) {
            if (logger.isTraceEnabled()) {
                logger.trace(String.format("  Min coverage of %.2f not reached: %.2f, %.2f, %.2f, assuming p=0\n",
                        MIN_COVERAGE, ngram3Left.getCoverage(), ngram3Middle.getCoverage(), ngram3Right.getCoverage()));
            }
            return 0.0;
        }
        return ngram3Left.getProb() * ngram3Middle.getProb() * ngram3Right.getProb();
    }

    /**
     * Computes a 4-gram score for {@code term} in place of {@code token}.
     *
     * <p>If {@code term} tokenizes to one token, four contexts are scored (term
     * at each of the four positions of a 4-gram). If it tokenizes to two
     * tokens, three contexts are scored and the middle one is used for both
     * middle slots. Any other token count - including zero - is unsupported
     * and yields {@code 0.0}.
     *
     * @param lang   language providing the tokenizer
     * @param lm     language model queried for pseudo-probabilities
     * @param token  token to be replaced; must be contained in {@code tokens}
     * @param tokens tokens of the sentence
     * @param term   replacement text
     * @return {@code exp} of the summed log-probabilities, or {@code 0.0} if the
     *         token count is unsupported or all four coverages are below
     *         the minimum coverage
     */
    static double get4gramProbabilityFor(Language lang, LanguageModel lm, GoogleToken token, List<GoogleToken> tokens, String term) {
        Tokenizer tokenizer = LanguageModelUtils.getGoogleStyleWordTokenizer(lang);
        List<GoogleToken> newTokens = GoogleToken.getGoogleTokens(term, false, tokenizer);
        Probability ngram4Left;
        Probability ngram4MiddleLeft;
        Probability ngram4MiddleRight;
        Probability ngram4Right;
        if (newTokens.size() == 1) {
            ngram4Left = lm.getPseudoProbability(LanguageModelUtils.getContext(token, tokens, newTokens, 0, 3));
            ngram4MiddleLeft = lm.getPseudoProbability(LanguageModelUtils.getContext(token, tokens, newTokens, 2, 1));
            ngram4MiddleRight = lm.getPseudoProbability(LanguageModelUtils.getContext(token, tokens, newTokens, 1, 2));
            ngram4Right = lm.getPseudoProbability(LanguageModelUtils.getContext(token, tokens, newTokens, 3, 0));
        } else if (newTokens.size() == 2) {
            ngram4Left = lm.getPseudoProbability(LanguageModelUtils.getContext(token, tokens, newTokens, 0, 2));
            // Only one centered 4-gram exists for a two-token term; it fills both middle slots.
            ngram4MiddleLeft = lm.getPseudoProbability(LanguageModelUtils.getContext(token, tokens, newTokens, 1, 1));
            ngram4MiddleRight = ngram4MiddleLeft;
            ngram4Right = lm.getPseudoProbability(LanguageModelUtils.getContext(token, tokens, newTokens, 2, 0));
        } else {
            LanguageModelUtils.warnUnsupportedTokenCount(term, newTokens);
            return 0.0;
        }
        boolean coverageTooLow = ngram4Left.getCoverage() < MIN_COVERAGE
                && ngram4MiddleLeft.getCoverage() < MIN_COVERAGE
                && ngram4MiddleRight.getCoverage() < MIN_COVERAGE
                && ngram4Right.getCoverage() < MIN_COVERAGE;
        if (coverageTooLow) {
            if (logger.isTraceEnabled()) {
                logger.trace(String.format("  Min coverage of %.2f not reached: %.2f, %.2f, %.2f, %.2f, assuming p=0\n",
                        MIN_COVERAGE, ngram4Left.getCoverage(), ngram4MiddleLeft.getCoverage(),
                        ngram4MiddleRight.getCoverage(), ngram4Right.getCoverage()));
            }
            return 0.0;
        }
        double logProbSum = ngram4Left.getLogProb() + ngram4MiddleLeft.getLogProb()
                + ngram4MiddleRight.getLogProb() + ngram4Right.getLogProb();
        return Math.exp(logProbSum);
    }

    /**
     * Finds the first token whose {@code startPos} equals {@code position} and
     * whose text is not the sentence-start marker; logs a warning if none exists.
     */
    private static Optional<GoogleToken> findTokenAt(List<GoogleToken> tokens, int position, AnalyzedSentence sentence) {
        Optional<GoogleToken> match = tokens.stream()
                .filter(t -> t.startPos == position && !SENTENCE_START.equals(t.token))
                .findFirst();
        if (!match.isPresent()) {
            logger.warn("Could not find matching Google token in tokenizations '{}' / '{}'", sentence.getText(), tokens);
        }
        return match;
    }

    /** Logs the warning for a term whose token count is neither 1 nor 2. */
    private static void warnUnsupportedTokenCount(String term, List<GoogleToken> newTokens) {
        logger.warn("Words that consists of more than 2 tokens (according to Google tokenization) are not supported yet: {} -> {}", term, newTokens);
    }
}

