/*
 * Decompiled with CFR 0.152.
 */
package ai.grazie.nlp.encoder;

import ai.grazie.nlp.encoder.BertTextEncoder;
import ai.grazie.nlp.encoder.PreTrainedTextEncoder;
import ai.grazie.nlp.encoder.WordPiece;
import ai.grazie.nlp.tokenizer.Tokenizer;
import ai.grazie.nlp.tokenizer.word.BasicWordTokenizer;
import java.util.ArrayList;
import java.util.Collection;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import kotlin.Metadata;
import kotlin.Pair;
import kotlin.TuplesKt;
import kotlin.collections.CollectionsKt;
import kotlin.collections.MapsKt;
import kotlin.jvm.functions.Function1;
import kotlin.jvm.internal.DefaultConstructorMarker;
import kotlin.jvm.internal.Intrinsics;
import kotlin.jvm.internal.SourceDebugExtension;
import kotlin.ranges.RangesKt;
import org.jetbrains.annotations.NotNull;

@Metadata(mv={1, 7, 0}, k=1, xi=48, d1={"\u00008\n\u0002\u0018\u0002\n\u0002\u0018\u0002\n\u0000\n\u0002\u0010$\n\u0002\u0010\u000e\n\u0002\u0010\b\n\u0000\n\u0002\u0010\u000b\n\u0002\b\b\n\u0002\u0018\u0002\n\u0002\b\u0005\n\u0002\u0018\u0002\n\u0002\b\u0006\n\u0002\u0010 \n\u0002\b\u0003\b\u0016\u0018\u00002\u00020\u0001B]\u0012\u0012\u0010\u0002\u001a\u000e\u0012\u0004\u0012\u00020\u0004\u0012\u0004\u0012\u00020\u00050\u0003\u0012\u0006\u0010\u0006\u001a\u00020\u0007\u0012\b\b\u0002\u0010\b\u001a\u00020\u0007\u0012\b\b\u0002\u0010\t\u001a\u00020\u0004\u0012\b\b\u0002\u0010\n\u001a\u00020\u0004\u0012\b\b\u0002\u0010\u000b\u001a\u00020\u0004\u0012\b\b\u0002\u0010\f\u001a\u00020\u0004\u0012\b\b\u0002\u0010\r\u001a\u00020\u0004\u00a2\u0006\u0002\u0010\u000eJ\u0010\u0010\u0017\u001a\u00020\u00042\u0006\u0010\u0018\u001a\u00020\u0005H\u0014J\u0010\u0010\u0019\u001a\u00020\u00052\u0006\u0010\u001a\u001a\u00020\u0004H\u0014J\u0016\u0010\u001b\u001a\u00020\u00042\f\u0010\u001c\u001a\b\u0012\u0004\u0012\u00020\u00040\u001dH\u0016J\u0016\u0010\u001e\u001a\b\u0012\u0004\u0012\u00020\u00040\u001d2\u0006\u0010\u001f\u001a\u00020\u0004H\u0014R\u000e\u0010\u000f\u001a\u00020\u0010X\u0082\u0004\u00a2\u0006\u0002\n\u0000R\u000e\u0010\b\u001a\u00020\u0007X\u0082\u0004\u00a2\u0006\u0002\n\u0000R\u001a\u0010\u0011\u001a\u000e\u0012\u0004\u0012\u00020\u0005\u0012\u0004\u0012\u00020\u00040\u0003X\u0082\u0004\u00a2\u0006\u0002\n\u0000R\u000e\u0010\u0006\u001a\u00020\u0007X\u0082\u0004\u00a2\u0006\u0002\n\u0000R\u0014\u0010\u0012\u001a\u00020\u0005X\u0096\u0004\u00a2\u0006\b\n\u0000\u001a\u0004\b\u0013\u0010\u0014R\u001a\u0010\u0002\u001a\u000e\u0012\u0004\u0012\u00020\u0004\u0012\u0004\u0012\u00020\u00050\u0003X\u0082\u0004\u00a2\u0006\u0002\n\u0000R\u000e\u0010\u0015\u001a\u00020\u0016X\u0082\u0004\u00a2\u0006\u0002\n\u0000\u00a8\u0006 "}, d2={"Lai/grazie/nlp/encoder/BertTextEncoder;", "Lai/grazie/nlp/encoder/PreTrainedTextEncoder;", "vocabulary", "", "", "", "toLowerCase", "", 
"doBasicTokenize", "unkToken", "sepToken", "padToken", "clsToken", "maskToken", "(Ljava/util/Map;ZZLjava/lang/String;Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;)V", "basicWordTokenizer", "Lai/grazie/nlp/tokenizer/word/BasicWordTokenizer;", "idsToToken", "vocabSize", "getVocabSize", "()I", "wordPieceTokenizer", "Lai/grazie/nlp/encoder/WordPiece;", "convertIdToToken", "id", "convertTokenToId", "token", "decodeFromTokens", "tokens", "", "tokenizeText", "text", "nlp-encoder-engine"})
@SourceDebugExtension(value={"SMAP\nBertTextEncoder.kt\nKotlin\n*S Kotlin\n*F\n+ 1 BertTextEncoder.kt\nai/grazie/nlp/encoder/BertTextEncoder\n+ 2 _Collections.kt\nkotlin/collections/CollectionsKt___CollectionsKt\n*L\n1#1,54:1\n1179#2,2:55\n1253#2,4:57\n*S KotlinDebug\n*F\n+ 1 BertTextEncoder.kt\nai/grazie/nlp/encoder/BertTextEncoder\n*L\n27#1:55,2\n27#1:57,4\n*E\n"})
/*
 * Text encoder that splits text into word-piece tokens and maps them to and from
 * integer ids using a caller-supplied vocabulary.
 *
 * When doBasicTokenize is set, the text is first split by a BasicWordTokenizer and each
 * resulting token is passed to the WordPiece tokenizer; otherwise the whole text goes to
 * the WordPiece tokenizer directly.
 *
 * This source was reconstructed from decompiled Kotlin bytecode; the synthetic
 * constructor below exists for Kotlin default-argument call sites.
 */
public class BertTextEncoder
extends PreTrainedTextEncoder {
    /** Token text to id mapping supplied by the caller (stored by reference, not copied). */
    @NotNull
    private final Map<String, Integer> vocabulary;
    /** Forwarded to the {@link BasicWordTokenizer} constructor. */
    private final boolean toLowerCase;
    /** Whether {@link #tokenizeText(String)} pre-splits the text with the basic word tokenizer. */
    private final boolean doBasicTokenize;
    /** Reverse of {@link #vocabulary}: id to token text, built once in the constructor. */
    @NotNull
    private final Map<Integer, String> idsToToken;
    @NotNull
    private final WordPiece wordPieceTokenizer;
    @NotNull
    private final BasicWordTokenizer basicWordTokenizer;
    /** Number of entries in {@link #vocabulary} at construction time. */
    private final int vocabSize;

    /**
     * Creates an encoder over the given vocabulary.
     *
     * @param vocabulary      token text to id mapping; must not be null
     * @param toLowerCase     passed to the basic word tokenizer
     * @param doBasicTokenize whether to pre-split text with the basic word tokenizer
     * @param unkToken        unknown-token text; also handed to the WordPiece tokenizer
     * @param sepToken        separator token text, forwarded to the superclass
     * @param padToken        padding token text, forwarded to the superclass
     * @param clsToken        classification token text, forwarded to the superclass
     * @param maskToken       mask token text, forwarded to the superclass
     */
    public BertTextEncoder(@NotNull Map<String, Integer> vocabulary, boolean toLowerCase, boolean doBasicTokenize, @NotNull String unkToken, @NotNull String sepToken, @NotNull String padToken, @NotNull String clsToken, @NotNull String maskToken) {
        // The null checks are evaluated as part of the first non-null argument so that
        // they still run before the superclass constructor, while keeping super(...)
        // the first statement of the constructor body.
        super(null, null, BertTextEncoder.requireNonNullParameters(vocabulary, unkToken, sepToken, padToken, clsToken, maskToken), sepToken, padToken, clsToken, maskToken);
        this.vocabulary = vocabulary;
        this.toLowerCase = toLowerCase;
        this.doBasicTokenize = doBasicTokenize;

        // Invert the vocabulary. Entries are visited in the map's iteration order, so if
        // two tokens share an id the later one overwrites the earlier one.
        int capacity = Math.max(MapsKt.mapCapacity(vocabulary.size()), 16);
        Map<Integer, String> reversed = new LinkedHashMap<>(capacity);
        for (Map.Entry<String, Integer> entry : vocabulary.entrySet()) {
            reversed.put(entry.getValue(), entry.getKey());
        }
        this.idsToToken = reversed;

        // NOTE(review): this looks like WordPiece's Kotlin synthetic defaults constructor --
        // mask 4 would mark the third argument as defaulted, making the literal 0 a
        // placeholder. Confirm against WordPiece.
        this.wordPieceTokenizer = new WordPiece(this.vocabulary, unkToken, 0, 4, null);
        this.basicWordTokenizer = new BasicWordTokenizer(this.toLowerCase, false);
        this.vocabSize = this.vocabulary.size();
    }

    /**
     * Synthetic constructor used for Kotlin default arguments. Each bit set in {@code n}
     * marks the parameter at that position as omitted, in which case its default is used:
     * bit 4 - {@code bl2} (doBasicTokenize) = {@code true}; bit 8 - {@code string}
     * (unkToken) = {@code "[UNK]"}; bit 0x10 - {@code string2} (sepToken) = {@code "[SEP]"};
     * bit 0x20 - {@code string3} (padToken) = {@code "[PAD]"}; bit 0x40 - {@code string4}
     * (clsToken) = {@code "[CLS]"}; bit 0x80 - {@code string5} (maskToken) = {@code "[MASK]"}.
     * {@code map} (vocabulary) and {@code bl} (toLowerCase) have no defaults.
     *
     * @param defaultConstructorMarker unused; only disambiguates this overload
     */
    public /* synthetic */ BertTextEncoder(Map<String, Integer> map, boolean bl, boolean bl2, String string, String string2, String string3, String string4, String string5, int n, DefaultConstructorMarker defaultConstructorMarker) {
        this(
            map,
            bl,
            (n & 4) != 0 ? true : bl2,
            (n & 8) != 0 ? "[UNK]" : string,
            (n & 0x10) != 0 ? "[SEP]" : string2,
            (n & 0x20) != 0 ? "[PAD]" : string3,
            (n & 0x40) != 0 ? "[CLS]" : string4,
            (n & 0x80) != 0 ? "[MASK]" : string5
        );
    }

    /**
     * Runs the constructor's non-null parameter checks in declaration order.
     *
     * @return {@code unkToken}, so the call can be used inline as a constructor argument
     */
    private static String requireNonNullParameters(Map<String, Integer> vocabulary, String unkToken, String sepToken, String padToken, String clsToken, String maskToken) {
        Intrinsics.checkNotNullParameter(vocabulary, "vocabulary");
        Intrinsics.checkNotNullParameter(unkToken, "unkToken");
        Intrinsics.checkNotNullParameter(sepToken, "sepToken");
        Intrinsics.checkNotNullParameter(padToken, "padToken");
        Intrinsics.checkNotNullParameter(clsToken, "clsToken");
        Intrinsics.checkNotNullParameter(maskToken, "maskToken");
        return unkToken;
    }

    /**
     * @return the size of the vocabulary as measured when this encoder was constructed
     */
    @Override
    public int getVocabSize() {
        return this.vocabSize;
    }

    /**
     * Looks up the token text for an id.
     *
     * @param id vocabulary id
     * @return the token mapped to {@code id}, or the unknown token when the id is not mapped
     */
    @Override
    @NotNull
    protected String convertIdToToken(int id) {
        return this.idsToToken.getOrDefault(id, this.getUnkToken());
    }

    /**
     * Looks up the id for a token.
     *
     * @param token token text; must not be null
     * @return the id mapped to {@code token}, or the unknown token's id when it is not mapped
     */
    @Override
    protected int convertTokenToId(@NotNull String token) {
        Intrinsics.checkNotNullParameter(token, "token");
        // The fallback id is resolved on every call, before the token lookup. Per the Kotlin
        // stdlib contract of Map.getValue this fails if the unknown token itself is missing
        // from the vocabulary, even when `token` is present.
        Integer unknownId = MapsKt.getValue(this.vocabulary, this.getUnkToken());
        return this.vocabulary.getOrDefault(token, unknownId);
    }

    /**
     * Splits text into word-piece tokens.
     *
     * @param text2 text to tokenize; must not be null
     * @return a new mutable list holding the word pieces in order of appearance
     */
    @Override
    @NotNull
    protected List<String> tokenizeText(@NotNull String text2) {
        Intrinsics.checkNotNullParameter(text2, "text");
        List<String> pieces = new ArrayList<>();
        if (!this.doBasicTokenize) {
            // No pre-splitting: the WordPiece tokenizer receives the text as a whole.
            CollectionsKt.addAll(pieces, this.wordPieceTokenizer.tokenize(text2));
            return pieces;
        }
        for (Tokenizer.Token word : this.basicWordTokenizer.tokenize(text2)) {
            CollectionsKt.addAll(pieces, this.wordPieceTokenizer.tokenize(word.getToken()));
        }
        return pieces;
    }

    /**
     * Joins tokens back into a single string, separated by single spaces, after applying
     * a per-token transform.
     *
     * @param tokens tokens to join; must not be null
     * @return the joined string
     */
    @Override
    @NotNull
    public String decodeFromTokens(@NotNull List<String> tokens) {
        Intrinsics.checkNotNullParameter(tokens, "tokens");
        // NOTE(review): `decodeFromTokens.1.INSTANCE` is the decompiler's rendering of a
        // compiled Kotlin lambda (the per-token transform). Its body is not present in this
        // file, so the reference is kept verbatim; recover it from the original
        // BertTextEncoder.kt before relying on this method. Mask 30 presumably marks
        // prefix, postfix, limit and truncated as defaulted -- confirm.
        return CollectionsKt.joinToString$default((Iterable)tokens, (CharSequence)" ", null, null, (int)0, null, (Function1)decodeFromTokens.1.INSTANCE, (int)30, null);
    }
}

