/*
 * Decompiled with CFR 0.152.
 */
package org.apache.kylin.engine.spark;

import com.clearspring.analytics.util.Lists;
import java.io.Serializable;
import java.util.HashMap;
import java.util.List;
import org.apache.commons.cli.Option;
import org.apache.commons.cli.OptionBuilder;
import org.apache.commons.cli.Options;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.ArrayPrimitiveWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.LazyOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.MultipleOutputs;
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;
import org.apache.kylin.common.KylinConfig;
import org.apache.kylin.common.util.AbstractApplication;
import org.apache.kylin.common.util.HadoopUtil;
import org.apache.kylin.common.util.OptionsHelper;
import org.apache.kylin.cube.CubeDescManager;
import org.apache.kylin.cube.CubeInstance;
import org.apache.kylin.cube.CubeManager;
import org.apache.kylin.cube.model.CubeDesc;
import org.apache.kylin.dict.IDictionaryBuilder;
import org.apache.kylin.engine.mr.common.AbstractHadoopJob;
import org.apache.kylin.engine.mr.common.SerializableConfiguration;
import org.apache.kylin.engine.spark.KylinSparkJobListener;
import org.apache.kylin.engine.spark.MultipleOutputsRDD;
import org.apache.kylin.metadata.model.TblColRef;
import org.apache.kylin.shaded.com.google.common.collect.Maps;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.Function;
import org.apache.spark.api.java.function.Function2;
import org.apache.spark.api.java.function.PairFunction;
import org.apache.spark.scheduler.SparkListenerInterface;
import org.apache.spark.util.LongAccumulator;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import scala.Tuple2;
import scala.Tuple3;

public class SparkUHCDictionary
extends AbstractApplication
implements Serializable {
    /** Logger for this job; the nested function classes in this file log through it as well. */
    protected static final Logger logger = LoggerFactory.getLogger(SparkUHCDictionary.class);
    // Command-line options. All of them are assigned in the static initializer of this class,
    // where each one is built as a required option that takes an argument.
    /** Option {@code cubename}: the cube name. */
    public static final Option OPTION_CUBE_NAME;
    /** Option {@code metaUrl}: the HDFS metadata url the Kylin config is loaded from. */
    public static final Option OPTION_META_URL;
    /** Option {@code output}: the path the job output is written to. */
    public static final Option OPTION_OUTPUT_PATH;
    /** Option {@code segmentId}: the cube segment id (used in the Spark application name). */
    public static final Option OPTION_SEGMENT_ID;
    /** Option {@code input}: the base path under which one directory per column is looked up. */
    public static final Option OPTION_INPUT_PATH;
    /** Option {@code cubingJobId}: the cubing job id; registered as required but not read by {@code execute}. */
    public static final Option OPTION_CUBING_JOB_ID;
    /** Option {@code counterOutput}: the path the counter sequence file is written to. */
    public static final Option OPTION_COUNTER_PATH;
    /** Option set handed out by {@code getOptions()}; populated in the constructor. */
    private Options options = new Options();

    /**
     * Creates the application and registers every command-line option it accepts.
     */
    public SparkUHCDictionary() {
        // Options are registered in this fixed order.
        Option[] accepted = {
            OPTION_CUBE_NAME,
            OPTION_META_URL,
            OPTION_OUTPUT_PATH,
            OPTION_INPUT_PATH,
            OPTION_SEGMENT_ID,
            OPTION_CUBING_JOB_ID,
            OPTION_COUNTER_PATH
        };
        for (Option option : accepted) {
            this.options.addOption(option);
        }
    }

    /**
     * Returns the option set that the constructor populated.
     *
     * @return the command-line options accepted by this application
     */
    @Override
    protected Options getOptions() {
        return options;
    }

    /**
     * Runs the Spark job for the cube's UHC columns.
     *
     * <p>Steps, in order: read the option values, start a Spark context with Kryo serialization,
     * delete the output path, load the Kylin config from {@code metaUrl}, collect the input files
     * found under {@code inputPath/<column identity>} for every UHC column, group their values by
     * column index, pass each group through {@link ProcessUHCColumnValues}, write the result via
     * {@code MultipleOutputs} below the output path, write a counter sequence file to the counter
     * path and finally delete the HDFS metadata at {@code metaUrl}.
     *
     * <p>The method returns early, without writing the counter file or deleting the metadata, when
     * the cube has no UHC columns or when none of the per-column input directories exists.
     *
     * @param optionsHelper holder of the parsed command-line option values
     * @throws Exception if any step fails; nothing is caught here
     */
    @Override
    protected void execute(OptionsHelper optionsHelper) throws Exception {
        String cubeName = optionsHelper.getOptionValue(OPTION_CUBE_NAME);
        String metaUrl = optionsHelper.getOptionValue(OPTION_META_URL);
        String segmentId = optionsHelper.getOptionValue(OPTION_SEGMENT_ID);
        String inputPath = optionsHelper.getOptionValue(OPTION_INPUT_PATH);
        String outputPath = optionsHelper.getOptionValue(OPTION_OUTPUT_PATH);
        String counterPath = optionsHelper.getOptionValue(OPTION_COUNTER_PATH);
        // Extra classes registered with Kryo; needed because registrationRequired is set to "true" below.
        Class[] kryoClassArray = new Class[]{Class.forName("scala.reflect.ClassTag$$anon$1"), Class.forName("org.apache.kylin.engine.mr.steps.SelfDefineSortableKey")};
        SparkConf conf = new SparkConf().setAppName("Build uhc dictionary with spark for:" + cubeName + " segment " + segmentId);
        conf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer");
        conf.set("spark.kryo.registrator", "org.apache.kylin.engine.spark.KylinKryoRegistrator");
        conf.set("spark.kryo.registrationRequired", "true").registerKryoClasses(kryoClassArray);
        KylinSparkJobListener jobListener = new KylinSparkJobListener();
        // The Spark context is closed by try-with-resources on every exit path, including the early returns.
        try (JavaSparkContext sc = new JavaSparkContext(conf);){
            sc.sc().addSparkListener((SparkListenerInterface)jobListener);
            // Clear the output location before anything is written to it.
            HadoopUtil.deletePath(sc.hadoopConfiguration(), new Path(outputPath));
            Configuration hadoopConf = sc.hadoopConfiguration();
            hadoopConf.set("mapreduce.input.pathFilter.class", "org.apache.kylin.engine.mr.steps.filter.UHCDictPathFilter");
            SerializableConfiguration sConf = new SerializableConfiguration(hadoopConf);
            KylinConfig config = AbstractHadoopJob.loadKylinConfigFromHdfs(sConf, metaUrl);
            CubeManager cubeMgr = CubeManager.getInstance(config);
            CubeInstance cube = cubeMgr.getCube(cubeName);
            Job job = Job.getInstance((Configuration)sConf.get());
            // NOTE(review): nothing in this method adds to this accumulator and it is not handed to
            // the functions used below, so value() looks like it stays at its initial value - confirm.
            LongAccumulator bytesWritten = sc.sc().longAccumulator();
            String hdfsDir = sc.hadoopConfiguration().get("global.dict.base.dir");
            List<TblColRef> uhcColumns = cube.getDescriptor().getAllUHCColumns();
            int reducerCount = uhcColumns.size();
            // No UHC columns: nothing to do. This exit skips the counter file and the metadata cleanup.
            if (reducerCount == 0) {
                return;
            }
            logger.info("RDD Output path: {}", (Object)outputPath);
            logger.info("getTotalReducerNum: {}", (Object)reducerCount);
            logger.info("counter path {}", (Object)counterPath);
            // Union of the files of every column directory that exists; stays null if none exists.
            JavaPairRDD wholeSequenceFileNames = null;
            for (TblColRef tblColRef : uhcColumns) {
                String columnPath = inputPath + "/" + tblColRef.getIdentity();
                if (!HadoopUtil.getFileSystem(columnPath).exists(new Path(columnPath))) continue;
                if (wholeSequenceFileNames == null) {
                    wholeSequenceFileNames = sc.wholeTextFiles(columnPath);
                    continue;
                }
                wholeSequenceFileNames = wholeSequenceFileNames.union(sc.wholeTextFiles(columnPath));
            }
            // No input directory found: log and leave. This exit also skips the counter file and the cleanup.
            if (wholeSequenceFileNames == null) {
                logger.error("There're no sequence files at " + inputPath + " !");
                return;
            }
            // Pipeline: keep only the first tuple element (used downstream as the file path)
            //   -> (column index, values read from that file)
            //   -> drop entries keyed -1 (file missing or column not recognised)
            //   -> concatenate the value lists that share a column index
            //   -> convert each (column index, values) pair with ProcessUHCColumnValues.
            JavaPairRDD pairRDD = wholeSequenceFileNames.map((Function & Serializable)tuple -> (String)tuple._1).mapToPair((PairFunction)new InputPathAndFilterAddFunction2(config, uhcColumns)).filter((Function & Serializable)tuple -> (Integer)tuple._1 != -1).reduceByKey((Function2 & Serializable)(list1, list2) -> this.combineAllColumnDistinctValues((List<String>)list1, (List<String>)list2)).mapToPair((PairFunction)new ProcessUHCColumnValues(cubeName, config, hdfsDir, uhcColumns));
            // Output setup: a named output "dict" written as sequence files of NullWritable / ArrayPrimitiveWritable.
            MultipleOutputs.addNamedOutput((Job)job, (String)"dict", SequenceFileOutputFormat.class, NullWritable.class, ArrayPrimitiveWritable.class);
            FileOutputFormat.setOutputPath((Job)job, (Path)new Path(outputPath));
            job.getConfiguration().set("output.path", outputPath);
            LazyOutputFormat.setOutputFormatClass((Job)job, SequenceFileOutputFormat.class);
            MultipleOutputsRDD<Writable, Writable> multipleOutputsRDD = MultipleOutputsRDD.rddToMultipleOutputsRDD((JavaPairRDD<String, Tuple3<Writable, Writable, String>>)pairRDD);
            multipleOutputsRDD.saveAsNewAPIHadoopDatasetWithMultipleOutputs(job.getConfiguration());
            // The value logged and stored as the record count is the number of UHC columns.
            logger.info("Map input records={}", (Object)reducerCount);
            logger.info("HDFS Read: {} HDFS Write", (Object)bytesWritten.value());
            HashMap<String, String> counterMap = Maps.newHashMap();
            counterMap.put("source_records_count", String.valueOf(reducerCount));
            counterMap.put("source_records_size", String.valueOf(bytesWritten.value()));
            HadoopUtil.writeToSequenceFile(sc.hadoopConfiguration(), counterPath, counterMap);
            // Reached only when everything above completed without an exception or early return.
            HadoopUtil.deleteHDFSMeta(metaUrl);
        }
    }

    /**
     * Merges two partial value lists that belong to the same column.
     *
     * <p>The merge is done in place: every element of {@code list2} is appended to {@code list1},
     * and {@code list1} itself is returned. No new list is allocated and duplicates are kept.
     *
     * @param list1 the list that receives the values; it is modified
     * @param list2 the values to append; it is left unchanged
     * @return {@code list1}, now also containing the elements of {@code list2}
     */
    private List<String> combineAllColumnDistinctValues(List<String> list1, List<String> list2) {
        list1.addAll(list2);
        return list1;
    }

    // All command-line options share one shape: required, taking an argument, and using the
    // same string for the option name and the argument name.
    static {
        OPTION_CUBE_NAME = requiredArgOption("cubename", "Cube Name");
        OPTION_META_URL = requiredArgOption("metaUrl", "HDFS metadata url");
        OPTION_OUTPUT_PATH = requiredArgOption("output", "Cube output path");
        OPTION_SEGMENT_ID = requiredArgOption("segmentId", "Cube Segment Id");
        OPTION_INPUT_PATH = requiredArgOption("input", "Hive Intermediate Table PATH");
        OPTION_CUBING_JOB_ID = requiredArgOption("cubingJobId", "Cubing job id");
        OPTION_COUNTER_PATH = requiredArgOption("counterOutput", "counter output path");
    }

    /**
     * Builds one required option that takes an argument.
     *
     * @param name used both as the option name and as the displayed argument name
     * @param description help text of the option
     * @return the option created by {@code OptionBuilder}
     */
    private static Option requiredArgOption(String name, String description) {
        OptionBuilder.withArgName((String)name);
        OptionBuilder.hasArg();
        OptionBuilder.isRequired((boolean)true);
        OptionBuilder.withDescription((String)description);
        return OptionBuilder.create((String)name);
    }

    /**
     * Pair function applied to each {@code (column index, values)} pair; it produces a
     * {@code (String, Tuple3<Writable, Writable, String>)} pair for the multiple-outputs writer.
     *
     * <p>The body of {@link #call} could not be recovered by the decompiler, so what the function
     * does with the values is not visible in this file.
     */
    static class ProcessUHCColumnValues
    implements PairFunction<Tuple2<Integer, List<String>>, String, Tuple3<Writable, Writable, String>> {
        // Set to true at the end of init(). Declared transient, so it is not part of the serialized form.
        private volatile transient boolean initialized = false;
        // Constructor argument; used by init() to look up the cube and its descriptor.
        private String cubeName;
        // Constructor argument; used by init() for the manager lookups.
        private KylinConfig config;
        // Never assigned in the code visible here - presumably set inside call(); verify.
        private IDictionaryBuilder builder;
        // Assigned by init().
        private CubeInstance cube;
        // Constructor argument; not read in the code visible here.
        private String hdfsDir;
        // Assigned by init().
        private CubeDesc cubeDesc;
        // Constructor argument; not read in the code visible here.
        private List<TblColRef> uhcColumns;

        /**
         * Stores the arguments; no lookup is performed until {@code init()} runs.
         *
         * @param cubeName name of the cube to resolve in {@code init()}
         * @param config Kylin configuration used for the lookups
         * @param hdfsDir directory value supplied by the caller
         * @param uhcColumns the UHC columns supplied by the caller
         */
        public ProcessUHCColumnValues(String cubeName, KylinConfig config, String hdfsDir, List<TblColRef> uhcColumns) {
            this.cubeName = cubeName;
            this.config = config;
            this.uhcColumns = uhcColumns;
            this.hdfsDir = hdfsDir;
        }

        /**
         * Resolves the cube instance and the cube descriptor by name, then marks this function
         * as initialized.
         */
        private void init() {
            // The resource is closed when the try block ends; judging by its name it installs the
            // config as thread-local for the duration of the lookups - TODO confirm.
            try (KylinConfig.SetAndUnsetThreadLocalConfig autoUnset = KylinConfig.setAndUnsetThreadLocalConfig(this.config);){
                this.cube = CubeManager.getInstance(this.config).getCube(this.cubeName);
                this.cubeDesc = CubeDescManager.getInstance(this.config).getCubeDesc(this.cubeName);
            }
            this.initialized = true;
        }

        /**
         * Placeholder for the real implementation: as decompiled, this method always throws.
         *
         * <p>CFR 0.152 failed on the original bytecode with
         * {@code ConfusedCFRException: Tried to end blocks [45[SIMPLE_IF_TAKEN]], but top level block is 15[MONITOR]}.
         * NOTE(review): the MONITOR block in that message suggests the original body contained a
         * synchronized section - presumably a guarded one-time call to {@code init()}; confirm
         * against the upstream source before relying on this class.
         *
         * @param columnValues pair of column index and the values collected for that column
         * @return never returns normally in this decompiled form
         * @throws Exception declared by the interface; here always an {@link IllegalStateException}
         */
        public Tuple2<String, Tuple3<Writable, Writable, String>> call(Tuple2<Integer, List<String>> columnValues) throws Exception {
            // The original logic is missing; it has to be restored from the real source.
            throw new IllegalStateException("Decompilation failed");
        }
    }

    static class InputPathAndFilterAddFunction2
    implements PairFunction<String, Integer, List<String>> {
        private List<TblColRef> uhcColumns;
        private KylinConfig config;

        public InputPathAndFilterAddFunction2(KylinConfig config, List<TblColRef> uhcColumns) {
            this.config = config;
            this.uhcColumns = uhcColumns;
        }

        public Tuple2<Integer, List<String>> call(String sequenceFilePath) throws Exception {
            Path path = new Path(sequenceFilePath);
            logger.info("Column absolute path is " + path.toString());
            if (!HadoopUtil.getFileSystem(path).exists(path)) {
                return new Tuple2((Object)-1, null);
            }
            String columnName = path.getParent().getName();
            int index = -1;
            for (int i = 0; i < this.uhcColumns.size(); ++i) {
                if (!this.uhcColumns.get(i).getIdentity().equalsIgnoreCase(columnName)) continue;
                index = i;
                break;
            }
            if (index == -1) {
                return new Tuple2((Object)-1, null);
            }
            try (KylinConfig.SetAndUnsetThreadLocalConfig autoUnset = KylinConfig.setAndUnsetThreadLocalConfig(this.config);){
                List values = Lists.newArrayList();
                values.addAll(HadoopUtil.readDistinctColumnValues(sequenceFilePath));
                logger.info("UHC column " + columnName + " contains distinct values " + values);
                Tuple2 tuple2 = new Tuple2((Object)index, (Object)values);
                return tuple2;
            }
        }
    }
}

