/*
 * Decompiled with CFR 0.152.
 */
package org.apache.kylin.engine.spark;

import java.io.Serializable;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
import org.apache.commons.cli.Option;
import org.apache.commons.cli.OptionBuilder;
import org.apache.commons.cli.Options;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.kylin.common.KylinConfig;
import org.apache.kylin.common.util.AbstractApplication;
import org.apache.kylin.common.util.ByteArray;
import org.apache.kylin.common.util.OptionsHelper;
import org.apache.kylin.cube.CubeDescManager;
import org.apache.kylin.cube.CubeInstance;
import org.apache.kylin.cube.CubeManager;
import org.apache.kylin.cube.CubeSegment;
import org.apache.kylin.cube.common.RowKeySplitter;
import org.apache.kylin.cube.cuboid.Cuboid;
import org.apache.kylin.cube.cuboid.CuboidScheduler;
import org.apache.kylin.cube.cuboid.CuboidUtil;
import org.apache.kylin.cube.kv.RowKeyEncoderProvider;
import org.apache.kylin.cube.model.CubeDesc;
import org.apache.kylin.engine.mr.BatchCubingJobBuilder2;
import org.apache.kylin.engine.mr.IMROutput2;
import org.apache.kylin.engine.mr.MRUtil;
import org.apache.kylin.engine.mr.common.AbstractHadoopJob;
import org.apache.kylin.engine.mr.common.CubeStatsReader;
import org.apache.kylin.engine.mr.common.CuboidSchedulerUtil;
import org.apache.kylin.engine.mr.common.NDCuboidBuilder;
import org.apache.kylin.engine.mr.common.SerializableConfiguration;
import org.apache.kylin.engine.spark.KylinSparkJobListener;
import org.apache.kylin.engine.spark.SparkCubingByLayer;
import org.apache.kylin.engine.spark.SparkFunction;
import org.apache.kylin.engine.spark.SparkUtil;
import org.apache.kylin.measure.BufferedMeasureCodec;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.Function2;
import org.apache.spark.api.java.function.PairFlatMapFunction;
import org.apache.spark.api.java.function.PairFunction;
import org.apache.spark.scheduler.SparkListenerInterface;
import org.apache.spark.storage.StorageLevel;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import scala.Tuple2;

/**
 * Spark application that calculates the cuboids needed for cube optimization, one layer at a time.
 *
 * <p>{@code execute} reads the base cuboid RDD from the {@code input} path and then, for every
 * level up to the longest depth of the cuboid set selected by {@code cuboidMode}, flat-maps the
 * previous level through {@link CuboidFlatMap}, reduces by key and saves the result under the
 * {@code output} path.
 */
public class SparkCubingByLayerForOpt
extends AbstractApplication
implements Serializable {
    /** Logger shared by this class and its nested classes. */
    protected static final Logger logger = LoggerFactory.getLogger(SparkCubingByLayerForOpt.class);
    /** Required option {@code cubename}: cube name. */
    public static final Option OPTION_CUBE_NAME;
    /** Required option {@code segmentId}: cube segment id. */
    public static final Option OPTION_SEGMENT_ID;
    /** Required option {@code metaUrl}: HDFS metadata url. */
    public static final Option OPTION_META_URL;
    /** Required option {@code output}: cube output path. */
    public static final Option OPTION_OUTPUT_PATH;
    /** Required option {@code input}: input path the base cuboid RDD is read from. */
    public static final Option OPTION_INPUT_PATH;
    /** Required option {@code cuboidMode}: selects which cuboid set is calculated. */
    public static final Option OPTION_CUBOID_MODE;
    /** Option set returned by {@code getOptions()}; populated in the constructor. */
    private Options options = new Options();
    /** Shared empty result (name misspelled, sic) used by {@link CuboidFlatMap} when a cuboid has no children. */
    private static final Iterable<Tuple2<ByteArray, Object[]>> EMTPY_ITERATOR;

    /**
     * Creates the application and registers every supported command-line option.
     */
    public SparkCubingByLayerForOpt() {
        // Registration order is unchanged: cuboid mode, input, cube name, segment id, meta url, output.
        Option[] supported = {
            OPTION_CUBOID_MODE,
            OPTION_INPUT_PATH,
            OPTION_CUBE_NAME,
            OPTION_SEGMENT_ID,
            OPTION_META_URL,
            OPTION_OUTPUT_PATH
        };
        for (Option supportedOption : supported) {
            options.addOption(supportedOption);
        }
    }

    /**
     * Returns the command-line options accepted by this application.
     *
     * @return the option set populated by the constructor
     */
    @Override
    protected Options getOptions() {
        return options;
    }

    /**
     * Runs the optimize-cubing Spark job.
     *
     * <p>Steps: read the option values, start a {@link JavaSparkContext}, load the Kylin
     * configuration from {@code metaUrl}, read the base cuboid RDD from the input path and then,
     * level by level, derive and aggregate the next layer and save it under the output path.
     *
     * @param optionsHelper parsed command-line options
     * @throws IllegalArgumentException if the cube or the segment named by the options cannot be found
     * @throws Exception if any other step of the job fails
     */
    @Override
    protected void execute(OptionsHelper optionsHelper) throws Exception {
        String metaUrl = optionsHelper.getOptionValue(OPTION_META_URL);
        String inputPath = optionsHelper.getOptionValue(OPTION_INPUT_PATH);
        String cubeName = optionsHelper.getOptionValue(OPTION_CUBE_NAME);
        String segmentId = optionsHelper.getOptionValue(OPTION_SEGMENT_ID);
        String outputPath = optionsHelper.getOptionValue(OPTION_OUTPUT_PATH);
        String cuboidMode = optionsHelper.getOptionValue(OPTION_CUBOID_MODE);

        SparkConf sparkConf = SparkUtil.setKryoSerializerInConf();
        sparkConf.setAppName("Kylin_Cubing_For_Optimize_" + cubeName + "_With_Spark");
        KylinSparkJobListener jobListener = new KylinSparkJobListener();
        try (JavaSparkContext sc = new JavaSparkContext(sparkConf)) {
            // The context's Hadoop configuration is adjusted first; sConf below wraps it afterwards.
            SparkUtil.modifySparkHadoopConfiguration(sc.sc(), AbstractHadoopJob.loadKylinConfigFromHdfs(new SerializableConfiguration(sc.hadoopConfiguration()), metaUrl));
            sc.sc().addSparkListener((SparkListenerInterface)jobListener);
            SerializableConfiguration sConf = new SerializableConfiguration(sc.hadoopConfiguration());
            KylinConfig envConfig = AbstractHadoopJob.loadKylinConfigFromHdfs(sConf, metaUrl);

            // Fail fast with a descriptive message instead of an NPE further down if a lookup yields null.
            CubeInstance cubeInstance = CubeManager.getInstance(envConfig).getCube(cubeName);
            if (cubeInstance == null) {
                throw new IllegalArgumentException("Cube not found: " + cubeName);
            }
            CubeDesc cubeDesc = CubeDescManager.getInstance(envConfig).getCubeDesc(cubeInstance.getDescName());
            CubeSegment cubeSegment = cubeInstance.getSegmentById(segmentId);
            if (cubeSegment == null) {
                throw new IllegalArgumentException("Segment " + segmentId + " not found in cube " + cubeName);
            }

            CubeStatsReader cubeStatsReader = new CubeStatsReader(cubeSegment, envConfig);
            Job job = Job.getInstance((Configuration)sConf.get());
            SparkUtil.setHadoopConfForCuboid(job, cubeSegment, metaUrl);
            StorageLevel storageLevel = StorageLevel.fromString((String)envConfig.getSparkStorageLevel());
            JavaPairRDD<ByteArray, Object[]> baseCuboIdRDD = SparkUtil.getCuboIdRDDFromHdfs(sc, metaUrl, cubeName, cubeSegment, inputPath, cubeDesc.getMeasures().size(), sConf);

            Set<Long> cuboidsByMode = cubeSegment.getCubeInstance().getCuboidsByMode(cuboidMode);
            int maxLevel = CuboidUtil.getLongestDepth(cuboidsByMode);
            logger.info("cuboidMode: {}", cuboidMode);
            logger.info("maxLevel: {}", maxLevel);
            CuboidScheduler scheduler = CuboidSchedulerUtil.getCuboidSchedulerByMode(cubeSegment, cuboidMode);

            // Raw array type: Java cannot create arrays of a parameterized type.
            JavaPairRDD[] allRDDs = new JavaPairRDD[maxLevel + 1];
            allRDDs[0] = baseCuboIdRDD;

            // needAggr[m] is false for measures that are only aggregated in the base cuboid.
            SparkCubingByLayer.BaseCuboidReducerFunction2 reducerFunction2 = new SparkCubingByLayer.BaseCuboidReducerFunction2(cubeName, metaUrl, sConf);
            int measureCount = cubeDesc.getMeasures().size();
            boolean[] needAggr = new boolean[measureCount];
            boolean allNormalMeasure = true;
            for (int m = 0; m < measureCount; ++m) {
                needAggr[m] = !cubeDesc.getMeasures().get(m).getFunction().getMeasureType().onlyAggrInBaseCuboid();
                allNormalMeasure = allNormalMeasure && needAggr[m];
            }
            if (!allNormalMeasure) {
                // At least one measure must be skipped during aggregation, so use the reducer that honours needAggr.
                reducerFunction2 = new SparkCubingByLayer.CuboidReducerFunction2(cubeName, metaUrl, sConf, needAggr);
            }

            for (int level = 1; level <= maxLevel; ++level) {
                int partition = SparkUtil.estimateLayerPartitionNum(level, cubeStatsReader, envConfig);
                allRDDs[level] = allRDDs[level - 1].flatMapToPair((PairFlatMapFunction)new CuboidFlatMap(cubeName, segmentId, metaUrl, sConf, scheduler)).reduceByKey((Function2)reducerFunction2, partition);
                allRDDs[level].persist(storageLevel);
                this.saveToHDFS((JavaPairRDD<ByteArray, Object[]>)allRDDs[level], metaUrl, cubeName, cubeSegment, outputPath, level, job);
                // Release the parent layer only after the child layer has been persisted and written.
                allRDDs[level - 1].unpersist(false);
            }
            allRDDs[maxLevel].unpersist(false);
            logger.info("Finished on calculating needed cuboids For Optimize.");
            // NOTE(review): this message looks like it may be parsed by other tooling; kept verbatim - confirm before changing.
            logger.info("HDFS: Number of bytes written=" + jobListener.metrics.getBytesWritten());
        }
    }

    /**
     * Encodes one cuboid layer as {@code Text}/{@code Text} pairs and writes it through the
     * output format configured for the segment.
     *
     * @param rdd cuboid rows of this level: row key to measure values
     * @param metaUrl metadata url the Kylin configuration is loaded from inside the tasks
     * @param cubeName name of the cube whose descriptor supplies the measure definitions
     * @param cubeSeg segment being built
     * @param hdfsBaseLocation base output location; the per-level path is derived from it
     * @param level cuboid level being saved
     * @param job Hadoop job whose configuration is set up for, and used by, the write
     * @throws Exception if configuring the output or saving the dataset fails
     */
    protected void saveToHDFS(JavaPairRDD<ByteArray, Object[]> rdd, final String metaUrl, final String cubeName, CubeSegment cubeSeg, String hdfsBaseLocation, int level, Job job) throws Exception {
        String cuboidOutputPath = BatchCubingJobBuilder2.getCuboidOutputPathsByLevel(hdfsBaseLocation, level);
        // Wrapped before configureJobOutput touches the job, same order as before.
        final SerializableConfiguration sConf = new SerializableConfiguration(job.getConfiguration());
        IMROutput2.IMROutputFormat outputFormat = MRUtil.getBatchCubingOutputSide2(cubeSeg).getOutputFormat();
        outputFormat.configureJobOutput(job, cuboidOutputPath, cubeSeg, cubeSeg.getCuboidScheduler(), level);
        rdd.mapToPair((PairFunction)new SparkFunction.PairFunctionBase<Tuple2<ByteArray, Object[]>, Text, Text>(){
            private BufferedMeasureCodec codec;

            @Override
            protected void doInit() {
                KylinConfig kylinConfig = AbstractHadoopJob.loadKylinConfigFromHdfs(sConf, metaUrl);
                try (KylinConfig.SetAndUnsetThreadLocalConfig autoClose = KylinConfig.setAndUnsetThreadLocalConfig(kylinConfig);){
                    // Resolve the descriptor through the cube instance (as CuboidFlatMap does) rather than
                    // treating the cube name as the descriptor name, which only works when the two coincide.
                    CubeInstance cube = CubeManager.getInstance(kylinConfig).getCube(cubeName);
                    if (cube == null) {
                        throw new IllegalStateException("Cube not found: " + cubeName);
                    }
                    CubeDesc desc = cube.getDescriptor();
                    this.codec = new BufferedMeasureCodec(desc.getMeasures());
                }
            }

            @Override
            public Tuple2<Text, Text> doCall(Tuple2<ByteArray, Object[]> tuple2) throws Exception {
                ByteBuffer valueBuf = this.codec.encode(tuple2._2());
                Text textResult = new Text();
                // Only the bytes written so far (up to position()) belong to the encoded value.
                textResult.set(valueBuf.array(), 0, valueBuf.position());
                return new Tuple2<Text, Text>(new Text(tuple2._1().array()), textResult);
            }
        }).saveAsNewAPIHadoopDataset(job.getConfiguration());
        logger.info("Persisting RDD for level " + level + " into " + cuboidOutputPath);
    }

    static {
        // Every option takes exactly one argument and is mandatory.
        OPTION_CUBE_NAME = requiredOption("cubename", "Cube Name", "cubename");
        OPTION_SEGMENT_ID = requiredOption("segment", "Cube Segment Id", "segmentId");
        OPTION_META_URL = requiredOption("metaUrl", "HDFS metadata url", "metaUrl");
        OPTION_OUTPUT_PATH = requiredOption("output", "Cube output path", "output");
        OPTION_INPUT_PATH = requiredOption("input", "Hive Intermediate Table PATH", "input");
        OPTION_CUBOID_MODE = requiredOption("cuboidMode", "CoboId Mode ", "cuboidMode");
        EMTPY_ITERATOR = new ArrayList<Tuple2<ByteArray, Object[]>>(0);
    }

    /**
     * Builds a required, single-argument command-line option.
     *
     * @param argName display name of the option's argument
     * @param description help text of the option
     * @param optionName name the option is given on the command line
     * @return the newly created option
     */
    private static Option requiredOption(String argName, String description, String optionName) {
        OptionBuilder.withArgName(argName);
        OptionBuilder.hasArg();
        OptionBuilder.isRequired(true);
        OptionBuilder.withDescription(description);
        return OptionBuilder.create(optionName);
    }

    /**
     * Flat-map function that expands one cuboid row into one row per child cuboid.
     *
     * <p>The children of a row's cuboid are obtained from the {@link CuboidScheduler} passed to the
     * constructor; each child row gets a newly built key and the parent's measure values.
     */
    public static class CuboidFlatMap
    extends SparkFunction.PairFlatMapFunctionBase<Tuple2<ByteArray, Object[]>, ByteArray, Object[]> {
        private String cubeName;
        private String segmentId;
        private String metaUrl;
        private CubeSegment cubeSegment;
        private CubeDesc cubeDesc;
        private NDCuboidBuilder ndCuboidBuilder;
        private RowKeySplitter rowKeySplitter;
        private SerializableConfiguration conf;
        private CuboidScheduler cuboidScheduler;

        /**
         * @param cubeName name of the cube being built
         * @param segmentId id of the segment being built
         * @param metaUrl metadata url the Kylin configuration is loaded from in {@code doInit}
         * @param conf Hadoop configuration used to load the Kylin configuration
         * @param scheduler scheduler that yields the child cuboids of a given cuboid
         */
        public CuboidFlatMap(String cubeName, String segmentId, String metaUrl, SerializableConfiguration conf, CuboidScheduler scheduler) {
            this.cubeName = cubeName;
            this.segmentId = segmentId;
            this.metaUrl = metaUrl;
            this.conf = conf;
            this.cuboidScheduler = scheduler;
        }

        /**
         * Loads the Kylin configuration and, while it is installed as the thread-local config,
         * resolves the segment and descriptor and creates the row-key splitter and cuboid builder.
         */
        @Override
        protected void doInit() {
            KylinConfig kConfig = AbstractHadoopJob.loadKylinConfigFromHdfs(this.conf, this.metaUrl);
            try (KylinConfig.SetAndUnsetThreadLocalConfig autoClose = KylinConfig.setAndUnsetThreadLocalConfig(kConfig);){
                CubeInstance cubeInstance = CubeManager.getInstance(kConfig).getCube(this.cubeName);
                this.cubeSegment = cubeInstance.getSegmentById(this.segmentId);
                this.cubeDesc = cubeInstance.getDescriptor();
                this.rowKeySplitter = new RowKeySplitter(this.cubeSegment);
                this.ndCuboidBuilder = new NDCuboidBuilder(this.cubeSegment, new RowKeyEncoderProvider(this.cubeSegment));
            }
        }

        /**
         * Produces the child-cuboid rows for one parent row.
         *
         * @param tuple2 parent row: row key and measure values
         * @return iterator over one tuple per child cuboid, or an empty iterator when the scheduler
         *         reports no children for the parent cuboid
         * @throws Exception if the key cannot be parsed or a cuboid cannot be resolved
         */
        @Override
        public Iterator<Tuple2<ByteArray, Object[]>> doCall(Tuple2<ByteArray, Object[]> tuple2) throws Exception {
            byte[] key = tuple2._1().array();
            long cuboidId = this.rowKeySplitter.parseCuboid(key);
            List<Long> myChildren = this.cuboidScheduler.getSpanningCuboid(cuboidId);
            if (myChildren == null || myChildren.isEmpty()) {
                return EMTPY_ITERATOR.iterator();
            }
            // Split once; the resulting buffers are reused for every child key below.
            this.rowKeySplitter.split(key);
            Cuboid parentCuboid = Cuboid.findForMandatory(this.cubeDesc, cuboidId);
            List<Tuple2<ByteArray, Object[]>> tuples = new ArrayList<Tuple2<ByteArray, Object[]>>(myChildren.size());
            for (Long child : myChildren) {
                Cuboid childCuboid = Cuboid.findForMandatory(this.cubeDesc, child);
                ByteArray result = this.ndCuboidBuilder.buildKey2(parentCuboid, childCuboid, this.rowKeySplitter.getSplitBuffers());
                // All children share the parent's value array; it is not copied here.
                tuples.add(new Tuple2<ByteArray, Object[]>(result, tuple2._2()));
            }
            return tuples.iterator();
        }
    }
}

