/*
 * Decompiled with CFR 0.152.
 */
package org.apache.kylin.engine.spark;

import com.google.common.collect.Lists;
import java.io.Serializable;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.apache.commons.cli.Option;
import org.apache.commons.cli.OptionBuilder;
import org.apache.commons.cli.Options;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.kylin.common.KylinConfig;
import org.apache.kylin.common.KylinVersion;
import org.apache.kylin.common.util.AbstractApplication;
import org.apache.kylin.common.util.Bytes;
import org.apache.kylin.common.util.HadoopUtil;
import org.apache.kylin.common.util.OptionsHelper;
import org.apache.kylin.cube.CubeInstance;
import org.apache.kylin.cube.CubeManager;
import org.apache.kylin.cube.CubeSegment;
import org.apache.kylin.cube.cuboid.CuboidModeEnum;
import org.apache.kylin.cube.cuboid.CuboidUtil;
import org.apache.kylin.cube.kv.RowKeyDecoder;
import org.apache.kylin.cube.model.CubeDesc;
import org.apache.kylin.engine.mr.common.AbstractHadoopJob;
import org.apache.kylin.engine.mr.common.CubeStatsWriter;
import org.apache.kylin.engine.mr.common.SerializableConfiguration;
import org.apache.kylin.engine.spark.KylinSparkJobListener;
import org.apache.kylin.engine.spark.SparkFunction;
import org.apache.kylin.engine.spark.SparkUtil;
import org.apache.kylin.measure.hllc.HLLCounter;
import org.apache.kylin.shaded.com.google.common.base.Preconditions;
import org.apache.kylin.shaded.com.google.common.collect.Maps;
import org.apache.kylin.shaded.com.google.common.hash.HashFunction;
import org.apache.kylin.shaded.com.google.common.hash.Hasher;
import org.apache.kylin.shaded.com.google.common.hash.Hashing;
import org.apache.spark.SparkConf;
import org.apache.spark.TaskContext;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.PairFlatMapFunction;
import org.apache.spark.api.java.function.VoidFunction;
import org.apache.spark.scheduler.SparkListenerInterface;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import scala.Tuple2;

public class SparkCalculateStatsFromBaseCuboidJob
extends AbstractApplication
implements Serializable {
    /** Class-wide logger. */
    protected static final Logger logger = LoggerFactory.getLogger(SparkCalculateStatsFromBaseCuboidJob.class);
    /** Cuboid id to HLL counter; filled by merging the grouped per-partition counters in {@code execute}. */
    protected Map<Long, HLLCounter> cuboidHLLMap = Maps.newHashMap();
    // Command-line options; each one is built in the static initializer of this class.
    public static final Option OPTION_CUBE_NAME;
    public static final Option OPTION_SEGMENT_ID;
    public static final Option OPTION_INPUT_PATH;
    public static final Option OPTION_OUTPUT_PATH;
    public static final Option OPTION_META_URL;
    public static final Option OPTION_JOB_MODE;
    public static final Option OPTION_SAMPLING_PERCENT;
    /** Option set returned by {@code getOptions()}; populated in the constructor. */
    private Options options = new Options();
    /** Parsed from OPTION_SAMPLING_PERCENT; a row is sampled while {@code rowCount < samplingPercent}. */
    private int samplingPercent;
    /** Position inside the current window of 100 rows; reset to 0 when it reaches 100. */
    private int rowCount = 0;
    /** Per-column hash scratch array for the new algorithm; only allocated when that algorithm is selected. */
    private long[] rowHashCodesLong = null;
    /** Selects putRowKeyToHLLNew (true) or putRowKeyToHLLOld (false); set from KylinVersion.isBefore200. */
    private boolean isUsePutRowKeyToHllNewAlgorithm;
    /** One HLL counter per entry of {@code cuboidIds}, at the same index. */
    private HLLCounter[] allCuboidsHLL = null;
    /** Ids of the cuboids whose statistics are calculated. */
    private Long[] cuboidIds;
    /** Result of CuboidUtil.getCuboidBitSet; entries are used as row-key column indexes, one array per cuboid. */
    private Integer[][] allCuboidsBitSet = null;
    /** Hash function: murmur3_32 for the old algorithm, murmur3_128 for the new one. */
    private HashFunction hf = null;
    /** Number of row-key columns of the cube descriptor. */
    protected int nRowKey;
    /** Id of the base cuboid; only rows carrying this cuboid id are sampled. */
    protected long baseCuboidId;
    /** Decoder for the input row keys; created in {@code doInit} of the map-side function. */
    RowKeyDecoder rowKeyDecoder;
    public SparkCalculateStatsFromBaseCuboidJob() {
        this.options.addOption(OPTION_CUBE_NAME);
        this.options.addOption(OPTION_INPUT_PATH);
        this.options.addOption(OPTION_OUTPUT_PATH);
        this.options.addOption(OPTION_SEGMENT_ID);
        this.options.addOption(OPTION_META_URL);
        this.options.addOption(OPTION_JOB_MODE);
        this.options.addOption(OPTION_SAMPLING_PERCENT);
    }

    /**
     * Exposes the command-line options accepted by this job.
     *
     * @return the option set that was populated in the constructor
     */
    @Override
    protected Options getOptions() {
        return options;
    }

    /**
     * Calculates cuboid statistics from base cuboid data with Spark.
     *
     * <p>Steps, in order:
     * <ol>
     *   <li>read the job options, create the Spark context and delete the output path;</li>
     *   <li>load the Kylin config from HDFS, resolve the cuboid ids to process and create one
     *       HLL counter per cuboid, choosing the old or new hashing algorithm by cube version;</li>
     *   <li>per input partition: decode each row key, keep only base cuboid rows, feed the sampled
     *       rows into the HLL counters and emit one (cuboid id, HLL registers) pair per cuboid;</li>
     *   <li>group the pairs by cuboid id, merge the counters and write partial cuboid statistics
     *       to the output path.</li>
     * </ol>
     *
     * @param optionsHelper parsed command-line options; all options declared by this class are read
     * @throws Exception if any step of the job fails
     */
    @Override
    protected void execute(OptionsHelper optionsHelper) throws Exception {
        final String cubeName = optionsHelper.getOptionValue(OPTION_CUBE_NAME);
        final String segmentId = optionsHelper.getOptionValue(OPTION_SEGMENT_ID);
        String input = optionsHelper.getOptionValue(OPTION_INPUT_PATH);
        final String output = optionsHelper.getOptionValue(OPTION_OUTPUT_PATH);
        final String metaUrl = optionsHelper.getOptionValue(OPTION_META_URL);
        String jobMode = optionsHelper.getOptionValue(OPTION_JOB_MODE);
        this.samplingPercent = Integer.parseInt(optionsHelper.getOptionValue(OPTION_SAMPLING_PERCENT));
        SparkConf sparkConf = SparkUtil.setKryoSerializerInConf();
        sparkConf.setAppName("Kylin_Calculate_Statics_From_BaseCuboid_Data_" + cubeName + "_With_Spark");
        sparkConf.set("spark.speculation", "false");
        KylinSparkJobListener jobListener = new KylinSparkJobListener();
        try (JavaSparkContext sc = new JavaSparkContext(sparkConf);){
            // Assigned exactly once inside the config scope below; read later by the reduce-side function.
            int cubeStatsHLLPrecision;
            sc.sc().addSparkListener((SparkListenerInterface)jobListener);
            HadoopUtil.deletePath(sc.hadoopConfiguration(), new Path(output));
            final SerializableConfiguration sConf = new SerializableConfiguration(sc.hadoopConfiguration());
            KylinConfig config = AbstractHadoopJob.loadKylinConfigFromHdfs(sConf, metaUrl);
            try (KylinConfig.SetAndUnsetThreadLocalConfig autoUnset = KylinConfig.setAndUnsetThreadLocalConfig(config);){
                CubeManager cubeManager = CubeManager.getInstance(config);
                CubeInstance cube = cubeManager.getCube(cubeName);
                CubeDesc cubeDesc = cube.getDescriptor();
                CubeSegment optSegment = cube.getSegmentById(segmentId);
                this.baseCuboidId = cube.getCuboidScheduler().getBaseCuboidId();
                this.nRowKey = cubeDesc.getRowkey().getRowKeyColumns().length;
                Set<Long> cuboids = cube.getCuboidsByMode(jobMode);
                if (cuboids.size() == 0) {
                    // Nothing for the requested mode: fall back to the current cuboids minus the recommended ones.
                    Set<Long> current = cube.getCuboidsByMode(CuboidModeEnum.CURRENT);
                    current.removeAll(cube.getCuboidsRecommend());
                    cuboids = current;
                }
                this.cuboidIds = cuboids.toArray(new Long[cuboids.size()]);
                this.allCuboidsBitSet = CuboidUtil.getCuboidBitSet(this.cuboidIds, this.nRowKey);
                cubeStatsHLLPrecision = config.getCubeStatsHLLPrecision();
                this.allCuboidsHLL = new HLLCounter[this.cuboidIds.length];
                for (int i = 0; i < this.cuboidIds.length; ++i) {
                    this.allCuboidsHLL[i] = new HLLCounter(cubeStatsHLLPrecision);
                }
                if (KylinVersion.isBefore200(cubeDesc.getVersion())) {
                    this.isUsePutRowKeyToHllNewAlgorithm = false;
                    this.hf = Hashing.murmur3_32();
                    logger.info("Found KylinVersion : {}. Use old algorithm for cuboid sampling.", (Object)cubeDesc.getVersion());
                } else {
                    this.isUsePutRowKeyToHllNewAlgorithm = true;
                    this.rowHashCodesLong = new long[this.nRowKey];
                    this.hf = Hashing.murmur3_128();
                    logger.info("Found KylinVersion : {}. Use new algorithm for cuboid sampling. About the details of the new algorithm, please refer to KYLIN-2518", (Object)cubeDesc.getVersion());
                }
            }
            JavaPairRDD inputRDD = sc.sequenceFile(input, Text.class, Text.class);
            // Map side: one call per partition, producing one (cuboid id, HLL registers) pair per cuboid.
            JavaPairRDD afterMapRDD = inputRDD.mapPartitionsToPair((PairFlatMapFunction)new SparkFunction.PairFlatMapFunctionBase<Iterator<Tuple2<Text, Text>>, Text, Text>(){

                @Override
                protected void doInit() {
                    KylinConfig kylinConfig = AbstractHadoopJob.loadKylinConfigFromHdfs(sConf, metaUrl);
                    CubeInstance cubeInstance = CubeManager.getInstance(kylinConfig).getCube(cubeName);
                    CubeSegment cubeSeg = cubeInstance.getSegmentById(segmentId);
                    SparkCalculateStatsFromBaseCuboidJob.this.rowKeyDecoder = new RowKeyDecoder(cubeSeg);
                }

                @Override
                protected Iterator<Tuple2<Text, Text>> doCall(Iterator<Tuple2<Text, Text>> iterator) throws Exception {
                    while (iterator.hasNext()) {
                        Text key = (Text)iterator.next()._1();
                        long cuboidID = SparkCalculateStatsFromBaseCuboidJob.this.rowKeyDecoder.decode(key.getBytes());
                        // Only base cuboid rows contribute to the statistics.
                        if (cuboidID != SparkCalculateStatsFromBaseCuboidJob.this.baseCuboidId) continue;
                        List<String> keyValues = SparkCalculateStatsFromBaseCuboidJob.this.rowKeyDecoder.getValues();
                        // Sampling: the first samplingPercent rows of every window of 100 rows are used.
                        if (SparkCalculateStatsFromBaseCuboidJob.this.rowCount < SparkCalculateStatsFromBaseCuboidJob.this.samplingPercent) {
                            Preconditions.checkArgument(SparkCalculateStatsFromBaseCuboidJob.this.nRowKey == keyValues.size());
                            String[] row = keyValues.toArray(new String[keyValues.size()]);
                            if (SparkCalculateStatsFromBaseCuboidJob.this.isUsePutRowKeyToHllNewAlgorithm) {
                                SparkCalculateStatsFromBaseCuboidJob.this.putRowKeyToHLLNew(row);
                            } else {
                                SparkCalculateStatsFromBaseCuboidJob.this.putRowKeyToHLLOld(row);
                            }
                        }
                        if (++SparkCalculateStatsFromBaseCuboidJob.this.rowCount != 100) continue;
                        SparkCalculateStatsFromBaseCuboidJob.this.rowCount = 0;
                    }
                    // Scratch buffer reused for every cuboid; cleared before each write.
                    ByteBuffer hllBuf = ByteBuffer.allocate(0x100000);
                    List<Tuple2<Text, Text>> result = new ArrayList<Tuple2<Text, Text>>();
                    for (int i = 0; i < SparkCalculateStatsFromBaseCuboidJob.this.cuboidIds.length; ++i) {
                        HLLCounter hll = SparkCalculateStatsFromBaseCuboidJob.this.allCuboidsHLL[i];
                        Text outputKey = new Text();
                        Text outputValue = new Text();
                        outputKey.set(Bytes.toBytes(SparkCalculateStatsFromBaseCuboidJob.this.cuboidIds[i]));
                        hllBuf.clear();
                        hll.writeRegisters(hllBuf);
                        outputValue.set(hllBuf.array(), 0, hllBuf.position());
                        // Log the payload size rather than the array itself: byte[].toString() only prints
                        // an identity such as "[B@1a2b3c", and the registers are binary, not text.
                        logger.info("Cuboid id to be processed1: {}, serialized HLL size: {} bytes", SparkCalculateStatsFromBaseCuboidJob.this.cuboidIds[i], hllBuf.position());
                        result.add(new Tuple2<Text, Text>(outputKey, outputValue));
                    }
                    // Logged once after the loop; dumping the whole list per iteration made log volume quadratic.
                    logger.info("result size: {}", result.size());
                    return result.iterator();
                }
            });
            // Reduce side: one call per cuboid id with all per-partition HLL payloads for that id.
            afterMapRDD.groupByKey().foreach((VoidFunction)new SparkFunction.VoidFunctionBase<Tuple2<Text, Iterable<Text>>>(){

                @Override
                protected void doInit() {
                    KylinConfig kylinConfig = AbstractHadoopJob.loadKylinConfigFromHdfs(sConf, metaUrl);
                    // NOTE(review): the returned handle is not closed here, presumably so the config stays
                    // bound to this thread for the following doCall invocations - confirm.
                    KylinConfig.setAndUnsetThreadLocalConfig(kylinConfig);
                }

                @Override
                protected void doCall(Tuple2<Text, Iterable<Text>> v1) throws Exception {
                    Text key = v1._1();
                    Iterable<Text> values = v1._2();
                    long cuboidId = Bytes.toLong(key.getBytes());
                    logger.info("Cuboid id to be processed: " + cuboidId);
                    List<Long> baseCuboidRowCountInMappers = Lists.newArrayList();
                    long totalRowsBeforeMerge = 0L;
                    for (Text value : values) {
                        HLLCounter hll = new HLLCounter(cubeStatsHLLPrecision);
                        // Text's backing array may be longer than its content, so bound the view by getLength().
                        ByteBuffer bf = ByteBuffer.wrap(value.getBytes(), 0, value.getLength());
                        hll.readRegisters(bf);
                        if (cuboidId == SparkCalculateStatsFromBaseCuboidJob.this.baseCuboidId) {
                            baseCuboidRowCountInMappers.add(hll.getCountEstimate());
                        }
                        totalRowsBeforeMerge += hll.getCountEstimate();
                        if (SparkCalculateStatsFromBaseCuboidJob.this.cuboidHLLMap.get(cuboidId) != null) {
                            SparkCalculateStatsFromBaseCuboidJob.this.cuboidHLLMap.get(cuboidId).merge(hll);
                            continue;
                        }
                        SparkCalculateStatsFromBaseCuboidJob.this.cuboidHLLMap.put(cuboidId, hll);
                    }
                    long grandTotal = 0L;
                    // Sums over every counter currently held in cuboidHLLMap, not only the one for this key.
                    for (HLLCounter hll : SparkCalculateStatsFromBaseCuboidJob.this.cuboidHLLMap.values()) {
                        grandTotal += hll.getCountEstimate();
                    }
                    double mapperOverlapRatio = grandTotal == 0L ? 0.0 : (double)totalRowsBeforeMerge / (double)grandTotal;
                    logger.info("writer cuboIdstatic to " + output);
                    CubeStatsWriter.writePartialCuboidStatistics(sConf.get(), new Path(output), SparkCalculateStatsFromBaseCuboidJob.this.cuboidHLLMap, SparkCalculateStatsFromBaseCuboidJob.this.samplingPercent, baseCuboidRowCountInMappers.size(), mapperOverlapRatio, TaskContext.getPartitionId());
                }
            });
        }
    }

    /**
     * Old sampling algorithm: hashes every column value on its own, then for each cuboid
     * hashes the concatenation of the hashes of its columns and adds the result to that
     * cuboid's HLL counter.
     *
     * @param row decoded row-key values, one per row-key column; an entry may be null
     */
    private void putRowKeyToHLLOld(String[] row) {
        final byte[][] columnHashes = new byte[nRowKey][];
        for (int col = 0; col < nRowKey; col++) {
            final Hasher columnHasher = hf.newHasher();
            final String cell = row[col];
            if (cell == null) {
                // A null column value is hashed as the int 0.
                columnHashes[col] = columnHasher.putInt(0).hash().asBytes();
            } else {
                columnHashes[col] = columnHasher.putUnencodedChars(cell).hash().asBytes();
            }
        }

        for (int cuboid = 0; cuboid < cuboidIds.length; cuboid++) {
            final Hasher cuboidHasher = hf.newHasher();
            for (Integer columnIndex : allCuboidsBitSet[cuboid]) {
                cuboidHasher.putBytes(columnHashes[columnIndex]);
            }
            allCuboidsHLL[cuboid].add(cuboidHasher.hash().asBytes());
        }
    }

    /**
     * New sampling algorithm: turns every column value into a long (hash plus column index),
     * then for each cuboid sums the longs of its columns and feeds the sum directly into that
     * cuboid's HLL counter.
     *
     * @param row decoded row-key values, one per row-key column; an entry may be null
     */
    private void putRowKeyToHLLNew(String[] row) {
        for (int col = 0; col < nRowKey; col++) {
            // A null column value is hashed as the string "0".
            final String cell = row[col] == null ? "0" : row[col];
            final byte[] digest = hf.newHasher().putUnencodedChars(cell).hash().asBytes();
            rowHashCodesLong[col] = Bytes.toLong(digest) + (long) col;
        }

        int cuboid = 0;
        for (Integer[] columnIndexes : allCuboidsBitSet) {
            long combined = 0L;
            for (Integer columnIndex : columnIndexes) {
                combined += rowHashCodesLong[columnIndex];
            }
            allCuboidsHLL[cuboid].addHashDirectly(combined);
            cuboid++;
        }
    }

    // Builds the command-line options. Every option takes one argument and is required;
    // only the metadata URL option carries a description.
    static {
        OPTION_CUBE_NAME = OptionBuilder.withArgName("cubename")
                .hasArg()
                .isRequired(true)
                .create("cubename");
        OPTION_SEGMENT_ID = OptionBuilder.withArgName("segmentId")
                .hasArg()
                .isRequired(true)
                .create("segmentId");
        OPTION_INPUT_PATH = OptionBuilder.withArgName("input")
                .hasArg()
                .isRequired(true)
                .create("input");
        OPTION_OUTPUT_PATH = OptionBuilder.withArgName("output")
                .hasArg()
                .isRequired(true)
                .create("output");
        OPTION_META_URL = OptionBuilder.withArgName("metaUrl")
                .hasArg()
                .isRequired(true)
                .withDescription("HDFS metadata url")
                .create("metaUrl");
        OPTION_JOB_MODE = OptionBuilder.withArgName("cuboidMode")
                .hasArg()
                .isRequired(true)
                .create("cuboidMode");
        OPTION_SAMPLING_PERCENT = OptionBuilder.withArgName("statisticssamplingpercent")
                .hasArg()
                .isRequired(true)
                .create("statisticssamplingpercent");
    }
}

