/*
 * Decompiled with CFR 0.152.
 */
package org.carrot2.clustering.kmeans;

import com.carrotsearch.hppc.IntArrayList;
import com.carrotsearch.hppc.IntIntOpenHashMap;
import com.carrotsearch.hppc.cursors.IntCursor;
import com.carrotsearch.hppc.cursors.IntIntCursor;
import com.google.common.collect.Lists;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import org.apache.commons.lang.ObjectUtils;
import org.apache.mahout.math.function.Functions;
import org.apache.mahout.math.matrix.DoubleFactory1D;
import org.apache.mahout.math.matrix.DoubleFactory2D;
import org.apache.mahout.math.matrix.DoubleMatrix1D;
import org.apache.mahout.math.matrix.DoubleMatrix2D;
import org.carrot2.core.Cluster;
import org.carrot2.core.Document;
import org.carrot2.core.IClusteringAlgorithm;
import org.carrot2.core.LanguageCode;
import org.carrot2.core.ProcessingComponentBase;
import org.carrot2.core.ProcessingException;
import org.carrot2.core.attribute.CommonAttributes;
import org.carrot2.core.attribute.Internal;
import org.carrot2.core.attribute.Processing;
import org.carrot2.text.preprocessing.LabelFormatter;
import org.carrot2.text.preprocessing.PreprocessingContext;
import org.carrot2.text.preprocessing.pipeline.BasicPreprocessingPipeline;
import org.carrot2.text.vsm.ReducedVectorSpaceModelContext;
import org.carrot2.text.vsm.TermDocumentMatrixBuilder;
import org.carrot2.text.vsm.TermDocumentMatrixReducer;
import org.carrot2.text.vsm.VectorSpaceModelContext;
import org.carrot2.util.attribute.Attribute;
import org.carrot2.util.attribute.Bindable;
import org.carrot2.util.attribute.Input;
import org.carrot2.util.attribute.Output;
import org.carrot2.util.attribute.Required;
import org.carrot2.util.attribute.constraint.IntRange;

/*
 * This class specifies class file version 49.0 but uses Java 6 signatures.  Assumed Java 6.
 */
/**
 * Clustering component that groups {@link #documents} by repeatedly splitting the
 * largest cluster with a small k-means run ("bisecting" k-means).
 *
 * <p>Processing outline, as implemented in {@link #process()}:
 * <ol>
 * <li>preprocess the documents and select single-word features,</li>
 * <li>build the term-document matrix (optionally reduced),</li>
 * <li>split the whole document set into {@link #partitionCount} partitions, then keep
 * splitting the largest remaining cluster until {@link #clusterCount} raw clusters exist
 * or no cluster is large enough / can be split,</li>
 * <li>label every cluster having more than one document and publish the result in
 * {@link #clusters}.</li>
 * </ol>
 */
@Bindable(prefix="BisectingKMeansClusteringAlgorithm", inherit={CommonAttributes.class})
public class BisectingKMeansClusteringAlgorithm
extends ProcessingComponentBase
implements IClusteringAlgorithm {
    /**
     * Bit mask tested against a word's type flags; a word with any of these bits set is
     * not used as a feature.
     * NOTE(review): the bits presumably correspond to tokenizer flags such as
     * "common word", "query word" and "numeric" -- confirm against the tokenizer constants.
     */
    private static final int EXCLUDED_WORD_FLAGS = 0x3002;

    /** Documents to cluster (input). */
    @Processing
    @Input
    @Required
    @Internal
    @Attribute(key="documents", inherit=true)
    public List<Document> documents;
    /** Clusters produced by {@link #process()} (output). */
    @Processing
    @Output
    @Internal
    @Attribute(key="clusters", inherit=true)
    public List<Cluster> clusters = null;
    /**
     * Splitting stops once at least this many raw clusters exist. The value is also passed
     * to the matrix reducer when {@link #useDimensionalityReduction} is enabled.
     */
    @Processing
    @Input
    @Attribute
    @IntRange(min=2)
    public int clusterCount = 25;
    /** Maximum number of k-means iterations performed by a single split. */
    @Processing
    @Input
    @Attribute
    @IntRange(min=1)
    public int maxIterations = 15;
    /** When set, clustering runs on the reduced matrix instead of the raw term-document matrix. */
    @Processing
    @Input
    @Attribute
    public boolean useDimensionalityReduction = true;
    /** Number of partitions requested from every split. */
    @Processing
    @Input
    @Attribute
    @IntRange(min=2, max=10)
    public int partitionCount = 2;
    /**
     * Number of top-weighted centroid terms targeted for a cluster label; terms tied with
     * the weakest selected term are included as well (see {@link #getLabels}).
     */
    @Processing
    @Input
    @Attribute
    @IntRange(min=1, max=10)
    public int labelCount = 3;
    public final BasicPreprocessingPipeline preprocessingPipeline = new BasicPreprocessingPipeline();
    public final TermDocumentMatrixBuilder matrixBuilder = new TermDocumentMatrixBuilder();
    public final TermDocumentMatrixReducer matrixReducer = new TermDocumentMatrixReducer();
    public final LabelFormatter labelFormatter = new LabelFormatter();

    /**
     * Runs the whole clustering pipeline and stores the outcome in {@link #clusters}.
     *
     * @throws ProcessingException declared by the component contract
     */
    @Override
    public void process() throws ProcessingException {
        PreprocessingContext preprocessingContext = this.preprocessingPipeline.preprocess(this.documents, null, LanguageCode.ENGLISH);

        // Feature selection: one word per stem (its most frequent original form),
        // skipping words whose type carries any of the excluded flags.
        int[] stemsMfow = preprocessingContext.allStems.mostFrequentOriginalWordIndex;
        short[] wordsType = preprocessingContext.allWords.type;
        IntArrayList featureIndices = new IntArrayList(stemsMfow.length);
        for (int i = 0; i < stemsMfow.length; ++i) {
            short flag = wordsType[stemsMfow[i]];
            if ((flag & EXCLUDED_WORD_FLAGS) != 0) {
                continue;
            }
            featureIndices.add(stemsMfow[i]);
        }
        preprocessingContext.allLabels.featureIndex = featureIndices.toArray();
        preprocessingContext.allLabels.firstPhraseIndex = -1;

        this.clusters = Lists.newArrayList();
        if (preprocessingContext.hasLabels()) {
            VectorSpaceModelContext vsmContext = new VectorSpaceModelContext(preprocessingContext);
            ReducedVectorSpaceModelContext reducedVsmContext = new ReducedVectorSpaceModelContext(vsmContext);
            this.matrixBuilder.buildTermDocumentMatrix(vsmContext);
            this.matrixBuilder.buildTermPhraseMatrix(vsmContext);

            // Invert stem -> row so that labels can be looked up by matrix row later on.
            IntIntOpenHashMap rowToStemIndex = new IntIntOpenHashMap();
            for (IntIntCursor c : vsmContext.stemToRowIndex) {
                rowToStemIndex.put(c.value, c.key);
            }

            DoubleMatrix2D tdMatrix;
            if (this.useDimensionalityReduction) {
                this.matrixReducer.reduce(reducedVsmContext, this.clusterCount);
                // NOTE(review): viewDice() is presumably used so that columns are documents,
                // as split() selects documents by column -- confirm.
                tdMatrix = reducedVsmContext.coefficientMatrix.viewDice();
            } else {
                tdMatrix = vsmContext.termDocumentMatrix;
            }

            // Initial selection: every column of the matrix.
            IntArrayList columns = new IntArrayList(tdMatrix.columns());
            for (int c = 0; c < tdMatrix.columns(); ++c) {
                columns.add(c);
            }

            List<IntArrayList> rawClusters = Lists.newArrayList();
            rawClusters.addAll(this.split(this.partitionCount, tdMatrix, columns, this.maxIterations));

            while (rawClusters.size() < this.clusterCount) {
                int largestIndex = this.indexOfLargestSplittable(rawClusters);
                if (largestIndex < 0) {
                    // No cluster is big enough to be worth splitting.
                    break;
                }
                List<IntArrayList> split = this.split(this.partitionCount, tdMatrix, rawClusters.get(largestIndex), this.maxIterations);
                if (split.size() <= 1) {
                    // split() is deterministic and the same cluster would be picked again,
                    // so retrying cannot produce a different outcome; stop here.
                    break;
                }
                rawClusters.remove(largestIndex);
                rawClusters.addAll(split);
            }

            // Turn raw clusters into labeled clusters; single-document clusters are dropped.
            for (IntArrayList rawCluster : rawClusters) {
                if (rawCluster.size() <= 1) {
                    continue;
                }
                Cluster cluster = new Cluster();
                cluster.addPhrases(this.getLabels(rawCluster, vsmContext.termDocumentMatrix, rowToStemIndex, preprocessingContext.allStems.mostFrequentOriginalWordIndex, preprocessingContext.allWords.image));
                for (int j = 0; j < rawCluster.size(); ++j) {
                    cluster.addDocuments(this.documents.get(rawCluster.get(j)));
                }
                this.clusters.add(cluster);
            }
        }
        Collections.sort(this.clusters, Cluster.BY_REVERSED_SIZE_AND_LABEL_COMPARATOR);
        Cluster.appendOtherTopics(this.documents, this.clusters);
    }

    /**
     * Finds the cluster to split next.
     *
     * @param rawClusters current raw clusters (lists of document column indices)
     * @return index of the first cluster with the greatest size, provided that size exceeds
     *         {@code partitionCount * 2}; {@code -1} when no such cluster exists
     */
    private int indexOfLargestSplittable(List<IntArrayList> rawClusters) {
        int bestIndex = -1;
        int bestSize = this.partitionCount * 2;
        for (int i = 0; i < rawClusters.size(); ++i) {
            int size = rawClusters.get(i).size();
            // Strict comparison keeps the first of equally sized clusters.
            if (size > bestSize) {
                bestSize = size;
                bestIndex = i;
            }
        }
        return bestIndex;
    }

    /**
     * Builds label words for a cluster from the summed term vectors of its documents.
     *
     * @param documents column indices of the cluster's documents in {@code termDocumentMatrix}
     * @param termDocumentMatrix matrix whose columns are summed into the centroid
     * @param rowToStemIndex maps a matrix row to a stem index
     * @param mostFrequentOriginalWordIndex maps a stem index to a word index
     * @param wordImage word images indexed by word index
     * @return formatted labels for every row whose centroid value is at least the
     *         {@code labelCount}-th largest value; ties may yield more than {@code labelCount} labels
     */
    private List<String> getLabels(IntArrayList documents, DoubleMatrix2D termDocumentMatrix, IntIntOpenHashMap rowToStemIndex, int[] mostFrequentOriginalWordIndex, char[][] wordImage) {
        // The centroid is recomputed here from the matrix passed in rather than taken from split().
        DoubleMatrix1D centroid = DoubleFactory1D.dense.make(termDocumentMatrix.rows());
        for (IntCursor d : documents) {
            centroid.assign(termDocumentMatrix.viewColumn(d.value), Functions.plus);
        }
        List<String> labels = Lists.newArrayListWithCapacity(this.labelCount);
        // Threshold: the value found labelCount positions from the end of the sorted centroid.
        double minValueForLabel = centroid.viewSorted().get(centroid.size() - Math.min(this.labelCount, centroid.size()));
        for (int i = 0; i < centroid.size(); ++i) {
            if (!(centroid.getQuick(i) >= minValueForLabel)) {
                continue;
            }
            labels.add(LabelFormatter.format(new char[][]{wordImage[mostFrequentOriginalWordIndex[rowToStemIndex.get(i)]]}, new boolean[]{false}, false));
        }
        return labels;
    }

    /**
     * Splits the selected columns of {@code input} into at most {@code partitions} groups
     * using k-means seeded with the first selected columns as centroids.
     *
     * <p>The number of partitions is capped at the number of selected columns, because each
     * initial centroid is copied from a distinct column. A partition that receives no columns
     * in an iteration keeps its previous centroid instead of being averaged over zero members,
     * which would produce NaN and distort all subsequent assignments.
     *
     * @param partitions requested number of partitions
     * @param input matrix whose columns are the items to partition
     * @param columns indices of the columns of {@code input} to partition; not modified
     * @param iterations maximum number of k-means iterations
     * @return non-empty partitions, each holding column indices of {@code input};
     *         an empty list when {@code columns} is empty
     */
    private List<IntArrayList> split(int partitions, DoubleMatrix2D input, IntArrayList columns, int iterations) {
        if (columns.isEmpty()) {
            return new ArrayList<IntArrayList>();
        }

        // Work on a compact copy containing only the selected columns.
        DoubleMatrix2D selected = input.viewSelection(null, columns.toArray()).copy();
        int rowCount = selected.rows();
        int columnCount = selected.columns();
        int partitionsUsed = Math.min(partitions, columnCount);

        List<IntArrayList> result = this.newPartitions(partitionsUsed, columnCount);
        List<IntArrayList> previousResult = null;

        // Initial centroids are the first partitionsUsed selected columns.
        DoubleMatrix2D centroids = DoubleFactory2D.dense.make(rowCount, partitionsUsed).assign(selected.viewPart(0, 0, rowCount, partitionsUsed));
        DoubleMatrix2D similarities = DoubleFactory2D.dense.make(partitionsUsed, columnCount);

        for (int it = 0; it < iterations; ++it) {
            // similarities = centroids^T * selected
            centroids.zMult(selected, similarities, 1.0, 0.0, true, false);

            // Assign every column to the centroid with the highest similarity (first wins ties).
            for (int c = 0; c < similarities.columns(); ++c) {
                int maxRow = 0;
                double max = similarities.get(0, c);
                for (int r = 1; r < similarities.rows(); ++r) {
                    double value = similarities.get(r, c);
                    if (max < value) {
                        max = value;
                        maxRow = r;
                    }
                }
                result.get(maxRow).add(c);
            }

            // Converged: the assignment did not change since the previous iteration.
            if (ObjectUtils.equals(previousResult, result)) {
                break;
            }

            // Recompute each centroid as the mean of its members.
            for (int i = 0; i < result.size(); ++i) {
                IntArrayList cluster = result.get(i);
                if (cluster.isEmpty()) {
                    // Keep the previous centroid; dividing by zero would yield NaN.
                    continue;
                }
                for (int k = 0; k < rowCount; ++k) {
                    double sum = 0.0;
                    for (int j = 0; j < cluster.size(); ++j) {
                        sum += selected.get(k, cluster.get(j));
                    }
                    centroids.setQuick(k, i, sum / (double)cluster.size());
                }
            }

            // Keep the assignment of the last iteration as the final result.
            if (it < iterations - 1) {
                previousResult = result;
                result = this.newPartitions(partitionsUsed, columnCount);
            }
        }

        // Drop empty partitions and translate local column indices back to indices of input.
        Iterator<IntArrayList> partitionIterator = result.iterator();
        while (partitionIterator.hasNext()) {
            IntArrayList cluster = partitionIterator.next();
            if (cluster.isEmpty()) {
                partitionIterator.remove();
                continue;
            }
            for (int j = 0; j < cluster.size(); ++j) {
                cluster.set(j, columns.get(cluster.get(j)));
            }
        }
        return result;
    }

    /**
     * Creates {@code count} empty partitions, each pre-sized for {@code capacity} elements.
     */
    private List<IntArrayList> newPartitions(int count, int capacity) {
        List<IntArrayList> partitions = Lists.newArrayList();
        for (int i = 0; i < count; ++i) {
            partitions.add(new IntArrayList(capacity));
        }
        return partitions;
    }
}

