/*
 * Decompiled with CFR 0.152.
 */
package org.apache.sedona.stats.clustering;

import java.io.Serializable;
import org.apache.sedona.util.DfUtils$;
import org.apache.spark.sql.Column;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.functions$;
import org.apache.spark.sql.sedona_sql.UDT.GeometryUDT$;
import org.apache.spark.sql.sedona_sql.expressions.st_functions$;
import org.apache.spark.sql.types.DataType;
import org.graphframes.GraphFrame$;
import scala.Function0;
import scala.Function2;
import scala.Predef$;
import scala.collection.ArrayOps$;
import scala.collection.immutable.$colon$colon;
import scala.collection.immutable.List;
import scala.collection.immutable.Nil$;
import scala.collection.immutable.Seq;
import scala.runtime.BoxesRunTime;
import scala.runtime.ScalaRunTime$;

public final class DBSCAN$ {
    public static final DBSCAN$ MODULE$ = new DBSCAN$();
    private static final String ID_COLUMN = "__id";

    private String ID_COLUMN() {
        return ID_COLUMN;
    }

    public Dataset<Row> dbscan(Dataset<Row> dataframe, double epsilon, int minPts, String geometry, boolean includeOutliers, boolean useSpheroid, String isCoreColumnName, String clusterColumnName) {
        String string;
        block3: {
            String string2 = geometry;
            switch (string2 == null ? 0 : string2.hashCode()) {
                case 0: {
                    if (string2 != null) break;
                    string = DfUtils$.MODULE$.getGeometryColumnName(dataframe.schema());
                    break block3;
                }
            }
            string = geometry;
        }
        String geometryCol = string;
        this.validateInputs(dataframe, epsilon, minPts, geometryCol);
        Function2 & Serializable distanceFunction = useSpheroid ? (Function2 & Serializable)(a, b) -> st_functions$.MODULE$.ST_DistanceSpheroid((Column)a, (Column)b) : (Function2 & Serializable)(a, b) -> st_functions$.MODULE$.ST_Distance((Column)a, (Column)b);
        boolean hasIdColumn = ArrayOps$.MODULE$.contains$extension(Predef$.MODULE$.refArrayOps((Object[])dataframe.columns()), (Object)"id");
        Dataset idDataframe = hasIdColumn ? dataframe.withColumnRenamed("id", this.ID_COLUMN()).withColumn("id", functions$.MODULE$.sha2(functions$.MODULE$.to_json(functions$.MODULE$.struct("*", (Seq)Nil$.MODULE$)), 256)) : dataframe.withColumn("id", functions$.MODULE$.sha2(functions$.MODULE$.to_json(functions$.MODULE$.struct("*", (Seq)Nil$.MODULE$)), 256));
        Dataset isCorePointsDF = idDataframe.alias("left").join(idDataframe.alias("right"), ((Column)distanceFunction.apply((Object)functions$.MODULE$.col(new StringBuilder(5).append("left.").append(geometryCol).toString()), (Object)functions$.MODULE$.col(new StringBuilder(6).append("right.").append(geometryCol).toString()))).$less$eq((Object)BoxesRunTime.boxToDouble((double)epsilon))).groupBy((Seq)ScalaRunTime$.MODULE$.wrapRefArray((Object[])new Column[]{functions$.MODULE$.col("left.id")})).agg(functions$.MODULE$.first(functions$.MODULE$.struct("left.*", (Seq)Nil$.MODULE$)).alias("leftContents"), (Seq)ScalaRunTime$.MODULE$.wrapRefArray((Object[])new Column[]{functions$.MODULE$.count(functions$.MODULE$.col("right.id")).alias("neighbors_count"), functions$.MODULE$.collect_list(functions$.MODULE$.col("right.id")).alias("neighbors")})).withColumn(isCoreColumnName, functions$.MODULE$.col("neighbors_count").$greater$eq((Object)functions$.MODULE$.lit((Object)BoxesRunTime.boxToInteger((int)minPts)))).select("leftContents.*", (Seq)ScalaRunTime$.MODULE$.wrapRefArray((Object[])new String[]{"neighbors", isCoreColumnName})).checkpoint();
        Dataset corePointsDF = isCorePointsDF.filter(functions$.MODULE$.col(isCoreColumnName));
        Dataset borderPointsDF = isCorePointsDF.filter(functions$.MODULE$.col(isCoreColumnName).unary_$bang());
        Dataset coreEdgesDf = corePointsDF.select((Seq)ScalaRunTime$.MODULE$.wrapRefArray((Object[])new Column[]{functions$.MODULE$.col("id").alias("src"), functions$.MODULE$.explode(functions$.MODULE$.col("neighbors")).alias("dst")})).alias("left").join(corePointsDF.alias("right"), functions$.MODULE$.col("left.dst").$eq$eq$eq((Object)functions$.MODULE$.col("right.id"))).select((Seq)ScalaRunTime$.MODULE$.wrapRefArray((Object[])new Column[]{functions$.MODULE$.col("left.src"), functions$.MODULE$.col("right.id").alias("dst")}));
        Dataset<Row> connectedComponentsDF = GraphFrame$.MODULE$.apply((Dataset<Row>)corePointsDF, (Dataset<Row>)coreEdgesDf).connectedComponents().run();
        Dataset borderComponentsDF = borderPointsDF.select((Seq)ScalaRunTime$.MODULE$.wrapRefArray((Object[])new Column[]{functions$.MODULE$.struct("*", (Seq)Nil$.MODULE$).alias("leftContent"), functions$.MODULE$.explode(functions$.MODULE$.col("neighbors")).alias("neighbor")})).join(connectedComponentsDF.alias("right"), functions$.MODULE$.col("neighbor").$eq$eq$eq((Object)functions$.MODULE$.col("right.id"))).groupBy((Seq)ScalaRunTime$.MODULE$.wrapRefArray((Object[])new Column[]{functions$.MODULE$.col("leftContent.id")})).agg(functions$.MODULE$.first(functions$.MODULE$.col("leftContent")).alias("leftContent"), (Seq)ScalaRunTime$.MODULE$.wrapRefArray((Object[])new Column[]{functions$.MODULE$.min(functions$.MODULE$.col("right.component")).alias("component")})).select("leftContent.*", (Seq)ScalaRunTime$.MODULE$.wrapRefArray((Object[])new String[]{"component"}));
        Dataset clusteredPointsDf = borderComponentsDF.union(connectedComponentsDF);
        Dataset outliersDf = idDataframe.join(clusteredPointsDf, (Seq)new .colon.colon((Object)"id", (List)Nil$.MODULE$), "left_anti").withColumn(isCoreColumnName, functions$.MODULE$.lit((Object)BoxesRunTime.boxToBoolean((boolean)false))).withColumn("component", functions$.MODULE$.lit((Object)BoxesRunTime.boxToInteger((int)-1))).withColumn("neighbors", functions$.MODULE$.array((Seq)Nil$.MODULE$).cast("array<string>"));
        Dataset completedDf = (includeOutliers ? clusteredPointsDf.unionByName(outliersDf) : clusteredPointsDf).withColumnRenamed("component", clusterColumnName);
        Dataset returnDf = hasIdColumn ? completedDf.drop((Seq)ScalaRunTime$.MODULE$.wrapRefArray((Object[])new String[]{"neighbors", "id"})).withColumnRenamed(this.ID_COLUMN(), "id") : completedDf.drop((Seq)ScalaRunTime$.MODULE$.wrapRefArray((Object[])new String[]{"neighbors", "id"}));
        return returnDf;
    }

    public String dbscan$default$4() {
        return null;
    }

    public boolean dbscan$default$5() {
        return true;
    }

    public boolean dbscan$default$6() {
        return false;
    }

    public String dbscan$default$7() {
        return "isCore";
    }

    public String dbscan$default$8() {
        return "cluster";
    }

    private void validateInputs(Dataset<Row> geo_df, double epsilon, int minPts, String geometry) {
        Predef$.MODULE$.require(epsilon >= 0.0, (Function0 & Serializable)() -> "epsilon must not be negative");
        Predef$.MODULE$.require(minPts > 0, (Function0 & Serializable)() -> "minPts must be greater than 0");
        Predef$.MODULE$.require(ArrayOps$.MODULE$.contains$extension(Predef$.MODULE$.refArrayOps((Object[])geo_df.columns()), (Object)geometry), (Function0 & Serializable)() -> "geometry column not found in dataframe");
        DataType dataType = geo_df.schema().fields()[geo_df.schema().fieldIndex(geometry)].dataType();
        GeometryUDT$ geometryUDT$ = GeometryUDT$.MODULE$;
        Predef$.MODULE$.require(!(dataType != null ? !dataType.equals((Object)geometryUDT$) : geometryUDT$ != null), (Function0 & Serializable)() -> "geometry column must be of type GeometryType");
    }

    private DBSCAN$() {
    }
}

