Spark SQL official-website Java example: ERROR CodeGenerator: failed to compile

Table of Contents

Problem code

Locating the problem

Is it over?

Locating the problem (again)

Correct code

Summary

Problem code

SparkSessionJavaTest.java

package sparkSQL.apachedemo;

import org.apache.log4j.Level;
import org.apache.log4j.Logger;
import org.apache.spark.SparkConf;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoder;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.SparkSession;
import org.apache.spark.sql.TypedColumn;
import org.apache.spark.sql.expressions.Aggregator;

import java.io.Serializable;

/**
 * @Classname SparkSessionJavaTest
 * @Date 2019/11/14 7:00 PM
 * @Author hadoop
 * @Description:
 * Spark SQL, Java version
 */

public class SparkSessionJavaTest {
    public static void main(String[] args){
        Logger.getLogger("org").setLevel(Level.INFO);
        SparkConf conf = new SparkConf()
                .setAppName("SparkSessionJavaTest")
                .setMaster("local[2]");
        SparkSession spark = SparkSession
                .builder()
                .config(conf)
//                .enableHiveSupport()
                .getOrCreate();
        String filePath = "file:/usr/local/spark/examples/src/main/resources/";

        typeSafeUserDefinedAggregateFunction(spark,filePath);
        spark.stop();
    }

    /**
     * Aggregate operation:
     * type-safe user-defined aggregate function
     * @param spark
     * @param filePath
     */
    private static void typeSafeUserDefinedAggregateFunction(SparkSession spark,String filePath){
        Encoder<Employee> employeeEncoder = Encoders.bean(Employee.class);
        String path = filePath+ "employees.json";
        Dataset<Employee> ds = spark.read().json(path).as(employeeEncoder);
        ds.show();

        MyAverage2 myAverage = new MyAverage2();
        // Convert the function to a `TypedColumn` and give it a name
        TypedColumn<Employee, Double> averageSalary = myAverage.toColumn().name("average_salary");
        Dataset<Double> result = ds.select(averageSalary);
        result.show();

    }

    /**
     * Employee inner class
     */
    public static class Employee implements Serializable {
        private String name;
        private long salary;

        public Employee(String name, long salary) {
            this.name = name;
            this.salary = salary;
        }

        public String getName() {
            return name;
        }

        public void setName(String name) {
            this.name = name;
        }

        public long getSalary() {
            return salary;
        }

        public void setSalary(long salary) {
            this.salary = salary;
        }
    }


    /**
     * Average inner class
     */
    public static class Average implements Serializable {
        private long sum;
        private long count;

        public Average(long sum, long count) {
            this.sum = sum;
            this.count = count;
        }

        public long getSum() {
            return sum;
        }

        public void setSum(long sum) {
            this.sum = sum;
        }

        public long getCount() {
            return count;
        }

        public void setCount(long count) {
            this.count = count;
        }
    }

    public static class MyAverage2 extends Aggregator<Employee,Average,Double> {
        // A zero value for this aggregation. Should satisfy the property that any b + zero = b
        public Average zero(){
            return new Average(0L,0L);
        }
        // Combine two values to produce a new value. For performance, the function may modify
        // `buffer` and return it instead of constructing a new object
        public Average reduce(Average buffer,Employee employee){
            long newSum = buffer.getSum() + employee.getSalary();
            long newCount = buffer.getCount() + 1;
            buffer.setSum(newSum);
            buffer.setCount(newCount);
            return buffer;
        }
        // Merge two intermediate values
        public Average merge(Average b1,Average b2){
            long mergeSum = b1.getSum() + b2.getSum();
            long mergeCount = b1.getCount() + b2.getCount();
            b1.setSum(mergeSum);
            b1.setCount(mergeCount);
            return b1;
        }
        //Transform the output of the reduction
        public Double finish(Average reduction){
            return ((double)reduction.getSum()) / reduction.getCount();

        }
        //Specifies the Encoder for the intermediate value type
        public Encoder<Average> bufferEncoder(){
            return Encoders.bean(Average.class);
        }
        //Specifies the Encoder for the final output value type
        public Encoder<Double> outputEncoder(){
            return Encoders.DOUBLE();
        }

    }
}

Problem

While working through the Type-Safe User-Defined Aggregate Functions example on the Spark SQL official website, I ran into the following error:

19/11/15 14:26:36 ERROR CodeGenerator: failed to compile: org.codehaus.commons.compiler.CompileException: File 'generated.java', Line 37, Column 85: No applicable constructor/method found for zero actual parameters; candidates are: "sparkSQL.apachedemo.SparkSessionJavaTest$Employee(java.lang.String, long)"
org.codehaus.commons.compiler.CompileException: File 'generated.java', Line 37, Column 85: No applicable constructor/method found for zero actual parameters; candidates are: "sparkSQL.apachedemo.SparkSessionJavaTest$Employee(java.lang.String, long)"
... (omitted)
19/11/15 14:26:36 INFO CodeGenerator: 
/* 001 */ public java.lang.Object generate(Object[] references) {
/* 002 */   return new SpecificSafeProjection(references);
/* 003 */ }
/* 004 */
/* 005 */ class SpecificSafeProjection extends org.apache.spark.sql.catalyst.expressions.codegen.BaseProjection {
/* 006 */
/* 007 */   private Object[] references;
/* 008 */   private InternalRow mutableRow;
/* 009 */
/* 010 */
/* 011 */   public SpecificSafeProjection(Object[] references) {
/* 012 */     this.references = references;
/* 013 */     mutableRow = (InternalRow) references[references.length - 1];
/* 014 */
/* 015 */   }
/* 016 */
/* 017 */   public void initialize(int partitionIndex) {
/* 018 */
/* 019 */   }
/* 020 */
/* 021 */   public java.lang.Object apply(java.lang.Object _i) {
/* 022 */     InternalRow i = (InternalRow) _i;
/* 023 */
/* 024 */     sparkSQL.apachedemo.SparkSessionJavaTest$Employee value_6 = InitializeJavaBean_0(i);
/* 025 */     if (false) {
/* 026 */       mutableRow.setNullAt(0);
/* 027 */     } else {
/* 028 */
/* 029 */       mutableRow.update(0, value_6);
/* 030 */     }
/* 031 */
/* 032 */     return mutableRow;
/* 033 */   }
/* 034 */
/* 035 */
/* 036 */   private sparkSQL.apachedemo.SparkSessionJavaTest$Employee InitializeJavaBean_0(InternalRow i) {
/* 037 */     final sparkSQL.apachedemo.SparkSessionJavaTest$Employee value_1 = false ? null : new sparkSQL.apachedemo.SparkSessionJavaTest$Employee();
/* 038 */     sparkSQL.apachedemo.SparkSessionJavaTest$Employee javaBean_0 = value_1;
/* 039 */     if (!false) {
/* 040 */
/* 041 */
/* 042 */       boolean isNull_3 = i.isNullAt(0);
/* 043 */       UTF8String value_3 = isNull_3 ? null : (i.getUTF8String(0));
/* 044 */       boolean isNull_2 = true;
/* 045 */       java.lang.String value_2 = null;
/* 046 */       if (!isNull_3) {
/* 047 */
/* 048 */         isNull_2 = false;
/* 049 */         if (!isNull_2) {
/* 050 */
/* 051 */           Object funcResult_0 = null;
/* 052 */           funcResult_0 = value_3.toString();
/* 053 */
/* 054 */           if (funcResult_0 != null) {
/* 055 */             value_2 = (java.lang.String) funcResult_0;
/* 056 */           } else {
/* 057 */             isNull_2 = true;
/* 058 */           }
/* 059 */
/* 060 */
/* 061 */         }
/* 062 */       }
/* 063 */       javaBean_0.setName(value_2);
/* 064 */
/* 065 */
/* 066 */       boolean isNull_5 = i.isNullAt(1);
/* 067 */       long value_5 = isNull_5 ? -1L : (i.getLong(1));
/* 068 */
/* 069 */       if (isNull_5) {
/* 070 */         throw new NullPointerException(((java.lang.String) references[0] /* errMsg */));
/* 071 */       }
/* 072 */       javaBean_0.setSalary(value_5);
/* 073 */
/* 074 */     }
/* 075 */
/* 076 */     return value_1;
/* 077 */   }
/* 078 */
/* 079 */ }

19/11/15 14:26:37 ERROR Executor: Exception in task 0.0 in stage 2.0 (TID 2)
java.util.concurrent.ExecutionException: org.codehaus.commons.compiler.CompileException: File 'generated.java', Line 37, Column 85: failed to compile: org.codehaus.commons.compiler.CompileException: File 'generated.java', Line 37, Column 85: No applicable constructor/method found for zero actual parameters; candidates are: 

... (omitted)

Locating the problem

File 'generated.java', Line 37, Column 85: failed to compile

/* 037 */     final sparkSQL.apachedemo.SparkSessionJavaTest$Employee value_1 = false ? null : new sparkSQL.apachedemo.SparkSessionJavaTest$Employee();

As you can see, the generated code calls the no-argument constructor of the Employee class at runtime, but the class only defines a parameterized constructor:

public Employee(String name, long salary) {
    this.name = name;
    this.salary = salary;
}

Comment out this constructor so that the compiler supplies the default no-argument constructor, and the problem disappears.
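Alternatively, here is a minimal sketch (my addition, not part of the original fix): keep the convenience constructor and declare an explicit no-argument constructor next to it. Java only generates a default constructor when a class declares no constructor at all, and Encoders.bean needs a public no-argument constructor so that it can instantiate the bean before calling its setters:

    public static class Employee implements Serializable {
        private String name;
        private long salary;

        // Explicit no-arg constructor for Encoders.bean: the generated code
        // calls `new Employee()` and then the setters.
        public Employee() {
        }

        // The convenience constructor can stay once the no-arg one exists.
        public Employee(String name, long salary) {
            this.name = name;
            this.salary = salary;
        }

        // getters and setters unchanged
    }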

Is it over?

However, the Average class also defines a parameterized constructor. Will it trigger the same problem? Running again is also a good way to verify the analysis above.

(Sure enough, the original exception is gone, but a new one has been thrown.)

19/11/15 14:50:06 ERROR CodeGenerator: failed to compile: org.codehaus.commons.compiler.CompileException: File 'generated.java', Line 24, Column 84: No applicable constructor/method found for zero actual parameters; candidates are: "sparkSQL.apachedemo.SparkSessionJavaTest$Average(long, long)"
org.codehaus.commons.compiler.CompileException: File 'generated.java', Line 24, Column 84: No applicable constructor/method found for zero actual parameters; candidates are: "sparkSQL.apachedemo.SparkSessionJavaTest$Average(long, long)"

... (omitted)

19/11/15 14:50:06 INFO CodeGenerator: 
/* 001 */ public java.lang.Object generate(Object[] references) {
/* 002 */   return new SpecificSafeProjection(references);
/* 003 */ }
/* 004 */
/* 005 */ class SpecificSafeProjection extends org.apache.spark.sql.catalyst.expressions.codegen.BaseProjection {
/* 006 */
/* 007 */   private Object[] references;
/* 008 */   private InternalRow mutableRow;
/* 009 */
/* 010 */
/* 011 */   public SpecificSafeProjection(Object[] references) {
/* 012 */     this.references = references;
/* 013 */     mutableRow = (InternalRow) references[references.length - 1];
/* 014 */
/* 015 */   }
/* 016 */
/* 017 */   public void initialize(int partitionIndex) {
/* 018 */
/* 019 */   }
/* 020 */
/* 021 */   public java.lang.Object apply(java.lang.Object _i) {
/* 022 */     InternalRow i = (InternalRow) _i;
/* 023 */
/* 024 */     final sparkSQL.apachedemo.SparkSessionJavaTest$Average value_1 = false ? null : new sparkSQL.apachedemo.SparkSessionJavaTest$Average();
/* 025 */     sparkSQL.apachedemo.SparkSessionJavaTest$Average javaBean_0 = value_1;
/* 026 */     if (!false) {
/* 027 */
/* 028 */
/* 029 */       long value_3 = i.getLong(0);
/* 030 */
/* 031 */       if (false) {
/* 032 */         throw new NullPointerException(((java.lang.String) references[0] /* errMsg */));
/* 033 */       }
/* 034 */       javaBean_0.setCount(value_3);
/* 035 */
/* 036 */
/* 037 */       long value_5 = i.getLong(1);
/* 038 */
/* 039 */       if (false) {
/* 040 */         throw new NullPointerException(((java.lang.String) references[1] /* errMsg */));
/* 041 */       }
/* 042 */       javaBean_0.setSum(value_5);
/* 043 */
/* 044 */     }
/* 045 */     if (false) {
/* 046 */       mutableRow.setNullAt(0);
/* 047 */     } else {
/* 048 */
/* 049 */       mutableRow.update(0, value_1);
/* 050 */     }
/* 051 */
/* 052 */     return mutableRow;
/* 053 */   }
/* 054 */
/* 055 */
/* 056 */ }

19/11/15 14:50:06 ERROR Executor: Exception in task 0.0 in stage 3.0 (TID 3)
java.util.concurrent.ExecutionException: org.codehaus.commons.compiler.CompileException: File 'generated.java', Line 24, Column 84: failed to compile: org.codehaus.commons.compiler.CompileException: File 'generated.java', Line 24, Column 84: No applicable constructor/method found for zero actual parameters; candidates are: "sparkSQL.apachedemo.SparkSessionJavaTest$Average(long, long)"

... (omitted)

Locating the problem (again)

File 'generated.java', Line 24, Column 84: failed to compile

/* 024 */     final sparkSQL.apachedemo.SparkSessionJavaTest$Average value_1 = false ? null : new sparkSQL.apachedemo.SparkSessionJavaTest$Average();

Indeed, the generated code again calls a no-argument constructor at runtime while the class only defines a parameterized one, so the same exception is thrown.

Comment out the parameterized constructor so the default constructor is available:

public Average(long sum, long count) {
    this.sum = sum;
    this.count = count;
}

Then modify the zero function accordingly, since new Average(0L, 0L) no longer compiles:

        public Average zero(){
            Average average = new Average();
            average.setSum(0L);
            average.setCount(0L);
            return average;
        }

This resolves the problem completely. Note that the same treatment applies to the Employee class, whose parameterized constructor caused the first error; alternatively, both classes could keep their convenience constructors alongside an explicit no-argument one, as sketched earlier.

Correct code

SparkSessionJavaTest.java

package sparkSQL.apachedemo;

import org.apache.log4j.Level;
import org.apache.log4j.Logger;
import org.apache.spark.SparkConf;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoder;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.SparkSession;
import org.apache.spark.sql.TypedColumn;
import org.apache.spark.sql.expressions.Aggregator;

import java.io.Serializable;

/**
 * @Classname SparkSessionJavaTest
 * @Date 2019/11/14 7:00 PM
 * @Author hadoop
 * @Description:
 * Spark SQL, Java version
 */

public class SparkSessionJavaTest {
    public static void main(String[] args){
        Logger.getLogger("org").setLevel(Level.INFO);
        SparkConf conf = new SparkConf()
                .setAppName("SparkSessionJavaTest")
                .setMaster("local[2]");
        SparkSession spark = SparkSession
                .builder()
                .config(conf)
//                .enableHiveSupport()
                .getOrCreate();
        String filePath = "file:/usr/local/spark/examples/src/main/resources/";

        typeSafeUserDefinedAggregateFunction(spark,filePath);
        spark.stop();
    }

    /**
     * Aggregate operation:
     * type-safe user-defined aggregate function
     * @param spark
     * @param filePath
     */
    private static void typeSafeUserDefinedAggregateFunction(SparkSession spark,String filePath){
        Encoder<Employee> employeeEncoder = Encoders.bean(Employee.class);
        String path = filePath+ "employees.json";
        Dataset<Employee> ds = spark.read().json(path).as(employeeEncoder);
        ds.show();

        MyAverage2 myAverage = new MyAverage2();
        // Convert the function to a `TypedColumn` and give it a name
        TypedColumn<Employee, Double> averageSalary = myAverage.toColumn().name("average_salary");
        Dataset<Double> result = ds.select(averageSalary);
        result.show();

    }

    /**
     * Employee inner class
     */
    public static class Employee implements Serializable {
        private String name;
        private long salary;

        // public Employee(String name, long salary) {
        //     this.name = name;
        //     this.salary = salary;
        // }

        public String getName() {
            return name;
        }

        public void setName(String name) {
            this.name = name;
        }

        public long getSalary() {
            return salary;
        }

        public void setSalary(long salary) {
            this.salary = salary;
        }
    }


    /**
     * Average inner class
     */
    public static class Average implements Serializable {
        private long sum;
        private long count;

        // public Average(long sum, long count) {
        //     this.sum = sum;
        //     this.count = count;
        // }

        public long getSum() {
            return sum;
        }

        public void setSum(long sum) {
            this.sum = sum;
        }

        public long getCount() {
            return count;
        }

        public void setCount(long count) {
            this.count = count;
        }
    }

    public static class MyAverage2 extends Aggregator<Employee,Average,Double> {
        // A zero value for this aggregation. Should satisfy the property that any b + zero = b
        public Average zero(){
            Average average = new Average();
            average.setSum(0L);
            average.setCount(0L);
            return average;
        }
        // Combine two values to produce a new value. For performance, the function may modify
        // `buffer` and return it instead of constructing a new object
        public Average reduce(Average buffer,Employee employee){
            long newSum = buffer.getSum() + employee.getSalary();
            long newCount = buffer.getCount() + 1;
            buffer.setSum(newSum);
            buffer.setCount(newCount);
            return buffer;
        }
        // Merge two intermediate values
        public Average merge(Average b1,Average b2){
            long mergeSum = b1.getSum() + b2.getSum();
            long mergeCount = b1.getCount() + b2.getCount();
            b1.setSum(mergeSum);
            b1.setCount(mergeCount);
            return b1;
        }
        //Transform the output of the reduction
        public Double finish(Average reduction){
            return ((double)reduction.getSum()) / reduction.getCount();

        }
        //Specifies the Encoder for the intermediate value type
        public Encoder<Average> bufferEncoder(){
            return Encoders.bean(Average.class);
        }
        //Specifies the Encoder for the final output value type
        public Encoder<Double> outputEncoder(){
            return Encoders.DOUBLE();
        }

    }
}
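
For reference, with the employees.json file bundled with the Spark examples, the corrected program should print output along these lines (this matches the result shown in the official documentation for this example; the exact rows depend on your data file):

+-------+------+
|   name|salary|
+-------+------+
|Michael|  3000|
|   Andy|  4500|
| Justin|  3500|
|  Berta|  4000|
+-------+------+

+--------------+
|average_salary|
+--------------+
|        3750.0|
+--------------+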

Summary

This is the first time I have run into a problem like this, and tracking it down took a while. The reason for the long detour: I had set the log level in the code to Logger.getLogger("org").setLevel(Level.ERROR), which suppressed the generated-code dump, so the logs showed no specific cause at all. After changing it to Logger.getLogger("org").setLevel(Level.INFO), the full logs, including the generated Java source, made the problem easy to locate. Something to keep in mind for similar problems in the future.
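
The relevant line is the first statement in main(); a small sketch of the two settings:

// At ERROR, the INFO-level dump of generated.java is suppressed and the
// root cause stays invisible.
// Logger.getLogger("org").setLevel(Level.ERROR);

// At INFO, the generated source is printed, so the failing constructor
// call at "Line 37, Column 85" can be read directly.
Logger.getLogger("org").setLevel(Level.INFO);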

Source: blog.csdn.net/someby/article/details/103084807