[Spark] Operator Case

 1 package spark_example01;
 2 
 3 
 4 import java.io.File;
 5 import java.io.FileWriter;
 6 import java.io.IOException;
 7 import java.util.Random;
 8 
 9 /**
10  */
11 public class PeopleInfoFileGenerator {
12     public static void main(String[] args){
13         File file = new File("/Users/xls/Desktop/code/bigdata/data/PeopleInfo.txt");
14 
15         try {
16              the Random Random = new new the Random (); // generates a random number 
. 17              FileWriter FileWriter = new new FileWriter (File); // Create a new file 
18 is              for ( Long I =. 1; I <= 100000000; I ++) {    // generate 10 million digits 
. 19                  int height = random.nextInt (220 );
 20 is                  IF (height <50 ) {
 21 is                      height = height + 50 ;
 22 is                  }
 23 is                  String getRandomGender gender = (); // gender method 
24                  IF (height < 100 && gender == "M") {
25                     height = height + 100;
26                 }
27                 if (height < 100 && gender == "F") {
28                     height = height + 40;
29                 }
30                 fileWriter.write( i + " " + getRandomGender() + " " + height); //文件格式:ID 性别 身高
31                 fileWriter.write(System.getProperty("line.separator"));
32             }
33             fileWriter.flush();
34             fileWriter.close();
35             System.out.println ( "People Information File Generated successfully." );
 36          } the catch (IOException E) {
 37 [              e.printStackTrace ();
 38 is          }
 39      }
 40  
41 is      public  static String getRandomGender () { // build a randomly generated gender method 
42 is          the Random Random = new new the Random ();
 43 is          int randomNum random.nextInt = (2) +. 1 ;
 44 is          IF (randomNum% 2 == 0 ) {
 45              return "M" ;
 46 is          }else{
47             return "F";
48         }
49     }
50 }

 

  1 package spark_example01;
  2 
  3 import org.apache.spark.SparkConf;
  4 import org.apache.spark.api.java.JavaRDD;
  5 import org.apache.spark.api.java.JavaSparkContext;
  6 import org.apache.spark.api.java.function.FlatMapFunction;
  7 import org.apache.spark.api.java.function.Function;
  8 import java.util.Arrays;
  9 /**
 10  * Created by Administrator on 2017/11/17.
 11  */
 12 public class PeopleInfoCalculator {
 13     public static void main(String[] args){
 14 
 15         SparkConf sparkConf = new SparkConf().setAppName("PeopleInfoCalculator").setMaster("local[3]");
 16 
 17         JavaSparkContext sc = new JavaSparkContext(sparkConf);
 18 
 19         JavaRDD<String> dataFile = sc.textFile("/Users/xls/Desktop/code/bigdata/data/PeopleInfo.txt");
 20 
 21         // step1:过滤出性别为M的数据
 22         JavaRDD<String> maleFilterData = dataFile.filter(new Function<String, Boolean>() {
 23 
 24             public Boolean call(String s) throws Exception {
 25                 return s.contains("M");
 26             }
 27         });
 28 
 29         //step2:过滤出性别为F的数据
 30         JavaRDD<String> femaleFilterData = dataFile.filter(new Function<String, Boolean>() {
 31 
 32             public Boolean call(String s) throws Exception {
 33                 return s.contains("F");
 34             }
 35         });
 36 
 37         //setp3: gender obtain the height data string of M --- be segmented for each row, and finally get [2] used in the string --- flatMap string segmentation 
38 is          JavaRDD <String> maleHeightData = maleFilterData.flatMap ( new new FlatMapFunction <String, String> () {
 39              @Override
 40              public the java.util.Iterator <String> Call (String S) throws Exception {
 41 is                  return Arrays.asList (s.split ( "") [2 ]). Iterator ();
 42 is              }
 43 is          });
 44 is  
45          // Step4: the height data is obtained gender F --- string of each line is segmented, and finally get [2] used in the string --- flatMap segmentation string 
46 is          JavaRDD <string> femaleHeightData femaleFilterData.flatMap = (new new FlatMapFunction <String, String> () {
 47              @Override
 48              public the java.util.Iterator <String> Call (String S) throws Exception {
 49                  return Arrays.asList (s.split ( "") [2 ]). Iterator ();
 50              }
 51 is          });
 52 is  
53 is          // STEP5: male height of the transformed format to integer format string 
54 is          JavaRDD <integer> = maleHeightDataInt maleHeightData.map ( new new Function <string, integer> () { //
 55              @Override
 56 is              public Integer Call (String S) throws{Exception
 57 is                  return the Integer.parseInt (String.valueOf (S));
 58              }
 59          });
 60  
61 is          // Step6: converting the format string female height integer format 
62 is          JavaRDD <Integer> = femaleHeightData.map femaleHeightDataInt ( new new Function <string, integer> () { // convert string format to integer format 
63 is              @Override
 64              public integer Call (string S) throws Exception {
 65                  return the Integer.parseInt (String.valueOf (S));
 66              }
 67          });
 68  
69         // the sortBy (<T>, Ascending, numPartitions) Explanation:
 70          // first parameter is a function that also has a generic parameter T belt, return type and RDD type elements are consistent;
 71          // second parameter is ascending, which parameters determine the elements in the sorted RDD is ascending or descending, the default is true, that is, in ascending order;
 72          // third parameter is numPartitions, this parameter determines the sorted partition RDD number, the number of partitions and the number before ordering the default sort equal, i.e. this.partitions.size.
73 is  
74          // STEP7: pressing the male height from lowest to highest --- represents a true default sorting parameter, sorted in ascending order, from low to high exhaust 
75          JavaRDD <Integer> = maleHeightLowSort maleHeightDataInt.sortBy ( new new Function <Integer , Integer> () {
 76              public Integer Call (Integer S) throws Exception {
 77                  return S;
 78             }
 79          }, true ,. 3 );
 80  
81          // Step8: female pressing height from lowest to highest --- represents a true default sorting parameter, sorted in ascending order, from low to high exhaust 
82          JavaRDD <Integer> femaleHeightLowSort femaleHeightDataInt.sortBy = ( new new Function <Integer, Integer> () {
 83              public Integer Call (Integer S) throws Exception {
 84                  return S;
 85              }
 86          }, to true ,. 3 );
 87  
88          // Step9: male press height the descending sort --- false represented as descending order, from high to low 
89         JavaRDD <Integer> = maleHeightHightSort maleHeightDataInt.sortBy ( new new Function <Integer, Integer> () {
 90              public Integer Call (Integer S) throws Exception {
 91 is                  return S;
 92              }
 93          }, to false ,. 3 );
 94  
95          // Step10 : height of the female sort descending --- pressing to false as represented in descending order from high to low 
96          JavaRDD <Integer> = femaleHeightHightSort femaleHeightDataInt.sortBy ( new new Function <Integer, Integer> () {
 97              public Integer Call (Integer S) throws Exception {
98                  return S;
 99              }
 100          }, to false ,. 3 );
 101  
102          Integer lowestMale maleHeightLowSort.first = (); // first number determined in ascending order, i.e., the minimum value of 
103          Integer lowestFemale femaleHeightLowSort.first = (); / / first number determined in ascending order, i.e., the minimum value of 
104          Integer highestMale maleHeightHightSort.first = (); // first number determined in descending order, i.e., the maximum value of 
105          Integer highestFemale femaleHeightHightSort.first = (); // find descending the first number, i.e., the maximum value of 
106  
107          System.out.println ( "number the peole of for Woman:" + femaleHeightData.count ());// total number of women obtaining 
108          System.out.println ( "Number The peole of for a Man:" + maleHeightData.count ()); // total number of males is determined 
109          System.out.println ( "Lowest for a Man : "+ lowestMale); // find the shortest male height 
110          System.out.println (" Lowest female: "+ lowestFemale); // find women's shortest height 
111          System.out.println (" Highest male: " highestMale +); // determine the maximum height male 
112          System.out.println ( "highest for woman:" + highestFemale); // determine the maximum height F 
113  
114      }
 115  }
 1 16  
/*
 * a. Case description
 * Suppose we need statistics on the gender and height of the population of a province
 * (100 million people, matching the 100,000,000 records the generator writes). We need to
 * compute the number of men and women, the highest and lowest male height, and the highest
 * and lowest female height.
 * The source file used in this case has three columns: ID, gender and height (cm). Each line
 * holds one record with the fields separated by a space, for example "1 M 174".
 *
 * b. Generating the demographic data
 * A set of demographic data is generated randomly with a Java program. Each record contains a
 * sequential ID, a gender (M/F) and a height in cm; the code is PeopleInfoFileGenerator.
 *
 * c. Analysis of the process
 * In this case the statistics are computed separately for men and women, so the natural first
 * step is to separate the male and female records in the RDD built from the source file. This
 * produces two new RDDs, one containing the male records and one containing the female records.
 * Next, each of these two RDDs is mapped so that it contains only the height data, which gives
 * two more RDDs: the male heights and the female heights.
 * Finally, these two RDDs are sorted to obtain the highest and lowest male and female heights.
 * Step 1: separate the male and female records with the filter operator; a line that contains
 *         "M" is a man and a line that contains "F" is a woman.
 * Step 2: use a mapping operator to extract the height data of each gender from its RDD.
 * Step 3: use the sortBy operator to sort the height data.
 * Special note: the height data must be converted to integers during the RDD transformations.
 * Otherwise sortBy treats the values as strings and the sort result is wrong. For example, the
 * heights 123, 110, 84, 72, 100 sorted in ascending order as strings give 100, 110, 123, 72, 84,
 * which is obviously not correct.
 *
 * d. Code that computes the height statistics: PeopleInfoCalculator.
 */

 

Guess you like

Origin www.cnblogs.com/xuelisheng/p/11530903.html