Implementing word count, product tag statistics, and temperature statistics in Scala

Word count in Scala
 ---------------------
     import scala.io.Source

    /**
      * Word-count example: reads a text file and prints how often each word occurs.
      *
      * Created by Administrator on 2018/5/7.
      */
    object WCApp {
        /**
          * Reads `d:/mr/word.txt`, splits every line on whitespace and prints one
          * `(word,count)` pair per distinct word.
          *
          * @param args command-line arguments (unused)
          */
        def main(args: Array[String]): Unit = {
            // 1. Load the file.
            val src = Source.fromFile("d:/mr/word.txt")

            // 2. Materialise all lines, then close the source so the file handle never leaks.
            val lines = try {
                src.getLines().toList
            } finally {
                src.close()
            }

            // 3. Flatten the lines into words; runs of whitespace must not produce empty "words".
            val words = lines.flatMap(_.split("\\s+")).filter(_.nonEmpty)

            // 4. Group identical words and count the size of each group.
            //    A strict `map` is used instead of `mapValues`, which only builds a lazy view.
            val counts = words.groupBy(identity).map {
                case (word, occurrences) => (word, occurrences.size)
            }

            // 5. Print the statistics (the original printed the raw input lines by mistake).
            counts.foreach(println)
        }

    }



Word count in Scala, version 2 (aggregating with reduce)
---------------------
     import scala.io.Source

    /**
      * Word-count example, second variant: the per-word total is obtained by
      * reducing `(word, 1)` pairs instead of measuring the group size.
      *
      * Created by Administrator on 2018/5/7.
      */
    object WCApp2 {
        /**
          * Reads `d:/mr/word.txt` and prints one `(word,count)` pair per distinct word.
          *
          * @param args command-line arguments (unused)
          */
        def main(args: Array[String]): Unit = {
            // 1. Load the file.
            val src = Source.fromFile("d:/mr/word.txt")

            // 2. Materialise all lines, then close the source so the file handle never leaks.
            val lines = try {
                src.getLines().toList
            } finally {
                src.close()
            }

            // 3. Flatten the lines into words; runs of whitespace must not produce empty "words".
            val words = lines.flatMap(_.split("\\s+")).filter(_.nonEmpty)

            // 4. Mark every word with an initial count of one: List((hello,1),(hello,1),...)
            val pairs = words.map(word => (word, 1))

            // 5. Group by word: {hello -> List((hello,1),(hello,1),(hello,1)), ...}
            val grouped = pairs.groupBy(_._1)

            // 6. Collapse each group into a single (word, total) pair by summing the counts.
            //    `reduce` is safe here because `groupBy` never produces an empty group.
            val counts = grouped.map {
                case (_, group) => group.reduce((a, b) => (a._1, a._2 + b._2))
            }

            counts.foreach(println)
        }

    }




Top-N statistics implemented with a bitmap
------------------------
     import scala.io.Source

    /**
      * Annual top-N temperature query, implemented with a hand-rolled bitmap.
      *
      * Every temperature of a year sets one bit in a byte array; reading the bits
      * from the highest index downwards yields the distinct temperatures in
      * descending order without sorting.
      */
    object TempTopN2_Bitmap {
        /** Number of highest distinct temperatures reported per year. */
        private val TopN = 3

        /** Bitmap size in bytes; 128 bytes provide one bit per temperature in 0..1023. */
        private val BitmapBytes = 128

        /**
          * Reads `d:/mr/temp.dat` (one "year temperature" record per line) and prints,
          * for every year in ascending order, a `(year,temps)` pair where `temps` is the
          * comma-separated list of the top distinct temperatures.
          *
          * @param args command-line arguments (unused)
          */
        def main(args: Array[String]): Unit = {
            // 1. Load the temperature file.
            val f = Source.fromFile("d:/mr/temp.dat")

            // 2. Materialise all lines, then close the source so the file handle never leaks.
            val temps = try {
                f.getLines().toList
            } finally {
                f.close()
            }

            // 3. Extract year and temperature of each line: List((1900,28), ...).
            //    Blank lines are skipped because they would make `toInt` throw.
            val yearTemps = temps.filter(_.trim.nonEmpty).map { line =>
                val arr = line.trim.split("\\s+")
                (arr(0).toInt, arr(1).toInt)
            }

            // 4. Group by year: {1920 -> List((1920,t1),(1920,t2),...), ...}
            val byYear = yearTemps.groupBy(_._1)

            // 5. Reduce each year to its top temperatures.
            val topByYear = byYear.map {
                case (year, records) => (year, topTemps(records.map(_._2)))
            }

            // 6. Print the years in ascending order.
            topByYear.toList.sortBy(_._1).foreach(println(_))
        }

        /**
          * Returns the highest `TopN` distinct temperatures, largest first, joined by commas.
          *
          * As in the original code only strictly positive temperatures are recorded.
          * Values that do not fit into the bitmap (>= BitmapBytes * 8) are ignored
          * instead of raising an ArrayIndexOutOfBoundsException.
          *
          * @param temps temperatures of a single year
          * @return comma-separated temperatures, or an empty string if none qualified
          */
        private def topTemps(temps: Seq[Int]): String = {
            val bitmap = new Array[Byte](BitmapBytes)
            val capacity = BitmapBytes * 8

            // Set the bit that represents each temperature: byte = temp / 8, bit = temp % 8.
            for (temp <- temps if temp > 0 && temp < capacity) {
                val index = temp / 8
                bitmap(index) = (bitmap(index) | (1 << (temp % 8))).toByte
            }

            // Walk the bits from the highest temperature down. The iterators are lazy,
            // so `take` stops the scan as soon as enough values were found.
            val descending = for {
                byteIdx <- bitmap.indices.reverse.iterator
                bit <- (7 to 0 by -1).iterator
                if (((bitmap(byteIdx) >> bit) & 1) != 0)
            } yield byteIdx * 8 + bit

            descending.take(TopN).mkString(",")
        }
    }

Product review tag statistics in Scala
---------------------
    1. TagUtil.java
         package com.oldboy.scala.util;

        import com.alibaba.fastjson.JSON;
        import com.alibaba.fastjson.JSONArray;
        import com.alibaba.fastjson.JSONObject;

        import java.util.ArrayList;
        import java.util.List;

        /**
         * Label tool class
         * / 
        public  class TagUtil {
             / **
             * Extract the comment collection from the json data
             */
            public static List<String> extractTags(String json){
                //评论集合
                List<String> tags = new ArrayList<String>() ;

                // Parse the file into a json object 
                JSONObject obj = JSON.parseObject(json) ;

                // Get the array 
                JSONArray array = obj.getJSONArray("extInfoList" );

                // Determine the validity of the array 
                if (array != null && array.size() > 0 ){
                    JSONObject obj2 = array.getJSONObject(0);
                    JSONArray arr2 = obj2.getJSONArray("values") ;
                    if(arr2 != null && arr2.size() > 0 ){
                        for(int i = 0 ; i < arr2.size() ; i ++){
                            tags.add(arr2.getString(i));
                        }
                    }
                }

                return tags ;
            }
        }

    2. TaggingDemo
        import javax.swing.text.html.HTML.Tag

        import com.oldboy.scala.util.TagUtil

        import scala.io.Source

        /**
          * Tag statistics: for every business, finds its most frequent review tags and
          * lists the businesses ordered by the count of their most frequent tag.
          */
        object TaggingDemo {

            /** Maximum number of tags reported per business. */
            private val TopTagsPerBusiness = 5

            /**
              * Reads `d:/mr/temptags.txt`, where each line is "busid&lt;TAB&gt;json", and prints
              * one line per business in the form `busid==>(tag,count);(tag,count);...`.
              *
              * @param args command-line arguments (unused)
              */
            def main(args: Array[String]): Unit = {
                // 1. Load the file.
                val file = Source.fromFile("d:/mr/temptags.txt")

                // 2. Materialise all lines, then close the source so the file handle never leaks.
                val lines = try {
                    file.getLines().toList
                } finally {
                    file.close()
                }

                // Explicit `.asScala` conversion instead of the deprecated implicit JavaConversions.
                import scala.collection.JavaConverters._

                // 3. Flatten each line into (busid, tag) pairs.
                //    Lines without a tab-separated JSON column are skipped rather than crashing.
                val busTags: List[(String, String)] = lines.flatMap { line =>
                    val arr = line.split("\t")
                    if (arr.length < 2) {
                        Nil
                    } else {
                        val busid = arr(0)
                        TagUtil.extractTags(arr(1)).asScala.toList.map(tag => (busid, tag))
                    }
                }

                // 4. Group identical pairs: {(busid,tag) -> List((busid,tag),(busid,tag),...)}
                val grouped = busTags.groupBy(identity)

                // 5 + 6. Count each group and reshape to List((busid, (tag,cnt)), ...).
                //    `toList` comes first: mapping the Map directly would key the result by
                //    busid and silently drop all but one tag per business.
                val tagCounts = grouped.toList.map {
                    case ((busid, tag), occurrences) => (busid, (tag, occurrences.size))
                }

                // 7. Group by busid: Map(busid -> List((busid,(tag,cnt)), ...))
                val byBusiness = tagCounts.groupBy(_._1)

                // 8. Keep the most frequent tags of every business, highest count first.
                val topTags = byBusiness.map {
                    case (busid, entries) =>
                        (busid, entries.map(_._2).sortBy(-_._2).take(TopTagsPerBusiness))
                }

                // 9. Order the businesses by the count of their most frequent tag, descending.
                //    `head` is safe: every group produced by `groupBy` is non-empty.
                val ranked = topTags.toList.sortBy { case (_, tags) => -tags.head._2 }

                ranked.foreach { case (busid, tags) =>
                    println(busid + "==>" + tags.mkString(";"))
                }
            }
        }

 

Guess you like

Origin http://43.154.161.224:23101/article/api/json?id=325689650&siteId=291194637