Word Frequency Statistics Applet - WordCount.exe

I. Background

Recently, to complete a small assignment for his continuing-education studies, the author ("Top Brother") built a word-frequency statistics .exe applet. Because there are few examples of this kind online, he decided to publish his admittedly rough work to offer an idea to anyone who needs one; if it is not to your taste, please be kind. The source link is attached at the end, and interested readers are welcome to keep optimizing it.

II. Look at the results

Double-click the program, then use the drop-down box to choose where the source file comes from; both local and network resources are supported, as shown below:

                      

                

                             Local source file example

                

                              Network source file example

 

III. Major Code

1.pom file

  <dependencies>
        <!-- Word segmenter (IK Analyzer), used to split text into words -->
        <dependency>
            <groupId>com.janeluo</groupId>
            <artifactId>ikanalyzer</artifactId>
            <version>2012_u6</version>
        </dependency>
        <!-- Unit testing (test scope only) -->
        <dependency>
            <groupId>junit</groupId>
            <artifactId>junit</artifactId>
            <version>4.12</version>
            <scope>test</scope>
        </dependency>
        <!-- HTML parser: https://mvnrepository.com/artifact/org.jsoup/jsoup -->
        <dependency>
            <groupId>org.jsoup</groupId>
            <artifactId>jsoup</artifactId>
            <version>1.11.3</version>
        </dependency>
    </dependencies>
    <build>
        <plugins>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-surefire-plugin</artifactId>
                <version>2.18.1</version>
                <configuration>
                    <skipTests>true</skipTests>
                </configuration>
            </plugin>
            <!-- Packaging plugin: builds a single jar that bundles all dependencies -->
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-assembly-plugin</artifactId>
                <version>2.4.1</version>

                <configuration>
                    <!-- Include all project dependencies in the assembled jar -->
                    <descriptorRefs>
                        <descriptorRef>jar-with-dependencies</descriptorRef>
                    </descriptorRefs>
                    <!-- Declare the main class in the manifest so the jar is executable -->
                    <archive>
                        <manifest>
                            <addClasspath>true</addClasspath>
                            <mainClass>cn.dintalk.service.WordCount</mainClass>
                        </manifest>
                    </archive>
                </configuration>

                <executions>
                    <execution>
                        <id>make-assembly</id>
                        <!-- Bind the assembly to the package phase -->
                        <phase>package</phase>
                        <goals>
                            <goal>single</goal>
                        </goals>
                    </execution>
                </executions>
            </plugin>
        </plugins>
    </build>

2.WebUtils

/ ** 
 * @author Mr.song 
 * @date 2019/10/13 9:26 
 * / 
public  class WebUtils { 

    / ** 
     * get request transmitted in accordance with the parameters, and url 
     * 
     * @param url 
     * @param param 
     * @return return web page content
      * / 
    public  static String sendGet (String url, String param) { 
        String the Result = "" ;
         IF (! param = null ) { 
            url = url + "" +? param; 
        } 
        the try { 
            the uRL of realUrl = new new the URL (URL);
             // open a connection and the URL 
            the HttpURLConnection Conn = getHttpURLConnection (realUrl); 
            Result = getResponse (Conn); 
        } the catch (Exception E) { 
            e.printStackTrace (); 
        } 
        return Result; 
    } 


    // URL acquired connection according 
    Private  static the HttpURLConnection getHttpURLConnection (the URL realUrl) { 
        the StringBuilder SB = new new the StringBuilder (); 
        sb.append ( "the Mozilla / 5.0 (the Windows NT 10.0; Win64; x64-)" ); 
        sb.append (" AppleWrbKit/537.36(KHTML, like Gecko)");
        sb.append(" Chrome/72.0.3626.119 Safari/537.36");
        HttpURLConnection conn = null;
        try {
            // 打开和URL之间的连接
            conn = (HttpURLConnection) realUrl.openConnection();
            // 设置通用的请求属性
            conn.setRequestProperty("accept", "*/*");
            conn.setRequestProperty("connection", "Keep-Alive");
            conn.setRequestProperty("user-agent", sb.toString());
        } catch (IOException e) {
            e.printStackTrace();
        }
        return conn;
    }

    // 根据url连接获取响应
    private static String getResponse(HttpURLConnection conn) {
        // 读取URL的响应
        String result = "";
        try (InputStream is = conn.getInputStream();
             InputStreamReader isr = new InputStreamReader(is, "utf-8");
             BufferedReader in = new BufferedReader(isr)) {
            String line;
            while ((line = in.readLine()) != null) {
                result += "\n" + line;
            }
        } catch (Exception e) {
            System.out.println("Err:getResponse()");
            e.printStackTrace();
        } finally {
            conn.disconnect();
        }
//        System.out.println("getResponse():" + result.length());
        return result;
    }

    /**
     * 解析网页为文本
     *
     * @param html
     * @return
     */
    public static String parseHtmlToText(String html) {
        Document document = Jsoup.parse(html);
        return document.text();
    }
}

3.IKSUtils

/**
 * Word-segmentation and word-frequency helpers built on the IK segmenter.
 *
 * @author Mr.song
 * @date 2019/10/10 21:12
 */
public class IKSUtils {

    /**
     * Splits text into words.
     *
     * @param text the text to segment; null yields an empty list
     * @return the lexeme texts in the order the segmenter returned them
     * @throws Exception if the segmenter fails while reading the text
     */
    public static List<String> getStringList(String text) throws Exception {
        List<String> words = new ArrayList<>();
        if (text == null) {
            // StringReader rejects null; treat missing text as "no words".
            return words;
        }
        // Standalone use of the segmenter, independent of Lucene.
        // NOTE(review): the second argument presumably enables smart segmentation - confirm.
        IKSegmenter segmenter = new IKSegmenter(new StringReader(text), true);
        Lexeme lexeme;
        while ((lexeme = segmenter.next()) != null) {
            words.add(lexeme.getLexemeText());
        }
        return words;
    }

    /**
     * Counts how often each word occurs.
     *
     * @param wordList the words to count
     * @return a map from word to occurrence count whose iteration order is by
     *         descending count, or null when {@code wordList} is null
     */
    public static Map<String, Integer> wordCount(List<String> wordList) {
        if (wordList == null) {
            // Kept as null (not an empty map) so existing callers' null checks still work.
            return null;
        }
        Map<String, Integer> counts = new HashMap<>();
        for (String word : wordList) {
            counts.merge(word, 1, Integer::sum);
        }
        // Sort by count, highest first; LinkedHashMap preserves the sorted order.
        return counts
                .entrySet()
                .stream()
                .sorted(Collections.reverseOrder(Map.Entry.comparingByValue()))
                .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue,
                        (first, second) -> second, LinkedHashMap::new));
    }
}

IV. Relevant address

Source Address:

https://github.com/MrSonghui/wordCount

For packaging the jar into an .exe file, refer to this article:

https://www.cnblogs.com/xiaoMzjm/p/3879766.html

 

If you like this post, you are welcome to follow my WeChat official account; readers who need advertising hosting can add me on QQ!

 

 

Guess you like

Origin www.cnblogs.com/dintalk/p/11917657.html