819. Most Common Word（leetcode）

Given a paragraph and a list of banned words, return the most frequent word that is not in the list of banned words.  It is guaranteed there is at least one word that isn't banned, and that the answer is unique.

Words in the list of banned words are given in lowercase, and free of punctuation.  Words in the paragraph are not case sensitive.  The answer is in lowercase.

Example:

Input: paragraph = "Bob hit a ball, the hit BALL flew far after it was hit.", banned = ["hit"]
Output: "ball"

Explanation:

"hit" occurs 3 times, but it is a banned word.
"ball" occurs twice (and no other word does), so it is the most frequent non-banned word in the paragraph. 
Note that words in the paragraph are not case sensitive,
that punctuation is ignored (even if adjacent to words, such as "ball,"), 
and that "hit" isn't the answer even though it occurs more because it is banned.

Note:

1 <= paragraph.length <= 1000.
1 <= banned.length <= 100.
1 <= banned[i].length <= 10.
The answer is unique, and written in lowercase (even if its occurrences in paragraph may have uppercase symbols, and even if it is a proper noun.)
paragraph only consists of letters, spaces, or the punctuation symbols !?',;.
Different words in paragraph are always separated by a space.
There are no hyphens or hyphenated words.
Words only consist of letters, never apostrophes or other punctuation symbols.

问题描述

1.给定一个段落paragraph和一个禁用单词列表banned，返回不在禁用列表中且出现频率最高的单词。题目保证至少有一个单词不在禁用列表中，并且答案唯一。

2.在被禁止列表中的单词为小写。段落中的单词大小写不敏感，返回结果需以小写形式。

第一种实现代码（c#）：
实现思路（清晰麻烦）：

给定一个paragraph和banned字符串数组，找出paragraph中出现频率最高并且不在banned数组中的单词；比较时大小写不敏感，例如ball与BALL视为同一个单词。

a.首先将paragraph中除了空格字母之外的东西去掉，并且将所有字母转成小写字母

b.将paragraph拆分成一个个的子字符串，定义一个数组存储每个子字符串出现的次数，用HashMap存储，当出现相同的字符串时，获取key对应的value;
count[value]++;

c.将count进行排序，注意其原来的下标也要进行变化，因为我们最后是要输出下标对应的字符串。

using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using Spire.Pdf;
using System.Text.RegularExpressions;

namespace pdfExtSVGError1
{
    /// <summary>
    /// Console demo for LeetCode 819 "Most Common Word": count every word of the
    /// paragraph first, then pick the most frequent one that is not banned.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Program entry point; runs the Most Common Word demo.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            // save2svg();  // unrelated PDF-to-SVG experiment, intentionally left disabled

            InvokeMostCommonWord();
        }

        /// <summary>
        /// Runs <see cref="MostCommonWord"/> on two sample inputs, prints each result
        /// to the console and then waits for a key press.
        /// </summary>
        private static void InvokeMostCommonWord()
        {
            String[] banned = { "m", "q", "e", "l", "c", "i", "z", "j", "g", "t", "w", "v", "h", "p", "d", "b", "a", "r", "x", "n" };
            Console.WriteLine(new Program().MostCommonWord("j. t? T. z! R, v, F' x! L; l! W. M; S. y? r! n; O. q; I? h; w. t; y; X? y, p. k! k, h, J, r? w! U! V; j' u; R! z. s. T' k. P? M' I' j! y. P, T! e; X. w? M! Y, X; G; d, X? S' F, K? V, r' v, v, D, w, K! S? Q! N. n. V. v. t? t' x! u. j; m; n! F, V' Y! h; c! V, v, X' X' t? n; N' r; x. W' P? W; p' q, S' X, J; R. x; z; z! G, U; m. P; o. P! Y; I, I' l' J? h; Q; s? U, q, x. J, T! o. z, N, L; u, w! u, S. Y! V; S? y' E! O; p' X, w. p' M, h! R; t? K? Y' z? T? w; u. q' R, q, T. R? I. R! t, X, s? u; z. u, Y, n' U; m; p? g' P? y' v, o? K? R. Q? I! c, X, x. r' u! m' y. t. W; x! K? B. v; m, k; k' x; Z! U! p. U? Q, t, u' E' n? S' w. y; W, x? r. p! Y? q, Y. t, Z' V, S. q; W. Z, z? x! k, I. n; x? z; V? s! g, U; E' m! Z? y' x? V! t, F. Z? Y' S! z, Y' T? x? v? o! l; d; G' L. L, Z? q. w' r? U! E, H. C, Q! O? w! s? w' D. R, Y? u. w, N. Z? h. M? o, B, g, Z! t! l, W? z, o? z, q! O? u, N; o' o? V; S! z; q! q. o, t! q! w! Z? Z? w, F? O' N' U' p? r' J' L; S. M; g' V. i, P, v, v, f; W? L, y! i' z; L? w. v, s! P?", banned));

            String input = "Bob hit a ball, the hit BALL flew far after it was hit.";
            banned = new string[] { "hit" };
            Console.WriteLine(new Program().MostCommonWord(input, banned));

            Console.ReadKey();
        }

        /// <summary>
        /// Returns the most frequent word of <paramref name="paragraph"/> that does not
        /// appear in <paramref name="banned"/>. Matching is case-insensitive and the
        /// result is lowercase.
        /// </summary>
        /// <param name="paragraph">
        /// Text made of letters, whitespace and the punctuation symbols <c>!?',;.</c>.
        /// Punctuation and whitespace both act as word separators.
        /// </param>
        /// <param name="banned">Lowercase words to exclude; <c>null</c> means "ban nothing".</param>
        /// <returns>
        /// The most frequent non-banned word in lowercase (the earliest such word on a tie),
        /// or an empty string when the paragraph is null/empty or contains no allowed word.
        /// </returns>
        public string MostCommonWord(string paragraph, string[] banned)
        {
            if (string.IsNullOrEmpty(paragraph))
            {
                return "";
            }

            HashSet<string> bannedWords = new HashSet<string>(banned ?? new string[0]);

            // Treat punctuation as a separator instead of deleting it, so that "b,b,c"
            // yields three words rather than the single bogus word "bbc".
            // ToLowerInvariant keeps the result independent of the current culture.
            string[] tokens = Regex.Split(paragraph.ToLowerInvariant(), @"[\s!?',;.]+");

            // Pass 1: count every word, banned or not.
            Dictionary<string, int> frequency = new Dictionary<string, int>();
            foreach (string token in tokens)
            {
                // Regex.Split emits empty strings for leading/trailing separators.
                if (token.Length == 0)
                {
                    continue;
                }

                int seen;
                frequency.TryGetValue(token, out seen);
                frequency[token] = seen + 1;
            }

            // Pass 2: walk the words in paragraph order so that ties resolve to the
            // word that appears first, and keep the highest count that is not banned.
            string best = "";
            int bestCount = 0;
            foreach (string token in tokens)
            {
                if (token.Length == 0 || bannedWords.Contains(token))
                {
                    continue;
                }

                int occurrences = frequency[token];
                if (occurrences > bestCount)
                {
                    best = token;
                    bestCount = occurrences;
                }
            }

            return best;
        }

        /// <summary>
        /// Loads "Tista reviseted.pdf", saves it as "Result.svg" and opens the saved file
        /// with the shell's default handler. Not called by the demo.
        /// </summary>
        private static void save2svg()
        {
            PdfDocument document = new PdfDocument();
            document.LoadFromFile("Tista reviseted.pdf");
            document.SaveToFile("Result.svg", FileFormat.SVG);
            // Open the file that was just written; the previous target "Result.pdf" is never created here.
            // NOTE(review): confirm the exact output file name Spire.Pdf uses for multi-page SVG export.
            System.Diagnostics.Process.Start("Result.svg");
        }
    }
}

第二种实现代码：

实现思路：

1.去掉标点符号
2.将段落转换为小写
3.以空白符分隔段落
4.通过HashMap计数

using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using Spire.Pdf;
using System.Text.RegularExpressions;

namespace pdfExtSVGError1
{
    /// <summary>
    /// Console demo for LeetCode 819 "Most Common Word": a single pass that skips
    /// banned words while counting and tracks the current leader as it goes.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Characters that end a word: whitespace plus the punctuation symbols the
        /// problem statement allows in a paragraph.
        /// </summary>
        private static readonly char[] WordSeparators = { ' ', '\t', '\r', '\n', '!', '?', '\'', ',', ';', '.' };

        /// <summary>
        /// Program entry point; runs the Most Common Word demo.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            InvokeMostCommonWord();
        }

        /// <summary>
        /// Runs <see cref="MostCommonWord"/> on two sample inputs, prints each result
        /// to the console and then waits for a key press.
        /// </summary>
        private static void InvokeMostCommonWord()
        {
            String[] banned = { "m", "q", "e", "l", "c", "i", "z", "j", "g", "t", "w", "v", "h", "p", "d", "b", "a", "r", "x", "n" };
            Console.WriteLine(new Program().MostCommonWord("j. t? T. z! R, v, F' x! L; l! W. M; S. y? r! n; O. q; I? h; w. t; y; X? y, p. k! k, h, J, r? w! U! V; j' u; R! z. s. T' k. P? M' I' j! y. P, T! e; X. w? M! Y, X; G; d, X? S' F, K? V, r' v, v, D, w, K! S? Q! N. n. V. v. t? t' x! u. j; m; n! F, V' Y! h; c! V, v, X' X' t? n; N' r; x. W' P? W; p' q, S' X, J; R. x; z; z! G, U; m. P; o. P! Y; I, I' l' J? h; Q; s? U, q, x. J, T! o. z, N, L; u, w! u, S. Y! V; S? y' E! O; p' X, w. p' M, h! R; t? K? Y' z? T? w; u. q' R, q, T. R? I. R! t, X, s? u; z. u, Y, n' U; m; p? g' P? y' v, o? K? R. Q? I! c, X, x. r' u! m' y. t. W; x! K? B. v; m, k; k' x; Z! U! p. U? Q, t, u' E' n? S' w. y; W, x? r. p! Y? q, Y. t, Z' V, S. q; W. Z, z? x! k, I. n; x? z; V? s! g, U; E' m! Z? y' x? V! t, F. Z? Y' S! z, Y' T? x? v? o! l; d; G' L. L, Z? q. w' r? U! E, H. C, Q! O? w! s? w' D. R, Y? u. w, N. Z? h. M? o, B, g, Z! t! l, W? z, o? z, q! O? u, N; o' o? V; S! z; q! q. o, t! q! w! Z? Z? w, F? O' N' U' p? r' J' L; S. M; g' V. i, P, v, v, f; W? L, y! i' z; L? w. v, s! P?", banned));

            String input = "Bob hit a ball, the hit BALL flew far after it was hit.";
            banned = new string[] { "hit" };
            Console.WriteLine(new Program().MostCommonWord(input, banned));

            Console.ReadKey();
        }

        /// <summary>
        /// Returns the most frequent word of <paramref name="paragraph"/> that does not
        /// appear in <paramref name="banned"/>. Matching is case-insensitive and the
        /// result is lowercase.
        /// </summary>
        /// <param name="paragraph">
        /// Text made of letters, whitespace and the punctuation symbols <c>!?',;.</c>.
        /// Punctuation and whitespace both act as word separators.
        /// </param>
        /// <param name="banned">Lowercase words to exclude; <c>null</c> means "ban nothing".</param>
        /// <returns>
        /// The most frequent non-banned word in lowercase (the first word to reach the
        /// top count on a tie), or an empty string when the paragraph is null/empty or
        /// contains no allowed word.
        /// </returns>
        public string MostCommonWord(string paragraph, string[] banned)
        {
            string result = "";
            if (string.IsNullOrEmpty(paragraph))
            {
                return result;
            }

            HashSet<String> ban = new HashSet<String>(banned ?? new string[0]);
            Dictionary<String, int> count = new Dictionary<String, int>();

            // Split on punctuation as well as whitespace so "b,b,c" is three words, and
            // drop the empty entries that adjacent separators would otherwise produce.
            // ToLowerInvariant keeps the result independent of the current culture.
            String[] words = paragraph.ToLowerInvariant()
                                      .Split(WordSeparators, StringSplitOptions.RemoveEmptyEntries);

            int max = 0;
            foreach (string w in words)
            {
                if (ban.Contains(w))
                {
                    continue;
                }

                // One lookup to read (0 when absent), one to write back.
                int seen;
                count.TryGetValue(w, out seen);
                seen++;
                count[w] = seen;

                // Strict '>' keeps the earlier leader on ties.
                if (seen > max)
                {
                    result = w;
                    max = seen;
                }
            }

            return result;
        }
    }
}

===============================================
第一种实现的java代码

import java.util.HashMap;

/**
 * LeetCode 819 "Most Common Word": count every word of the paragraph first,
 * then pick the most frequent one that is not banned.
 */
public class MostCommonWords {

    /** Runs the solver on a sample input and prints the answer. */
    public static void main(String[] args) {

        String[] banned = {"m","q","e","l","c","i","z","j","g","t","w","v","h","p","d","b","a","r","x","n"};
        System.out.println(MostCommonWord("j. t? T. z! R, v, F' x! L; l! W. M; S. y? r! n; O. q; I? h; w. t; y; X? y, p. k! k, h, J, r? w! U! V; j' u; R! z. s. T' k. P? M' I' j! y. P, T! e; X. w? M! Y, X; G; d, X? S' F, K? V, r' v, v, D, w, K! S? Q! N. n. V. v. t? t' x! u. j; m; n! F, V' Y! h; c! V, v, X' X' t? n; N' r; x. W' P? W; p' q, S' X, J; R. x; z; z! G, U; m. P; o. P! Y; I, I' l' J? h; Q; s? U, q, x. J, T! o. z, N, L; u, w! u, S. Y! V; S? y' E! O; p' X, w. p' M, h! R; t? K? Y' z? T? w; u. q' R, q, T. R? I. R! t, X, s? u; z. u, Y, n' U; m; p? g' P? y' v, o? K? R. Q? I! c, X, x. r' u! m' y. t. W; x! K? B. v; m, k; k' x; Z! U! p. U? Q, t, u' E' n? S' w. y; W, x? r. p! Y? q, Y. t, Z' V, S. q; W. Z, z? x! k, I. n; x? z; V? s! g, U; E' m! Z? y' x? V! t, F. Z? Y' S! z, Y' T? x? v? o! l; d; G' L. L, Z? q. w' r? U! E, H. C, Q! O? w! s? w' D. R, Y? u. w, N. Z? h. M? o, B, g, Z! t! l, W? z, o? z, q! O? u, N; o' o? V; S! z; q! q. o, t! q! w! Z? Z? w, F? O' N' U' p? r' J' L; S. M; g' V. i, P, v, v, f; W? L, y! i' z; L? w. v, s! P?", banned));
    }

    /**
     * Returns the most frequent word of {@code paragraph} that is not in {@code banned}.
     * Matching is case-insensitive and the result is lowercase. Per the original
     * author's note, the sample in {@code main} is expected to print "y".
     *
     * @param paragraph text made of letters, whitespace and the punctuation symbols
     *                  {@code !?',;.}; punctuation and whitespace both separate words
     * @param banned    lowercase words to exclude; {@code null} means "ban nothing"
     * @return the most frequent non-banned word in lowercase (the earliest such word on
     *         a tie), or an empty string when the paragraph is null/empty or contains
     *         no allowed word
     */
    public static String MostCommonWord(String paragraph, String[] banned) {
        if (paragraph == null || paragraph.isEmpty()) {
            return "";
        }

        // Fully-qualified names are used for everything except HashMap because
        // HashMap is the only java.util type this program imports.
        java.util.HashSet<String> bannedWords = new java.util.HashSet<String>();
        if (banned != null) {
            for (String word : banned) {
                bannedWords.add(word);
            }
        }

        // Lowercase with a fixed locale, then split on runs of whitespace/punctuation.
        String[] tokens = paragraph.toLowerCase(java.util.Locale.ROOT).split("[\\s!?',;.]+");

        // Pass 1: count every word, banned or not. The map is local so that repeated
        // calls never see counts left over from an earlier paragraph.
        HashMap<String, Integer> frequency = new HashMap<>();
        for (String token : tokens) {
            // split() yields a leading empty string when the text starts with a separator.
            if (token.isEmpty()) {
                continue;
            }
            Integer seen = frequency.get(token);
            frequency.put(token, seen == null ? 1 : seen + 1);
        }

        // Pass 2: walk the words in paragraph order so ties resolve to the word that
        // appears first, and keep the highest count that is not banned.
        String best = "";
        int bestCount = 0;
        for (String token : tokens) {
            if (token.isEmpty() || bannedWords.contains(token)) {
                continue;
            }
            int occurrences = frequency.get(token);
            if (occurrences > bestCount) {
                best = token;
                bestCount = occurrences;
            }
        }
        return best;
    }
}

第二种实现的java代码

import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Locale;
import java.util.Map;
import java.util.Set;

/**
 * LeetCode 819 "Most Common Word": a single pass that skips banned words while
 * counting and tracks the current leader as it goes.
 */
public class MostCommonWords {

    /** Runs the solver on a sample input and prints the answer. */
    public static void main(String[] args) {

        String[] banned = {"m","q","e","l","c","i","z","j","g","t","w","v","h","p","d","b","a","r","x","n"};
        System.out.println(MostCommonWord("j. t? T. z! R, v, F' x! L; l! W. M; S. y? r! n; O. q; I? h; w. t; y; X? y, p. k! k, h, J, r? w! U! V; j' u; R! z. s. T' k. P? M' I' j! y. P, T! e; X. w? M! Y, X; G; d, X? S' F, K? V, r' v, v, D, w, K! S? Q! N. n. V. v. t? t' x! u. j; m; n! F, V' Y! h; c! V, v, X' X' t? n; N' r; x. W' P? W; p' q, S' X, J; R. x; z; z! G, U; m. P; o. P! Y; I, I' l' J? h; Q; s? U, q, x. J, T! o. z, N, L; u, w! u, S. Y! V; S? y' E! O; p' X, w. p' M, h! R; t? K? Y' z? T? w; u. q' R, q, T. R? I. R! t, X, s? u; z. u, Y, n' U; m; p? g' P? y' v, o? K? R. Q? I! c, X, x. r' u! m' y. t. W; x! K? B. v; m, k; k' x; Z! U! p. U? Q, t, u' E' n? S' w. y; W, x? r. p! Y? q, Y. t, Z' V, S. q; W. Z, z? x! k, I. n; x? z; V? s! g, U; E' m! Z? y' x? V! t, F. Z? Y' S! z, Y' T? x? v? o! l; d; G' L. L, Z? q. w' r? U! E, H. C, Q! O? w! s? w' D. R, Y? u. w, N. Z? h. M? o, B, g, Z! t! l, W? z, o? z, q! O? u, N; o' o? V; S! z; q! q. o, t! q! w! Z? Z? w, F? O' N' U' p? r' J' L; S. M; g' V. i, P, v, v, f; W? L, y! i' z; L? w. v, s! P?", banned));
    }

    /**
     * Returns the most frequent word of {@code paragraph} that is not in {@code banned}.
     * Matching is case-insensitive and the result is lowercase. Per the original
     * author's note, the sample in {@code main} is expected to print "y".
     *
     * @param paragraph text made of letters, whitespace and the punctuation symbols
     *                  {@code !?',;.}; punctuation and whitespace both separate words
     * @param banned    lowercase words to exclude; {@code null} means "ban nothing"
     * @return the most frequent non-banned word in lowercase (the first word to reach
     *         the top count on a tie), or an empty string when the paragraph is
     *         null/empty or contains no allowed word
     */
    public static String MostCommonWord(String paragraph, String[] banned) {
        if (paragraph == null || paragraph.isEmpty()) {
            return "";
        }

        Set<String> ban = new HashSet<String>();
        if (banned != null) {
            ban.addAll(Arrays.asList(banned));
        }
        Map<String, Integer> count = new HashMap<>();

        // Lowercase with a fixed locale, then split on runs of whitespace/punctuation.
        String[] words = paragraph.toLowerCase(Locale.ROOT).split("[\\s!?',;.]+");
        String res = "";
        int max = 0;

        for (String w : words) {
            // split() yields a leading empty string when the text starts with a
            // separator; it must not be counted as a word.
            if (w.isEmpty() || ban.contains(w)) {
                continue;
            }
            // merge() increments and returns the new count in a single map operation.
            int seen = count.merge(w, 1, Integer::sum);
            // Strict '>' keeps the earlier leader on ties.
            if (seen > max) {
                res = w;
                max = seen;
            }
        }

        return res;
    }
}

参考文章：
1.https://blog.csdn.net/LaputaFallen/article/details/79951699

819. Most Common Word（leetcode）

猜你喜欢