猿问

使用 StreamTokenizer () 、 HashMap() 、 HashSet() 计算词频

import java.io.*;

import java.util.*;

/**
 * Prints how often every token occurs in a text file.
 *
 * <p>The file is split with a {@link StreamTokenizer} using its default
 * syntax table: runs of letters form words, numbers are parsed as doubles
 * (so {@code 42} is reported as {@code 42.0}), and every other
 * non-whitespace character is a token of its own. Consequently
 * {@code c++} is reported as the three tokens {@code c}, {@code +} and
 * {@code +}.
 */
class A {

    /**
     * Reads a file name, tokenizes that file and prints the frequency of
     * every distinct token followed by the total number of tokens.
     *
     * @param args optional; when {@code args[0]} is present it is used as the
     *             file name, otherwise the name is read interactively
     * @throws Exception if the file name cannot be read or the file cannot be
     *                   created or read
     */
    public static void main(String args[]) throws Exception {

        String str = readFileName(args);

        // Kept from the original program: a missing file is created empty
        // instead of failing, which then yields a total of 0 tokens.
        File f = new File(str);
        f.createNewFile();

        // token -> number of occurrences, filled in a single pass.
        Map<String, Integer> counts = new HashMap<String, Integer>();
        int total = 0;

        // A Reader is used instead of the deprecated byte-based
        // StreamTokenizer(InputStream) constructor so that multi-byte UTF-8
        // characters are decoded before tokenizing; try-with-resources
        // guarantees the file handle is released.
        try (Reader in = new BufferedReader(new InputStreamReader(
                new FileInputStream(f), java.nio.charset.StandardCharsets.UTF_8))) {

            StreamTokenizer st = new StreamTokenizer(in);

            while (st.nextToken() != StreamTokenizer.TT_EOF) {
                String s = tokenText(st);
                Integer seen = counts.get(s);
                counts.put(s, seen == null ? 1 : seen + 1);
                total++;
            }
        }

        System.out.println("Frequency Of Words :");

        for (Map.Entry<String, Integer> entry : counts.entrySet()) {
            System.out.println(" WORD : " + entry.getKey() + " = " + entry.getValue());
        }

        System.out.println("Total Words In Files: " + total);
    }

    /**
     * Determines the file to analyse: {@code args[0]} when given, otherwise a
     * line typed by the user. Falls back to standard input when no console is
     * attached, because {@code System.console()} returns {@code null} then.
     */
    private static String readFileName(String[] args) throws IOException {
        if (args != null && args.length > 0) {
            return args[0];
        }

        System.out.println("Enter File Name : ");

        Console con = System.console();
        String name;
        if (con != null) {
            name = con.readLine();
        } else {
            // System.in is deliberately not closed; it belongs to the JVM.
            name = new BufferedReader(new InputStreamReader(System.in)).readLine();
        }

        if (name == null) {
            throw new EOFException("No file name was provided");
        }
        return name;
    }

    /** Returns the textual form of the token the tokenizer currently holds. */
    private static String tokenText(StreamTokenizer st) {
        switch (st.ttype) {
            case StreamTokenizer.TT_NUMBER:
                return String.valueOf(st.nval);
            case StreamTokenizer.TT_WORD:
                return st.sval;
            default:
                // Ordinary characters are reported through ttype itself.
                return String.valueOf((char) st.ttype);
        }
    }
}

在此代码中,首先我已经创建了一个包含以下数据的文本文件:


@ Hello Hii World # * c++ java salesforce


此代码的输出是:


**Frequency Of Words :


WORD : # = 1


WORD : @ = 1


WORD : c = 1


WORD : salesforce = 1


WORD : * = 1


WORD : Hii = 1


WORD : + = 2


WORD : java = 1


WORD : World = 1


WORD : Hello = 1


Total Words In Files: 11**


我不明白为什么输出中 c++ 被拆成了 c 和 + 这几个单独的单词。我希望 c++ 能作为一个完整的单词出现在输出中。


猛跑小猪
浏览 106回答 1
1回答

慕森王

你可以用这种方式来做:

    // Create the file at path specified in the String str
    // ...
    HashMap<String, Integer> map = new HashMap<>();
    InputStream fis = new FileInputStream(str);
    Reader bufferedReader = new BufferedReader(new InputStreamReader(fis));
    StreamTokenizer st = new StreamTokenizer(bufferedReader);
    st.wordChars('+', '+');
    while(st.nextToken() != StreamTokenizer.TT_EOF) {
        String s;
        switch(st.ttype) {
            case StreamTokenizer.TT_NUMBER:
                s = String.valueOf(st.nval);
                break;
            case StreamTokenizer.TT_WORD:
                s = st.sval;
                break;
            default:
                s = String.valueOf((char)st.ttype);
        }
        Integer val = map.get(s);
        if(val == null)
            val = 1;
        else
            val++;
        map.put(s, val);
    }
    Set<String> keySet = map.keySet();
    Iterator<String> iter = keySet.iterator();
    System.out.println("Frequency Of Words :");
    int sum = 0;
    while(iter.hasNext()) {
        String word = iter.next();
        int count = map.get(word);
        sum += count;
        System.out.println(" WORD : " + word + " = " + count);
    }
    System.out.println("Total Words In Files: " + sum);

请注意,我已把代码中 HashMap 和 Iterator 的原始类型(raw type)改成了泛型版本。关键的改动是 st.wordChars('+', '+'):它把 '+' 声明为单词字符,这样 c++ 就会被当作一个完整的单词。此外,您原来使用的 StreamTokenizer(InputStream) 构造函数已被弃用,所以这里改为传入 Reader。同时使用 map 和 set 是多余的,因为可以通过 .keySet() 方法直接遍历 map 的键集。现在 map 的键是字符串(单词),值是整数(该单词出现的次数)。不过,就您给出的这个示例而言,我认为直接使用 String 的 split 方法会更合适。
随时随地看视频慕课网APP

相关分类

Java
我要回答