How to Develop a Program to Count Words, Letters and Their Frequencies Using Java

Using Java, develop a program that counts the characters, lines, words and letters in a text file, together with the frequency of each letter and of each word length. The program also computes the average word length and runs a set of tests that compare the generated statistics with known expected values for several sample files.

Solution:

ProcessText.java

package project;

import java.util.Arrays;

/**
 *
 *
 */
public class ProcessText {
    String text = “”;
    ProcessText(TextStatisticsInterface stats){
    int characount = stats.getCharCount();
int linecount= stats.getLineCount();
int wordsCount=stats.getWordCount();
    System.out.println(linecount + ” Line”);
    System.out.println(wordsCount + ” words”);
     System.out.println(characount + ” characters”);
     int [] testWordFreq = stats.getLetterCount();
     char[] alphabet = {‘a’,’b’,’c’,’d’,’e’,’f’,’g’,’h’,’i’,’j’,’k’,’l’,’m’,’n’,’o’,’p’,’q’,’r’,’s’,’t’,’u’,’v’,’w’,’x’,’y’,’z’};
     for(int i = 0 ; i < 25; i++){
      System.out.println(alphabet[i] + “=” + testWordFreq[i]);
     
     }
      System.out.println(“————————————–“);
   int [] WordLengthCount = stats.getWordLengthCount();
     System.out.println(” length        frequency”);
     System.out.println(” ——-       ———-“);
   for(int i = 1 ; i < 12 ; i++){
   
       System.out.println(i   +”          “+  (WordLengthCount[i]));  
   
   }
      System.out.println(“Average word length = “+ stats.getAverageWordLength());
     
    }
    public String toString(){
    
                        return   text ;
    
    }
}

TextStatistics.java

package project;

import com.sun.xml.internal.ws.util.StringUtils;
import java.io.BufferedInputStream;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import static java.lang.System.in;
import java.nio.charset.Charset;
import static java.nio.file.Files.lines;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Scanner;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.io. *;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
 
import java.util.*;
import static java.util.Comparator.comparing;
import static java.util.stream.Collectors.toList;
import static java.util.stream.Collectors.toMap;

public class TextStatistics implements TextStatisticsInterface{

int characount = 0;
int linecount=0;
int wordsCount=0;
InputStream is;
File filename;

    TextStatistics(File nextFile) throws FileNotFoundException {
    filename=nextFile;
    is = new BufferedInputStream(new FileInputStream(nextFile));
    }

    @Override
    public int getCharCount()
    {  String str  = null;
        File file;
    file = new File(filename.toString());
    byte[] data = null;
    try (FileInputStream fis = new FileInputStream(file)) {
        data = new byte[(int) file.length()];
        fis.read(data);
    } catch (FileNotFoundException ex) {
        Logger.getLogger(TextStatistics.class.getName()).log(Level.SEVERE, null, ex);
    } catch (IOException ex) {
        Logger.getLogger(TextStatistics.class.getName()).log(Level.SEVERE, null, ex);
    }

    try {
          str = new String(data, “UTF-8”);
    } catch (UnsupportedEncodingException ex) {
        Logger.getLogger(TextStatistics.class.getName()).log(Level.SEVERE, null, ex);
    }
       
   //  declare Variables
        int length = 0;
        int numberOfblanks = 0;
        int whitespaceCount = 0;
        int totalCharacter = 0;
        boolean done = false;

       

        length = str.length();     //  conversion of strings
    String charactersWithoutWhiteSpaces = str.trim();

        while(!done)  //  continue while characters is input
        {
            {
                //test for valid codes 1, 2, 3, or 4
                if (str.length() > 0)
                {
                     length = str.length();     //  conversion of strings
                    numberOfblanks = whitespaceCount;

                    for (int i = 0; i < length; i++)  //  counting spaces in string
                    {
                        if (Character.isWhitespace(str.charAt(i)))
                        whitespaceCount++;
                    }

                    for (int j = 0; j < str.length(); j++)
                    {
                        if (Character.isLetter(str.charAt(j)))
                        totalCharacter++;
                    }
                }
                else done = true;
                }

            numberOfblanks = length-whitespaceCount;         //  removes spaces

            
 done = true;
               
        }

     return length;
    }

    @Override
    public int getWordCount() {
        wordsCount= 0;
        String line = “”;
        int count=0;
    InputStream fis = null;
    try {
        fis = new FileInputStream(filename);
    } catch (FileNotFoundException ex) {
        Logger.getLogger(TextStatistics.class.getName()).log(Level.SEVERE, null, ex);
    }
    InputStreamReader isr = new InputStreamReader(fis, Charset.forName(“UTF-8”));
    BufferedReader br = new BufferedReader(isr);

   
    try {
        while ((line = br.readLine()) != null) {
            
           
             //   String replaceAll = line.replaceAll(“\\s+”, “”);
             
                wordsCount += countWords(line);
            
        }
    } catch (IOException ex) {
        Logger.getLogger(TextStatistics.class.getName()).log(Level.SEVERE, null, ex);
    }
        return wordsCount ;
    }

    @Override
    public int getLineCount() {
    linecount = 0;
  String line = “”;
      
    InputStream fis = null;
    try {
        fis = new FileInputStream(filename);
    } catch (FileNotFoundException ex) {
        Logger.getLogger(TextStatistics.class.getName()).log(Level.SEVERE, null, ex);
    }
    InputStreamReader isr = new InputStreamReader(fis, Charset.forName(“UTF-8”));
    BufferedReader br = new BufferedReader(isr);

   
    try {
        while ((line = br.readLine()) != null) {
            
            linecount++;
        }
    } catch (IOException ex) {
        Logger.getLogger(TextStatistics.class.getName()).log(Level.SEVERE, null, ex);
    }

 
     return linecount;
    
      
       }
    
    
    public static int countWords(String s){

    int wordCount = 0;

    boolean word = false;
    int endOfLine = s.length() – 1;

    for (int i = 0; i < s.length(); i++) {
        // if the char is a letter, word = true.
        if (Character.isLetter(s.charAt(i)) && i != endOfLine) {
            word = true;
            // if char isn’t a letter and there have been letters before,
            // counter goes up.
        } else if (!Character.isLetter(s.charAt(i)) && word) {
            wordCount++;
            word = false;
            // last word of String; if it doesn’t end with a non letter, it
            // wouldn’t count without this.
        } else if (Character.isLetter(s.charAt(i)) && i == endOfLine) {
            wordCount++;
        }
    }
    return wordCount;
}
    
    
    
    
    
    @Override
    public int[] getLetterCount() {
   String line = “”; String linee = “”;
      int[] LetterCount = new int[26];  
    InputStream fis = null;
    try {
        fis = new FileInputStream(filename);
    } catch (FileNotFoundException ex) {
        Logger.getLogger(TextStatistics.class.getName()).log(Level.SEVERE, null, ex);
    }
    InputStreamReader isr = new InputStreamReader(fis, Charset.forName(“UTF-8”));
    BufferedReader br = new BufferedReader(isr);

   
    try {
        while ((line = br.readLine()) != null) {
           
            linee += line;
            
          
            linecount++;
        }
    } catch (IOException ex) {
        Logger.getLogger(TextStatistics.class.getName()).log(Level.SEVERE, null, ex);
    }
    char[] alphabet = {‘a’,’b’,’c’,’d’,’e’,’f’,’g’,’h’,’i’,’j’,’k’,’l’,’m’,’n’,’o’,’p’,’q’,’r’,’s’,’t’,’u’,’v’,’w’,’x’,’y’,’z’};
 int i;
  for (  i = 0 ; i < alphabet.length ; i++){
                       int    occurance;
            int o = i;
            occurance =   (int) linee.chars().filter(num -> num == alphabet[o]).count();
       
               LetterCount[i] = occurance;
             
  }
  linee= “”;
    return LetterCount;
    }
 
    @Override
    public int[] getWordLengthCount() {
        int[] inlist = new  int[24];
         String str  = null;
        File file;
    file = new File(filename.toString());
    byte[] data = null;
    try (FileInputStream fis = new FileInputStream(file)) {
        data = new byte[(int) file.length()];
        fis.read(data);
    } catch (FileNotFoundException ex) {
        Logger.getLogger(TextStatistics.class.getName()).log(Level.SEVERE, null, ex);
    } catch (IOException ex) {
        Logger.getLogger(TextStatistics.class.getName()).log(Level.SEVERE, null, ex);
    }

    try {
          str = new String(data, “UTF-8”);
    } catch (UnsupportedEncodingException ex) {
        Logger.getLogger(TextStatistics.class.getName()).log(Level.SEVERE, null, ex);
    }
       
 
  String[] arr=str.split(” “);
    Map<Integer,Integer> lengthMap=new HashMap<>();
    for(String i:arr){
        Integer val=lengthMap.get(i.length());
        if(val==null){
           val=0;
        }
        lengthMap.put(i.length(),val+1);
    }
    for(Map.Entry<Integer,Integer> i:lengthMap.entrySet()){
        if(i.getKey() <= 24){
        inlist[i.getKey()] = i.getValue();}
      //  System.out.println(“Number of String with length “+i.getKey()+” is “+i.getValue());
    }
    return inlist;        
    }

    @Override
    public double getAverageWordLength() {
    
         
        InputStream fis = null;
    try {
        fis = new FileInputStream(filename);
    } catch (FileNotFoundException ex) {
        Logger.getLogger(TextStatistics.class.getName()).log(Level.SEVERE, null, ex);
    }
    InputStreamReader isr = new InputStreamReader(fis, Charset.forName(“UTF-8”));
    BufferedReader br = new BufferedReader(isr);
    String line = “”;
               
 double words = 0;  double totalChars = 0;
    try {
        while (( line = br.readLine()) != null) {
            
            
            String[] strArray = line.split(” “);
      
        for(String s : strArray){
            totalChars += s.length();
        }
          words += strArray.length;
      
        }
        
    } catch (IOException ex) {
        Logger.getLogger(TextStatistics.class.getName()).log(Level.SEVERE, null, ex);
    }
    //words  = getWordCount();//Count();
     double averageWordLength = (double)(totalChars/words);
    return averageWordLength;
    }
}  

TextStatisticsInterface.java

package project;

public interface TextStatisticsInterface {

    /**
     * Reports how many characters the text file contains.
     *
     * @return the character count of the text file
     */
    int getCharCount();

    /**
     * Reports how many words the text file contains.
     *
     * @return the word count of the text file
     */
    int getWordCount();

    /**
     * Reports how many lines the text file contains.
     *
     * @return the line count of the text file
     */
    int getLineCount();

    /**
     * Reports the per-letter occurrence counts.
     *
     * @return the letterCount array; locations [0]..[25] correspond to
     *         'a' through 'z'
     */
    int[] getLetterCount();

    /**
     * Reports how many words there are of each length.
     *
     * @return the wordLengthCount array with locations [0]..[23], where
     *         location [i] stores the number of words of length i in the
     *         text file. Location [0] is not used. Location [23] holds the
     *         count of words of length 23 and higher.
     */
    int[] getWordLengthCount();

    /**
     * Reports the mean length of the words in the text file.
     *
     * @return the average word length in the text file
     */
    double getAverageWordLength();
}

TextStatisticsTest.java

package project;

import java.io.File;
import java.io.FileNotFoundException;
import java.util.Arrays;

public class TextStatisticsTest
{
private final static int PRECISION = 2; //number of digits after floating point to match

private static boolean approxEquals(double x, double y, int precision) {
final double EPSILON = Math.pow(10, -precision);
if (Math.abs(x – y) < EPSILON)
return true;
else
return false;
}

private static void test(TextStatisticsInterface stats,
int numChars,
int numWords,
int numLines,
double avgWordLength,
int[] wordFreq,
int[] letterFreq)
{

if (stats.getCharCount() == numChars){
System.out.println(“Passed! getCharCount()”);
} else {
System.out.println(“—-> Failed ! getCharCount()  correct: ” + numChars + ” generated: ” + stats.getCharCount());
}

if (stats.getWordCount() == numWords) {
System.out.println(“Passed! getWordCount()”);
} else {
System.out.println(“—-> Failed ! getWordCount()  correct: ” + numWords + ” generated: ” + stats.getWordCount());
}
if (stats.getLineCount() == numLines) {
System.out.println(“Passed! getLineCount()”);
} else {
System.out.println(“—-> Failed ! getLineCount()  correct: ” + numLines + ” generated: ” + stats.getLineCount());
}
if (approxEquals(stats.getAverageWordLength(), avgWordLength, PRECISION)) {
System.out.println(“Passed! getAverageWordLength()”);
} else {
System.out.println(“—-> Failed ! getAverageWordLength()  correct: ” + avgWordLength + ” generated: ” + stats.getAverageWordLength());
}

int [] testWordFreq = stats.getWordLengthCount();
if (Arrays.equals(testWordFreq, wordFreq)) {
System.out.println(“Passed! Word length frequencies”);
} else {
System.out.println(“\n—-> Failed ! Word length frequencies\n\n” +
”   correct: ” + Arrays.toString(wordFreq) + “\n” +
” generated: ” + Arrays.toString(testWordFreq) + “\n”);
}

int[] testLetterFreq = stats.getLetterCount();
if (Arrays.equals(testLetterFreq, letterFreq)) {
System.out.println(“Passed! Letter frequencies”);
} else {
System.out.println(“\n—-> Failed ! Letter frequencies\n\n” +
”   correct: ” + Arrays.toString(letterFreq) + “\n” +
” generated: ” + Arrays.toString(testLetterFreq) +”\n”);
}

System.out.println();
}

public static void main(String[] args) throws FileNotFoundException
{
// expected results
String [] textfile = {“etext” + File.separator + “testfile.txt”,
“etext” + File.separator + “Gettysburg-Address.txt”,
“etext” + File.separator + “Alice-in-Wonderland.txt”};
int[][] wordFreq = {{0, 3, 13, 24, 13, 10, 2, 5, 3, 1, 3, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
{0, 8, 50, 55, 61, 35, 27, 17, 7, 10, 6, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
{0, 1705, 4412, 7062, 5782, 3340, 1951, 1569, 723, 448, 181, 108, 34, 11, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0}
};
int[][] letterFreq = {{27, 1, 11, 10, 33, 9, 7, 24, 25, 0, 2, 18, 5, 25, 26, 5, 0, 21, 30, 35, 7, 1, 10, 1, 2, 0},
{107, 18, 32, 61, 175, 28, 33, 81, 74, 0, 3, 47, 14, 86, 96, 17, 1, 84, 53, 132, 25, 27, 28, 0, 13, 0},
{8787, 1474, 2397, 4931, 13569, 2000, 2528, 7372, 7511, 146, 1158, 4713, 2107, 7013, 8141, 1522, 209, 5433,
6495, 10684, 3468, 845, 2674, 148, 2264, 78}
};
int[] numChars = {465, 1622, 148482};
int[] numWords = {79, 281, 27331};
int[] numLines = {11, 39, 3610};
double[] avgWordLength = {4.24, 4.40, 3.94};

for (int i = 0; i < textfile.length; i++) {
File nextFile = new File(textfile[i]);
if (nextFile.exists() && nextFile.canRead()) {
System.out.println(“\nTesting on data file:” + textfile[i] + “\n”);
TextStatisticsInterface stats = new TextStatistics(nextFile);

new  ProcessText(stats);
test(stats, numChars[i], numWords[i], numLines[i],
avgWordLength[i], wordFreq[i], letterFreq[i]);
} else {
System.err.println(“Cannot access test file: ” + textfile[i]);
}
}
}
}

Output Screen:

TextStatisticsInterface.java

LEAVE A REPLY

Please enter your comment!
Please enter your name here