Today I am sharing a utility class: a word frequency counter.

instructions

The following code counts how many times a given word occurs in the text files under a directory, including all of its subdirectories.

Counting is sped up by processing the files with multiple threads.

Relevant concepts

  • Regular expression
  • Recursion
  • Callable
  • Future
  • Executors

code

import java.io.File; import java.io.IOException; import java.nio.charset.StandardCharsets; import java.nio.file.Files; import java.nio.file.Path; import java.nio.file.Paths; import java.util.ArrayList; import java.util.HashSet; import java.util.List; import java.util.Set; import java.util.concurrent.*; import java.util.function.Predicate; import java.util.regex.Pattern; /** * Count the number of occurrences of a word in a text file under the specified path, including all subdirectories. * <p> * uses UTF-8 encoding. * <p> * Use multiple threads to read the contents of a file and count the sum of the execution results of multiple threads. ** @author ijiangtao.net */ public class FilesWordCounter {/** * separate with non-letter delimiters. * <p> * For example, if the number of occurrences of "hello" is counted, "hello-hello" is counted twice, and "helloHello" is not counted. * <p> * Precompile regular expressions to improve execution speed. */ private static Pattern P = Pattern.compile("\\PL+"); /** * Count the number of word occurrences in the text ** @param word * @param path * @return
     */
    public static long occurrences(String word, Path path) {
        try {
            String contents = new String(Files.readAllBytes(path),
                    StandardCharsets.UTF_8);
            return P.splitAsStream(contents)
                    .filter(Predicate.isEqual(word))
                    .count();
        } catch (IOException ex) {
            ex.printStackTrace();
            return0; }} /** * Multiple threads count the number of word occurrences in all files under the specified path ** @param word * @param path * @return*/ public static long countWordInAllFiles(String word, String path) { ExecutorService executor = null; Set<Path> Paths = getAllFilePaths(Path, null); //descendants(Paths.get(path)); There will be a Java nio. File. AccessDeniedException??? List<Callable<Long>> tasks = new ArrayList<>();for (Path p : paths) {
                tasks.add(() -> {
                    returnoccurrences(word, p); }); }} int processors = runtime.getruntime ().availableprocessors (); executor = Executors.newFixedThreadPool(processors); // Call all Callable methods count the number of occurrences of word in each file and place the results into Future List<Future<Long>> results = executor.invokeall (tasks); Long total = 0;for (Future<Long> result : results) {
                total += result.get();
            }


            returntotal; } catch (Exception e) { e.printStackTrace(); } finally {// Close the thread poolif (null != executor) {
                executor.shutdown();
            }
        }


        return0; } /** * recursively gets the set of paths for all files in the specified directory. * * @param fileDir * @param paths * @return
     */
    private static Set<Path> getAllFilePaths(String fileDir, Set<Path> paths) {
        if(null == paths) { paths = new HashSet<>(); } File File = new File(fileDir); File[] files = file.listFiles(); // If the directory is empty, exitif (files == null) {
            returnpaths; } // Go through all the files in the directoryfor (File f : files) {
            if (f.isFile()) {
                Path path = Paths.get(f.getAbsolutePath());
                paths.add(path);
            } else if(f.isDirectory()) { getAllFilePaths(f.getAbsolutePath(), paths); }}returnpaths; } public static void main(String[] args) throws InterruptedException, ExecutionException, IOException {// Specify the word you want to count and the directory where the file resides to test system.out.println ("Occurrences of String: " + countWordInAllFiles("hello"."C:\\1files\\test\\")); }}Copy the code

Title: Imprisoned Marius by Germain Drouet