Код работает, за исключением метода StopWordsRemoval (последнего в коде): он не изменяет выходные данные так, как должен, — результаты до и после вызова этого метода одинаковы!
Не могли бы вы помочь мне разобраться, в чем проблема? Это мой первый код на Java
import java.io.*;
import java.util.*;
public class Information_Retrieval_Hw1 {
// Program-wide state: every field below is public and static.
// Reader assigned by TagHandler for the file it is reading line by line.
public static BufferedReader buffer;
// Table handed to the Calculate*, FindFiftyMostFrequentWords and StopWordsRemoval calls in main.
// NOTE(review): raw type; presumably maps word -> occurrence count -- confirm against TokenHandler.
public static Hashtable wordList = new Hashtable();
// main prints this list's size as "Total number of documents".
// NOTE(review): nothing in the visible code adds to it -- confirm where it is filled.
public static ArrayList fileMap = new ArrayList();
// Set passed to both TagHandler and TokenHandler for every file processed by ReadFile.
public static Set tagNames = new HashSet();
//public static ArrayList list;
// Incremented by ReadFile once per non-directory entry it visits.
public static int documentsCount = 0;
// Set in main from CalculateNumberOfTokens(wordList).
public static int totalTokens = 0;
// Set in main from CalculateUniqueWords(wordList).
public static int uniqueWords = 0;
// Running sum of the values returned by TagHandler, accumulated in ReadFile.
public static int tagCount = 0;
// Set in main from CalculateSingleOccurenceWords(wordList).
public static int singleOccureneWords = 0;
// Not assigned in the visible part of the file.
// NOTE(review): presumably filled by FindFiftyMostFrequentWords -- confirm.
public static ArrayList sortedList;
// No-argument constructor with an empty body; the fields and methods visible in this
// file are all static, and main does not create an instance.
public Information_Retrieval_Hw1() {
// Empty body.
}
/**
 * Program entry point: reads the corpus directory, prints word statistics,
 * calls StopWordsRemoval, and then prints the same statistics a second time.
 *
 * @param args optional; when at least one argument is given, args[0] is used as the
 *             corpus directory, otherwise the built-in default path is used
 * @throws IOException if one of the called processing steps throws it
 */
public static void main(String[] args) throws IOException {
    // Default corpus location; kept so that running without arguments behaves as before.
    String cranfield = "/Users/Manal/Desktop/semster1/IR/assigenment 1/cranfieldDocs";
    if (args != null && args.length > 0) {
        cranfield = args[0];
    }
    File cranfieldFiles = new File(cranfield);

    ReadFile(cranfieldFiles);
    System.out.println("Total number of documents: " + fileMap.size());
    reportWordStatistics();

    // NOTE(review): the second report below re-reads the static wordList, so the figures
    // can only differ if StopWordsRemoval modifies that same table in place -- confirm.
    StopWordsRemoval (cranfieldFiles,wordList);

    System.out.println("\n***********************************\nAfter removing stop words \n***********************************\n");
    reportWordStatistics();
}

/**
 * Recomputes the word statistics from the static wordList, stores them in the
 * corresponding static fields, prints them, and then reports the most frequent words.
 */
private static void reportWordStatistics() {
    // Total number of tokens
    totalTokens = CalculateNumberOfTokens(wordList);
    System.out.println("Total number Of words = " + totalTokens);

    // Number of distinct words
    uniqueWords = CalculateUniqueWords(wordList);
    System.out.println("Total number Of distinct words = " + uniqueWords);

    // Number of words that occur exactly once
    singleOccureneWords = CalculateSingleOccurenceWords(wordList);
    System.out.println("Total number Of words that occur only once = " + singleOccureneWords);

    // Most frequent words (the count is decided inside FindFiftyMostFrequentWords)
    FindFiftyMostFrequentWords(wordList);
}
/**
 * Walks the given directory recursively. For every non-directory entry it increments
 * documentsCount, adds the value returned by TagHandler to tagCount, and calls TokenHandler.
 *
 * @param cranfieldFiles directory whose entries are processed
 * @throws IOException if the directory cannot be listed, or if TagHandler or
 *                     TokenHandler throws it while processing a file
 */
public static void ReadFile(File cranfieldFiles) throws IOException{
    // listFiles() returns null when the path is not a directory or cannot be read;
    // fail with a clear message instead of a NullPointerException in the loop.
    File[] entries = cranfieldFiles.listFiles();
    if (entries == null) {
        throw new IOException("Cannot list files in directory: " + cranfieldFiles);
    }

    for (File file : entries) {
        // Descend into sub-directories.
        if (file.isDirectory()) {
            ReadFile(file);
            continue;
        }

        documentsCount++;

        // No reader is opened here: TagHandler opens its own reader on the file, so a
        // reader created at this point would be replaced without being read or closed.
        // Find the tags and add their count.
        tagCount = tagCount + TagHandler(file, tagNames);

        // Find the words in the file.
        TokenHandler(file, tagNames);
    }
}
public static int TagHandler(File file, Set tagNames) throws IOException
{
String line;
int tag_count = 0;
buffer = new BufferedReader(new FileReader(file));
while((line = buffer.readLine()) != null)
{
/*
* If the line contains a '
Подробнее здесь: https://stackoverflow.com/questions/393 ... ot-working
Мобильная версия