import java.util.*;
import java.io.*;

/**
 * Command-line driver that loads labelled text files into {@link Instance}
 * objects in preparation for Naive Bayes classification.
 */
public class NaiveBayes {

    /**
     * Takes the names of the directories containing the data, reads in all
     * files in each directory, and stores each file as an {@link Instance}
     * whose class label is the path of the directory it was in. The entire
     * list of files, across all directories, is stored in an ArrayList named
     * {@code instanceList}, which is then available to pass to the
     * classification methods.
     *
     * @param argv one or more directory paths, one per class label
     * @throws IOException              if a directory cannot be listed or a
     *                                  file cannot be read
     * @throws IllegalArgumentException if an argument is not a directory
     */
    public static void main(String[] argv) throws IOException {
        if (argv.length < 1) {
            System.out.println("usage: NaiveBayes dir1, ...");
            System.exit(0);
        }

        // Declared outside the directory loop so that instances from every
        // label directory end up in the same list.
        ArrayList<Instance> instanceList = new ArrayList<>();

        for (int di = 0; di < argv.length; di++) {
            File dir = new File(argv[di]);
            if (!dir.isDirectory()) {
                throw new IllegalArgumentException(
                        "argument " + di + " is not a directory: " + argv[di]);
            }

            // listFiles() returns null (not an empty array) on an I/O error.
            File[] files = dir.listFiles();
            if (files == null) {
                throw new IOException("could not list directory: " + argv[di]);
            }

            for (int fi = 0; fi < files.length; fi++) {
                instanceList.add(Instance.createInstance(files[fi]));
            }
        }

        // You need to add methods to classify the data
    }
}

/**
 * A single document represented as a sparse bag of tokens: for every distinct
 * token in the file, the number of times it occurred. Tokens are mapped to
 * integer feature indices through a dictionary shared by all instances.
 */
class Instance {

    // This contains the mapping from tokens to integers used
    // for indexing into the data of Instance objects. It is shared by every
    // Instance and grows as new tokens are encountered.
    static Hashtable<String, Integer> dictionary = new Hashtable<>();

    // This contains a list from int to Integer for efficiency reasons:
    // dictIndices.get(i) is the Integer key object for feature index i.
    static ArrayList<Integer> dictIndices = new ArrayList<>();

    String m_name;
    String m_label;
    // Sparse feature vector: feature index -> occurrence count.
    Hashtable<Integer, Double> m_data;

    /**
     * Reads {@code file} and builds an Instance from it. A feature is any
     * non-empty whitespace-delimited token; the value of a feature is the
     * number of times the token occurs in the file. Tokens not seen before
     * are appended to the shared dictionary.
     *
     * @param file the file to read
     * @return an Instance named after the file and labelled with the file's
     *         parent path as returned by {@link File#getParent()}
     * @throws IOException if the file cannot be opened or read
     */
    public static Instance createInstance(File file) throws IOException {
        String name = file.getName();
        String label = file.getParent();
        Hashtable<Integer, Double> data = new Hashtable<>();

        // try-with-resources closes the reader even when readLine() throws.
        try (BufferedReader reader = new BufferedReader(new FileReader(file))) {
            String line;
            while ((line = reader.readLine()) != null) {
                for (String token : line.split("\\s+")) {
                    // split() yields "" for blank lines and for lines that
                    // start with whitespace; that is not a real token.
                    if (token.isEmpty()) {
                        continue;
                    }

                    // Assign the next free index to tokens not seen before.
                    Integer key = dictionary.get(token);
                    if (key == null) {
                        key = Integer.valueOf(dictIndices.size());
                        dictIndices.add(key);
                        dictionary.put(token, key);
                    }

                    Double oldVal = data.get(key);
                    double newVal = (oldVal == null) ? 1.0 : oldVal.doubleValue() + 1.0;
                    data.put(key, newVal);
                }
            }
        }

        return new Instance(name, label, data);
    }

    /**
     * Creates an instance from already-computed feature counts.
     *
     * @param name  name of the instance
     * @param label class label of the instance
     * @param data  feature index to occurrence count; stored by reference
     */
    protected Instance(String name, String label, Hashtable<Integer, Double> data) {
        m_name = name;
        m_label = label;
        m_data = data;
    }

    /** Returns the name this instance was created with. */
    public String getName() {
        return m_name;
    }

    /** Returns the class label this instance was created with. */
    public String getLabel() {
        return m_label;
    }

    /**
     * Returns the value stored for the feature with the given index.
     *
     * @param index feature index as assigned by the shared dictionary
     * @return the stored value, or 0 if the index is beyond the current
     *         dictionary size or this instance has no entry for the feature
     * @throws IndexOutOfBoundsException if {@code index} is negative
     */
    public double getValue(int index) {
        if (index >= dictIndices.size()) {
            return 0;
        }
        Double value = m_data.get(dictIndices.get(index));
        return value == null ? 0 : value.doubleValue();
    }
}