/***************************************************
 Starter code for CS182 Assignment 6:
 Model Merging (Computational Credit)
 **************************************************/

import java.util.*;
import java.io.*;

/**
 * Data serves mainly as a wrapper to the getData method, which
 * extracts data from a file with 1 sentence per line into a Vector;
 * each element in the returned Vector is itself a Vector of Strings.
 *
 * Example use:
 *
 *   Vector data = Data.getData("a6-training-data.txt");
 */
public class Data {

    /**
     * Read the file named filename and return its sentences as tokenized words.
     *
     * Each line is trimmed; lines that are empty after trimming are skipped.
     * Every remaining line is split into words with string2Vector.
     *
     * If an IOException occurs (including the file not being found), a message
     * is printed to standard output and the sentences read so far are returned,
     * so the result may be empty or partial but is never null.
     *
     * @param filename path of the text file to read, one sentence per line
     * @return a Vector with one element per non-blank line, each element being
     *         a Vector of the Strings (words) on that line
     */
    public static Vector getData(String filename) {
        Vector data = new Vector();
        BufferedReader datafile = null;

        try {
            // create a BufferedReader based on the file
            datafile = new BufferedReader(new FileReader(filename));

            String line = datafile.readLine();
            while (line != null) {
                String sent = line.trim();  // remove any whitespace around line
                if (!sent.equals("")) {
                    data.add(string2Vector(sent));
                }
                line = datafile.readLine();
            }
        } catch (java.io.IOException exception) {
            System.out.println("IO exception for file " + filename);
        } finally {
            // Always release the file handle, whether reading succeeded or failed.
            if (datafile != null) {
                try {
                    datafile.close();
                } catch (java.io.IOException ignored) {
                    // Nothing useful can be done if close fails; whatever was
                    // read before the failure is still returned to the caller.
                }
            }
        }

        return data;
    }

    /**
     * Take a String and use StringTokenizer to split it into a Vector of words.
     *
     * The StringTokenizer is created with its default delimiters, so consecutive
     * delimiters do not produce empty words.
     *
     * @param s the sentence to split
     * @return a Vector of the tokens of s, in order; empty if s has no tokens
     */
    public static Vector string2Vector(String s) {
        Vector v = new Vector();
        StringTokenizer st = new StringTokenizer(s);
        while (st.hasMoreTokens()) {
            v.add(st.nextToken());
        }
        return v;
    }
}