001 package org.maltparser.parser;
002
import java.io.File;
import java.io.IOException;
import java.util.Formatter;
import java.util.Locale;
import java.util.regex.Pattern;

import org.apache.log4j.FileAppender;
import org.apache.log4j.Level;
import org.apache.log4j.Logger;
import org.apache.log4j.PatternLayout;
import org.maltparser.core.config.ConfigurationDir;
import org.maltparser.core.config.ConfigurationException;
import org.maltparser.core.config.ConfigurationRegistry;
import org.maltparser.core.exception.MaltChainedException;
import org.maltparser.core.helper.SystemLogger;
import org.maltparser.core.helper.Util;
import org.maltparser.core.io.dataformat.DataFormatInstance;
import org.maltparser.core.options.OptionManager;
import org.maltparser.core.propagation.PropagationManager;
import org.maltparser.core.symbol.SymbolTableHandler;
import org.maltparser.core.syntaxgraph.DependencyStructure;
import org.maltparser.parser.guide.ClassifierGuide;
024
025 /**
026 * @author Johan Hall
027 *
028 */
029 public class SingleMalt implements DependencyParserConfig {
/** Mode value for learning; {@code initParsingAlgorithm()} creates a {@code BatchTrainer} for it. */
public static final int LEARN = 0;
/** Mode value for parsing; {@code initParsingAlgorithm()} creates a {@code DeterministicParser} for it. */
public static final int PARSE = 1;
/** Configuration directory, stored by {@code setConfigurationDir} (called from {@code initialize}). */
protected ConfigurationDir configDir;
/** Logger for configuration messages; assigned by {@code initConfigLogger} or {@code setConfigLogger}. */
protected Logger configLogger;
/** Index of the option container passed to every {@code OptionManager} lookup in this class. */
protected int optionContainerIndex;
/** The trainer or parser created by {@code initParsingAlgorithm()}; {@code null} until then. */
protected Algorithm parsingAlgorithm = null;
/** Current mode; compared against {@link #LEARN} and {@link #PARSE}. */
protected int mode;
/** Registry created in {@code initialize} and filled through {@code addRegistry}. */
protected ConfigurationRegistry registry;
/** Symbol tables obtained from the data format instance in {@code initialize}. */
protected SymbolTableHandler symbolTableHandler;
/** Wall-clock time in milliseconds, taken when {@code initialize} runs. */
protected long startTime;
/** Wall-clock time in milliseconds, taken when {@code terminate} runs in LEARN or PARSE mode. */
protected long endTime;
/** Not read or written by any method visible in this class. */
protected int nIterations = 0;
/** Created by {@code initPropagation()} only when a propagation specification is configured. */
protected PropagationManager propagationManager;
043
044 public void initialize(int containerIndex, DataFormatInstance dataFormatInstance, ConfigurationDir configDir, int mode) throws MaltChainedException {
045
046 this.optionContainerIndex = containerIndex;
047 this.mode = mode;
048 setConfigurationDir(configDir);
049 startTime = System.currentTimeMillis();
050 configLogger = initConfigLogger(getOptionValue("config", "logfile").toString(), getOptionValue("config", "logging").toString());
051 registry = new ConfigurationRegistry();
052 symbolTableHandler = dataFormatInstance.getSymbolTables();
053
054 if (mode == SingleMalt.LEARN) {
055 checkOptionDependency();
056 }
057 registry.put(org.maltparser.core.symbol.SymbolTableHandler.class, getSymbolTables());
058 registry.put(org.maltparser.core.io.dataformat.DataFormatInstance.class, dataFormatInstance);
059 // registry.put(org.maltparser.parser.DependencyParserConfig.class, this);
060 initPropagation();
061 initParsingAlgorithm();
062
063 }
064
065 private void initPropagation() throws MaltChainedException {
066 String propagationSpecFileName = getOptionValue("singlemalt", "propagation").toString();
067 if (propagationSpecFileName == null || propagationSpecFileName.length() == 0) {
068 return;
069 }
070 propagationManager = new PropagationManager(configDir, symbolTableHandler);
071 if (mode == SingleMalt.LEARN) {
072 propagationSpecFileName = configDir.copyToConfig(propagationSpecFileName);
073 OptionManager.instance().overloadOptionValue(optionContainerIndex, "singlemalt", "propagation", propagationSpecFileName);
074 }
075 getConfigLogger().info(" Propagation : " + propagationSpecFileName+"\n");
076 propagationManager.loadSpecification(propagationSpecFileName);
077 }
078
079 /**
080 * Initialize the parsing algorithm
081 *
082 * @throws MaltChainedException
083 */
084 protected void initParsingAlgorithm() throws MaltChainedException {
085 if (mode == LEARN) {
086 parsingAlgorithm = new BatchTrainer(this);
087 } else if (mode == PARSE) {
088 parsingAlgorithm = new DeterministicParser(this);
089 }
090 }
091
092 public void addRegistry(Class<?> clazz, Object o) {
093 registry.put(clazz, o);
094 }
095
096 public void process(Object[] arguments) throws MaltChainedException {
097 if (mode == LEARN) {
098 if (arguments.length < 2 || !(arguments[0] instanceof DependencyStructure) || !(arguments[1] instanceof DependencyStructure)) {
099 throw new MaltChainedException("The single malt learn task must be supplied with at least two dependency structures. ");
100 }
101 DependencyStructure systemGraph = (DependencyStructure)arguments[0];
102 DependencyStructure goldGraph = (DependencyStructure)arguments[1];
103 if (systemGraph.hasTokens() && getGuide() != null) {
104 getGuide().finalizeSentence(((Trainer)getAlgorithm()).parse(goldGraph, systemGraph));
105 }
106 } else if (mode == PARSE) {
107 if (arguments.length < 1 || !(arguments[0] instanceof DependencyStructure)) {
108 throw new MaltChainedException("The single malt parse task must be supplied with at least one input terminal structure and one output dependency structure. ");
109 }
110 DependencyStructure processGraph = (DependencyStructure)arguments[0];
111 if (processGraph.hasTokens()) {
112 ((Parser)getAlgorithm()).parse(processGraph);
113 }
114 }
115 }
116
117 public void parse(DependencyStructure graph) throws MaltChainedException {
118 if (graph.hasTokens()) {
119 ((Parser)getAlgorithm()).parse(graph);
120 }
121 }
122
123 public void oracleParse(DependencyStructure goldGraph, DependencyStructure oracleGraph) throws MaltChainedException {
124 if (oracleGraph.hasTokens()) {
125 if (getGuide() != null) {
126 getGuide().finalizeSentence(((Trainer)getAlgorithm()).parse(goldGraph, oracleGraph));
127 } else {
128 ((Trainer)getAlgorithm()).parse(goldGraph, oracleGraph);
129 }
130 }
131 }
132
133 public void train() throws MaltChainedException {
134 if (getGuide() == null) {
135 ((Trainer)getAlgorithm()).train();
136 }
137 }
138
139 public void terminate(Object[] arguments) throws MaltChainedException {
140 // if (getAlgorithm() instanceof Trainer) {
141 // ((Trainer)getAlgorithm()).terminate();
142 // }
143 getAlgorithm().terminate();
144 if (getGuide() != null) {
145 getGuide().terminate();
146 }
147 if (mode == LEARN) {
148 endTime = System.currentTimeMillis();
149 long elapsed = endTime - startTime;
150 if (configLogger.isInfoEnabled()) {
151 configLogger.info("Learning time: " +new Formatter().format("%02d:%02d:%02d", elapsed/3600000, elapsed%3600000/60000, elapsed%60000/1000)+" ("+elapsed+" ms)\n");
152 }
153 } else if (mode == PARSE) {
154 endTime = System.currentTimeMillis();
155 long elapsed = endTime - startTime;
156 if (configLogger.isInfoEnabled()) {
157 configLogger.info("Parsing time: " +new Formatter().format("%02d:%02d:%02d", elapsed/3600000, elapsed%3600000/60000, elapsed%60000/1000)+" ("+elapsed+" ms)\n");
158 }
159 }
160 if (SystemLogger.logger() != configLogger && configLogger != null) {
161 configLogger.removeAllAppenders();
162 }
163 }
164
165 /**
166 * Initialize the configuration logger
167 *
168 * @return the configuration logger
169 * @throws MaltChainedException
170 */
171 public Logger initConfigLogger(String logfile, String level) throws MaltChainedException {
172 if (logfile != null && logfile.length() > 0 && !logfile.equalsIgnoreCase("stdout") && configDir != null) {
173 configLogger = Logger.getLogger(logfile);
174 FileAppender fileAppender = null;
175 try {
176 fileAppender = new FileAppender(new PatternLayout("%m"),configDir.getWorkingDirectory().getPath()+File.separator+logfile, true);
177 } catch(IOException e) {
178 throw new ConfigurationException("It is not possible to create a configuration log file. ", e);
179 }
180 fileAppender.setThreshold(Level.toLevel(level, Level.INFO));
181 configLogger.addAppender(fileAppender);
182 configLogger.setLevel(Level.toLevel(level, Level.INFO));
183 } else {
184 configLogger = SystemLogger.logger();
185 }
186
187 return configLogger;
188 }
189
190 public Logger getConfigLogger() {
191 return configLogger;
192 }
193
194 public void setConfigLogger(Logger logger) {
195 configLogger = logger;
196 }
197
198 public ConfigurationDir getConfigurationDir() {
199 return configDir;
200 }
201
202 public void setConfigurationDir(ConfigurationDir configDir) {
203 this.configDir = configDir;
204 }
205
206 public int getMode() {
207 return mode;
208 }
209
210 public ConfigurationRegistry getRegistry() {
211 return registry;
212 }
213
214 public void setRegistry(ConfigurationRegistry registry) {
215 this.registry = registry;
216 }
217
218 public Object getOptionValue(String optiongroup, String optionname) throws MaltChainedException {
219 return OptionManager.instance().getOptionValue(optionContainerIndex, optiongroup, optionname);
220 }
221
222 public String getOptionValueString(String optiongroup, String optionname) throws MaltChainedException {
223 return OptionManager.instance().getOptionValueString(optionContainerIndex, optiongroup, optionname);
224 }
225
226 public OptionManager getOptionManager() throws MaltChainedException {
227 return OptionManager.instance();
228 }
229 /******************************** MaltParserConfiguration specific ********************************/
230
231 /**
232 * Returns the list of symbol tables
233 *
234 * @return the list of symbol tables
235 */
236 public SymbolTableHandler getSymbolTables() {
237 return symbolTableHandler;
238 }
239
240 public PropagationManager getPropagationManager() {
241 return propagationManager;
242 }
243
244 public Algorithm getAlgorithm() {
245 return parsingAlgorithm;
246 }
247 /**
248 * Returns the guide
249 *
250 * @return the guide
251 */
252 public ClassifierGuide getGuide() {
253 return parsingAlgorithm.getGuide();
254 }
255
256 public void checkOptionDependency() throws MaltChainedException {
257 try {
258 if (configDir.getInfoFileWriter() != null) {
259 configDir.getInfoFileWriter().write("\nDEPENDENCIES\n");
260 }
261
262 // Copy the feature model file into the configuration directory
263 String featureModelFileName = getOptionValue("guide", "features").toString().trim();
264 if (featureModelFileName.equals("")) {
265 // use default feature model depending on the selected parser algorithm
266 OptionManager.instance().overloadOptionValue(optionContainerIndex, "guide", "features", getOptionValueString("singlemalt", "parsing_algorithm"));
267 featureModelFileName = getOptionValue("guide", "features").toString().trim();
268 featureModelFileName = featureModelFileName.replace("{learner}", getOptionValueString("guide", "learner"));
269 featureModelFileName = configDir.copyToConfig(Util.findURLinJars(featureModelFileName));
270 } else {
271 featureModelFileName = configDir.copyToConfig(featureModelFileName);
272 }
273 OptionManager.instance().overloadOptionValue(optionContainerIndex, "guide", "features", featureModelFileName);
274 if (configDir.getInfoFileWriter() != null) {
275 configDir.getInfoFileWriter().write("--guide-features ( -F) "+getOptionValue("guide", "features").toString()+"\n");
276 }
277
278 if (getOptionValue("guide", "data_split_column").toString().equals("") && !getOptionValue("guide", "data_split_structure").toString().equals("")) {
279 configLogger.warn("Option --guide-data_split_column = '' and --guide-data_split_structure != ''. Option --guide-data_split_structure is overloaded with '', this will cause the parser to induce a single model.\n ");
280 OptionManager.instance().overloadOptionValue(optionContainerIndex, "guide", "data_split_structure", "");
281 if (configDir.getInfoFileWriter() != null) {
282 configDir.getInfoFileWriter().write("--guide-data_split_structure ( -s)\n");
283 }
284 }
285 if (!getOptionValue("guide", "data_split_column").toString().equals("") && getOptionValue("guide", "data_split_structure").toString().equals("")) {
286 configLogger.warn("Option --guide-data_split_column != '' and --guide-data_split_structure = ''. Option --guide-data_split_column is overloaded with '', this will cause the parser to induce a single model.\n");
287 OptionManager.instance().overloadOptionValue(optionContainerIndex, "guide", "data_split_column", "");
288 if (configDir.getInfoFileWriter() != null) {
289 configDir.getInfoFileWriter().write("--guide-data_split_column ( -d)\n");
290 }
291 }
292
293 String decisionSettings = getOptionValue("guide", "decision_settings").toString().trim();
294 String markingStrategy = getOptionValue("pproj", "marking_strategy").toString().trim();
295 String coveredRoot = getOptionValue("pproj", "covered_root").toString().trim();
296 StringBuilder newDecisionSettings = new StringBuilder();
297 // if ((Boolean)getOptionValue("malt0.4", "behavior") == true) {
298 // decisionSettings = "T.TRANS+A.DEPREL";
299 // }
300 if (decisionSettings == null || decisionSettings.length() < 1 || decisionSettings.equals("default")) {
301 decisionSettings = "T.TRANS+A.DEPREL";
302 } else {
303 decisionSettings = decisionSettings.toUpperCase();
304 }
305
306 if (markingStrategy.equalsIgnoreCase("head") || markingStrategy.equalsIgnoreCase("path") || markingStrategy.equalsIgnoreCase("head+path")) {
307 if (!Pattern.matches(".*A\\.PPLIFTED.*", decisionSettings)) {
308 newDecisionSettings.append("+A.PPLIFTED");
309 }
310 }
311 if (markingStrategy.equalsIgnoreCase("path") || markingStrategy.equalsIgnoreCase("head+path")) {
312 if (!Pattern.matches(".*A\\.PPPATH.*", decisionSettings)) {
313 newDecisionSettings.append("+A.PPPATH");
314 }
315 }
316 if (!coveredRoot.equalsIgnoreCase("none") && !Pattern.matches(".*A\\.PPCOVERED.*", decisionSettings)) {
317 newDecisionSettings.append("+A.PPCOVERED");
318 }
319 if (!getOptionValue("guide", "decision_settings").toString().equals(decisionSettings) || newDecisionSettings.length() > 0) {
320 OptionManager.instance().overloadOptionValue(optionContainerIndex, "guide", "decision_settings", decisionSettings+newDecisionSettings.toString());
321 if (configDir.getInfoFileWriter() != null) {
322 configDir.getInfoFileWriter().write("--guide-decision_settings ( -gds) "+getOptionValue("guide", "decision_settings").toString()+"\n");
323 }
324 }
325 if (configDir.getInfoFileWriter() != null) {
326 configDir.getInfoFileWriter().flush();
327 }
328 } catch (IOException e) {
329 throw new ConfigurationException("Could not write to the configuration information file. ", e);
330 }
331 }
332 }