001/* 002 * Copyright (c) 2011, Regents of the University of Colorado 003 * All rights reserved. 004 * 005 * Redistribution and use in source and binary forms, with or without 006 * modification, are permitted provided that the following conditions are met: 007 * 008 * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 009 * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 010 * Neither the name of the University of Colorado at Boulder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 011 * 012 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 013 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 014 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 015 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 016 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 017 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 018 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 019 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 020 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 021 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 022 * POSSIBILITY OF SUCH DAMAGE. 023 */ 024package org.cleartk.corpus.timeml; 025 026import java.io.File; 027import java.io.FileWriter; 028import java.io.IOException; 029import java.io.OutputStream; 030import java.io.PrintWriter; 031import java.net.URI; 032import java.util.ArrayList; 033import java.util.Arrays; 034import java.util.HashMap; 035import java.util.List; 036import java.util.Map; 037 038import org.apache.uima.UimaContext; 039import org.apache.uima.analysis_engine.AnalysisEngineDescription; 040import org.apache.uima.analysis_engine.AnalysisEngineProcessException; 041import org.apache.uima.jcas.JCas; 042import org.apache.uima.resource.ResourceInitializationException; 043import org.cleartk.timeml.type.Anchor; 044import org.cleartk.timeml.type.DocumentCreationTime; 045import org.cleartk.timeml.type.Event; 046import org.cleartk.timeml.type.TemporalLink; 047import org.cleartk.timeml.type.Time; 048import org.cleartk.token.type.Sentence; 049import org.cleartk.token.type.Token; 050import org.cleartk.util.ViewUriUtil; 051import org.apache.uima.fit.component.JCasAnnotator_ImplBase; 052import org.apache.uima.fit.descriptor.ConfigurationParameter; 053import org.apache.uima.fit.factory.AnalysisEngineFactory; 054import org.apache.uima.fit.util.JCasUtil; 055 056import com.google.common.base.Joiner; 057 058/** 059 * <br> 060 * Copyright (c) 2011, Regents of the University of Colorado <br> 061 * All rights reserved. 062 * 063 * @author Steven Bethard 064 */ 065public class TempEval2010Writer extends JCasAnnotator_ImplBase { 066 067 public static AnalysisEngineDescription getDescription() throws ResourceInitializationException { 068 return AnalysisEngineFactory.createEngineDescription(TempEval2010Writer.class); 069 } 070 071 @ConfigurationParameter( 072 name = PARAM_OUTPUT_DIRECTORY, 073 mandatory = true, 074 description = "The directory where the TempEval .tab " + "files should be written.") 075 private File outputDirectory; 076 077 @ConfigurationParameter( 078 name = PARAM_TEXT_VIEW, 079 mandatory = true, 080 description = "View containing the document text.") 081 private String textView; 082 083 @ConfigurationParameter( 084 name = PARAM_DOCUMENT_CREATION_TIME_VIEW, 085 mandatory = false, 086 description = "View containing DocumentCreationTime annotations. If " 087 + "provided, the document creation times file will be written.") 088 private String documentCreationTimeView; 089 090 @ConfigurationParameter( 091 name = PARAM_TIME_EXTENT_VIEW, 092 mandatory = false, 093 description = "View containing Time annotations. If provided, the time " 094 + "extents file will be written.") 095 private String timeExtentView; 096 097 @ConfigurationParameter( 098 name = PARAM_TIME_ATTRIBUTE_VIEW, 099 mandatory = false, 100 description = "View containing Time annotations with their attributes. " 101 + "If provided, the time attributes file will be written.") 102 private String timeAttributeView; 103 104 @ConfigurationParameter( 105 name = PARAM_EVENT_EXTENT_VIEW, 106 mandatory = false, 107 description = "View containing Event annotations. If provided, the " 108 + "event extents will be written.") 109 private String eventExtentView; 110 111 @ConfigurationParameter( 112 name = PARAM_EVENT_ATTRIBUTE_VIEW, 113 mandatory = false, 114 description = "View containing Event annotations with their attributes. " 115 + "If provided, the event attributes file will be written.") 116 private String eventAttributeView; 117 118 @ConfigurationParameter( 119 name = PARAM_TEMPORAL_LINK_EVENT_TO_DOCUMENT_CREATION_TIME_VIEW, 120 mandatory = false, 121 description = "View containing TemporalLink annotations between events " 122 + "and the document creation time. If provided, the corresponding temporal links file will " 123 + "be written.") 124 private String temporalLinkEventToDocumentCreationTimeView; 125 126 @ConfigurationParameter( 127 name = PARAM_TEMPORAL_LINK_EVENT_TO_SAME_SENTENCE_TIME_VIEW, 128 mandatory = false, 129 description = "View containing TemporalLink annotations between events " 130 + "and times within the same sentence. If provided, the corresponding temporal links file " 131 + "will be written.") 132 private String temporalLinkEventToSameSentenceTimeView; 133 134 @ConfigurationParameter( 135 name = PARAM_TEMPORAL_LINK_EVENT_TO_SUBORDINATED_EVENT_VIEW, 136 mandatory = false, 137 description = "View containing TemporalLink annotations between events " 138 + "and syntactically dominated events. If provided, the corresponding temporal links file " 139 + "will be written.") 140 private String temporalLinkEventToSubordinatedEventView; 141 142 @ConfigurationParameter( 143 name = PARAM_TEMPORAL_LINK_MAIN_EVENT_TO_NEXT_SENTENCE_MAIN_EVENT_VIEW, 144 mandatory = false, 145 description = "View containing TemporalLink annotations between main " 146 + "events in adjacent sentences. If provided, the corresponding temporal links file will be " 147 + "written.") 148 private String temporalLinkMainEventToNextSentenceMainEventView; 149 150 public static final String PARAM_OUTPUT_DIRECTORY = "outputDirectory"; 151 152 public static final String PARAM_TEXT_VIEW = "textView"; 153 154 public static final String PARAM_DOCUMENT_CREATION_TIME_VIEW = "documentCreationTimeView"; 155 156 public static final String PARAM_TIME_EXTENT_VIEW = "timeExtentView"; 157 158 public static final String PARAM_TIME_ATTRIBUTE_VIEW = "timeAttributeView"; 159 160 public static final String PARAM_EVENT_EXTENT_VIEW = "eventExtentView"; 161 162 public static final String PARAM_EVENT_ATTRIBUTE_VIEW = "eventAttributeView"; 163 164 public static final String PARAM_TEMPORAL_LINK_EVENT_TO_DOCUMENT_CREATION_TIME_VIEW = "temporalLinkEventToDocumentCreationTimeView"; 165 166 public static final String PARAM_TEMPORAL_LINK_EVENT_TO_SAME_SENTENCE_TIME_VIEW = "temporalLinkEventToSameSentenceTimeView"; 167 168 public static final String PARAM_TEMPORAL_LINK_EVENT_TO_SUBORDINATED_EVENT_VIEW = "temporalLinkEventToSubordinatedEventView"; 169 170 public static final String PARAM_TEMPORAL_LINK_MAIN_EVENT_TO_NEXT_SENTENCE_MAIN_EVENT_VIEW = "temporalLinkMainEventToNextSentenceMainEventView"; 171 172 private List<PrintWriter> writers; 173 174 private PrintWriter baseWriter; 175 176 private PrintWriter dctWriter; 177 178 private PrintWriter timexExtentWriter; 179 180 private PrintWriter timexAttributeWriter; 181 182 private PrintWriter eventExtentWriter; 183 184 private PrintWriter eventAttributeWriter; 185 186 private PrintWriter tlinkDCTEventWriter; 187 188 private PrintWriter tlinkMainEventsWriter; 189 190 private PrintWriter tlinkSubordinatedEventsWriter; 191 192 private PrintWriter tlinkTimexEventWriter; 193 194 @Override 195 public void initialize(UimaContext context) throws ResourceInitializationException { 196 super.initialize(context); 197 if (!this.outputDirectory.exists()) { 198 this.outputDirectory.mkdirs(); 199 } 200 this.writers = new ArrayList<PrintWriter>(); 201 this.baseWriter = this.createWriter( 202 TempEval2010CollectionReader.BASE_SEGMENTATION_VIEW_NAME, 203 this.textView); 204 this.dctWriter = this.createWriter( 205 TempEval2010CollectionReader.DCT_VIEW_NAME, 206 this.documentCreationTimeView); 207 this.timexExtentWriter = this.createWriter( 208 TempEval2010CollectionReader.TIMEX_EXTENTS_VIEW_NAME, 209 this.timeExtentView); 210 this.timexAttributeWriter = this.createWriter( 211 TempEval2010CollectionReader.TIMEX_ATTRIBUTES_VIEW_NAME, 212 this.timeAttributeView); 213 this.eventExtentWriter = this.createWriter( 214 TempEval2010CollectionReader.EVENT_EXTENTS_VIEW_NAME, 215 this.eventExtentView); 216 this.eventAttributeWriter = this.createWriter( 217 TempEval2010CollectionReader.EVENT_ATTRIBUTES_VIEW_NAME, 218 this.eventAttributeView); 219 this.tlinkDCTEventWriter = this.createWriter( 220 TempEval2010CollectionReader.TLINK_DCT_EVENT_VIEW_NAME, 221 this.temporalLinkEventToDocumentCreationTimeView); 222 this.tlinkTimexEventWriter = this.createWriter( 223 TempEval2010CollectionReader.TLINK_TIMEX_EVENT_VIEW_NAME, 224 this.temporalLinkEventToSameSentenceTimeView); 225 this.tlinkSubordinatedEventsWriter = this.createWriter( 226 TempEval2010CollectionReader.TLINK_SUBORDINATED_EVENTS_VIEW_NAME, 227 this.temporalLinkEventToSubordinatedEventView); 228 this.tlinkMainEventsWriter = this.createWriter( 229 TempEval2010CollectionReader.TLINK_MAIN_EVENTS_VIEW_NAME, 230 this.temporalLinkMainEventToNextSentenceMainEventView); 231 } 232 233 @Override 234 public void process(JCas jCas) throws AnalysisEngineProcessException { 235 // determine the filename 236 URI uri = ViewUriUtil.getURI(jCas); 237 String fileName = uri.getFragment(); 238 if (fileName == null) { 239 fileName = new File(uri.getPath()).getName(); 240 } 241 242 // get the view with text, sentences and tokens 243 JCas textJCas = JCasUtil.getView(jCas, this.textView, false); 244 245 // write the document creation time 246 if (this.documentCreationTimeView != null) { 247 JCas dctJCas = JCasUtil.getView(jCas, this.documentCreationTimeView, false); 248 for (DocumentCreationTime time : JCasUtil.select(dctJCas, DocumentCreationTime.class)) { 249 this.write(this.dctWriter, fileName, time.getValue().replaceAll("-", "")); 250 } 251 } 252 253 // align tokens to times 254 Map<Token, Time> tokenTimeExtents = new HashMap<Token, Time>(); 255 if (this.timeExtentView != null) { 256 JCas timeExtentJCas = JCasUtil.getView(jCas, this.timeExtentView, false); 257 for (Time time : JCasUtil.select(timeExtentJCas, Time.class)) { 258 for (Token token : JCasUtil.selectCovered(textJCas, Token.class, time)) { 259 tokenTimeExtents.put(token, time); 260 } 261 } 262 } 263 Map<Token, Time> tokenTimeAttributes = new HashMap<Token, Time>(); 264 if (this.timeAttributeView != null) { 265 JCas timeAttributeJCas = JCasUtil.getView(jCas, this.timeAttributeView, false); 266 for (Time time : JCasUtil.select(timeAttributeJCas, Time.class)) { 267 for (Token token : JCasUtil.selectCovered(textJCas, Token.class, time)) { 268 tokenTimeAttributes.put(token, time); 269 } 270 } 271 } 272 273 // align tokens to events 274 Map<Token, Event> tokenEventExtents = new HashMap<Token, Event>(); 275 if (this.eventExtentView != null) { 276 JCas eventExtentJCas = JCasUtil.getView(jCas, this.eventExtentView, false); 277 for (Event event : JCasUtil.select(eventExtentJCas, Event.class)) { 278 for (Token token : JCasUtil.selectCovered(textJCas, Token.class, event)) { 279 tokenEventExtents.put(token, event); 280 } 281 } 282 } 283 284 Map<Token, Event> tokenEventAttributes = new HashMap<Token, Event>(); 285 if (this.eventAttributeView != null) { 286 JCas eventAttributeJCas = JCasUtil.getView(jCas, this.eventAttributeView, false); 287 for (Event event : JCasUtil.select(eventAttributeJCas, Event.class)) { 288 for (Token token : JCasUtil.selectCovered(textJCas, Token.class, event)) { 289 tokenEventAttributes.put(token, event); 290 } 291 } 292 } 293 294 // walk through tokens by sentence, writing tokens, times, events, etc. 295 int sentIndex = -1; 296 for (Sentence sentence : JCasUtil.select(textJCas, Sentence.class)) { 297 sentIndex += 1; 298 int tokenIndex = -1; 299 for (Token token : JCasUtil.selectCovered(textJCas, Token.class, sentence)) { 300 tokenIndex += 1; 301 302 // write the token to the segmentation file 303 this.write(this.baseWriter, fileName, sentIndex, tokenIndex, token.getCoveredText()); 304 305 // write the time extent and attributes 306 this.writeAnchors( 307 this.timexExtentWriter, 308 this.timexAttributeWriter, 309 tokenTimeExtents, 310 tokenTimeAttributes, 311 "timex3", 312 token, 313 fileName, 314 sentIndex, 315 tokenIndex, 316 new AttributeGetter<Time>() { 317 @Override 318 public List<Attribute> getAttributes(Time time) { 319 Attribute value = new Attribute("value", time.getValue()); 320 Attribute type = new Attribute("type", time.getTimeType()); 321 return Arrays.asList(value, type); 322 } 323 }); 324 325 // write the event extent and attributes 326 this.writeAnchors( 327 this.eventExtentWriter, 328 this.eventAttributeWriter, 329 tokenEventExtents, 330 tokenEventAttributes, 331 "event", 332 token, 333 fileName, 334 sentIndex, 335 tokenIndex, 336 new AttributeGetter<Event>() { 337 @Override 338 public List<Attribute> getAttributes(Event event) { 339 Attribute polarity = new Attribute("polarity", event.getPolarity()); 340 Attribute modality = new Attribute("modality", event.getModality()); 341 Attribute pos = new Attribute("pos", event.getPos()); 342 Attribute tense = new Attribute("tense", event.getTense()); 343 Attribute aspect = new Attribute("aspect", event.getAspect()); 344 Attribute eventClass = new Attribute("class", event.getEventClass()); 345 return Arrays.asList(polarity, modality, pos, tense, aspect, eventClass); 346 } 347 }); 348 } 349 } 350 351 // write the temporal links 352 this.writeTemporalLinks( 353 this.tlinkDCTEventWriter, 354 jCas, 355 this.temporalLinkEventToDocumentCreationTimeView, 356 fileName); 357 this.writeTemporalLinks( 358 this.tlinkTimexEventWriter, 359 jCas, 360 this.temporalLinkEventToSameSentenceTimeView, 361 fileName); 362 this.writeTemporalLinks( 363 this.tlinkSubordinatedEventsWriter, 364 jCas, 365 this.temporalLinkEventToSubordinatedEventView, 366 fileName); 367 this.writeTemporalLinks( 368 this.tlinkMainEventsWriter, 369 jCas, 370 this.temporalLinkMainEventToNextSentenceMainEventView, 371 fileName); 372 } 373 374 @Override 375 public void batchProcessComplete() throws AnalysisEngineProcessException { 376 super.batchProcessComplete(); 377 for (PrintWriter writer : this.writers) { 378 writer.flush(); 379 } 380 } 381 382 @Override 383 public void collectionProcessComplete() throws AnalysisEngineProcessException { 384 super.collectionProcessComplete(); 385 for (PrintWriter writer : this.writers) { 386 writer.close(); 387 } 388 } 389 390 private PrintWriter createWriter(String tabFileName, String viewParam) 391 throws ResourceInitializationException { 392 PrintWriter writer; 393 if (viewParam != null) { 394 try { 395 writer = new PrintWriter(new FileWriter(new File(this.outputDirectory, tabFileName))); 396 } catch (IOException e) { 397 throw new ResourceInitializationException(e); 398 } 399 } else { 400 writer = new PrintWriter(new OutputStream() { 401 @Override 402 public void write(int b) throws IOException { 403 // do nothing 404 } 405 }); 406 } 407 this.writers.add(writer); 408 return writer; 409 } 410 411 private void write(PrintWriter writer, Object... columns) { 412 writer.println(Joiner.on('\t').join(columns)); 413 } 414 415 private static class Attribute { 416 public String name; 417 418 public Object value; 419 420 public Attribute(String name, Object value) { 421 this.name = name; 422 this.value = value; 423 } 424 } 425 426 private static interface AttributeGetter<T extends Anchor> { 427 public List<Attribute> getAttributes(T anchor); 428 } 429 430 private <T extends Anchor> void writeAnchors( 431 PrintWriter extentWriter, 432 PrintWriter attrWriter, 433 Map<Token, T> tokenAnchorExtents, 434 Map<Token, T> tokenAnchorAttributes, 435 String anchorType, 436 Token token, 437 String fileName, 438 int sentIndex, 439 int tokenIndex, 440 AttributeGetter<T> attributeGetter) { 441 T anchor = tokenAnchorExtents.get(token); 442 if (anchor != null) { 443 String id = anchor.getId(); 444 this.write(extentWriter, fileName, sentIndex, tokenIndex, anchorType, id, "1"); 445 } 446 anchor = tokenAnchorAttributes.get(token); 447 if (anchor != null) { 448 String id = anchor.getId(); 449 boolean isFirstToken = token.getBegin() == anchor.getBegin(); 450 if (isFirstToken) { 451 for (Attribute attr : attributeGetter.getAttributes(anchor)) { 452 if (attr.value != null) { 453 this.write( 454 attrWriter, 455 fileName, 456 sentIndex, 457 tokenIndex, 458 anchorType, 459 id, 460 "1", 461 attr.name, 462 attr.value); 463 } 464 } 465 } 466 } 467 } 468 469 private void writeTemporalLinks(PrintWriter writer, JCas jCas, String viewName, String fileName) { 470 if (viewName != null) { 471 JCas view = JCasUtil.getView(jCas, viewName, false); 472 for (TemporalLink tlink : JCasUtil.select(view, TemporalLink.class)) { 473 String relation = tlink.getRelationType(); 474 if (relation == null) { 475 relation = "NONE"; 476 } 477 this.write(writer, fileName, tlink.getSource().getId(), tlink.getTarget().getId(), relation); 478 } 479 } 480 } 481}