001/*
002 * Copyright (c) 2011, Regents of the University of Colorado 
003 * All rights reserved.
004 * 
005 * Redistribution and use in source and binary forms, with or without
006 * modification, are permitted provided that the following conditions are met:
007 * 
008 * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 
009 * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 
010 * Neither the name of the University of Colorado at Boulder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
011 * 
012 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
013 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
014 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
015 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
016 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
017 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
018 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
019 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
020 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
021 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
022 * POSSIBILITY OF SUCH DAMAGE. 
023 */
024package org.cleartk.corpus.timeml;
025
026import java.io.File;
027import java.io.FileWriter;
028import java.io.IOException;
029import java.io.OutputStream;
030import java.io.PrintWriter;
031import java.net.URI;
032import java.util.ArrayList;
033import java.util.Arrays;
034import java.util.HashMap;
035import java.util.List;
036import java.util.Map;
037
038import org.apache.uima.UimaContext;
039import org.apache.uima.analysis_engine.AnalysisEngineDescription;
040import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
041import org.apache.uima.jcas.JCas;
042import org.apache.uima.resource.ResourceInitializationException;
043import org.cleartk.timeml.type.Anchor;
044import org.cleartk.timeml.type.DocumentCreationTime;
045import org.cleartk.timeml.type.Event;
046import org.cleartk.timeml.type.TemporalLink;
047import org.cleartk.timeml.type.Time;
048import org.cleartk.token.type.Sentence;
049import org.cleartk.token.type.Token;
050import org.cleartk.util.ViewUriUtil;
051import org.apache.uima.fit.component.JCasAnnotator_ImplBase;
052import org.apache.uima.fit.descriptor.ConfigurationParameter;
053import org.apache.uima.fit.factory.AnalysisEngineFactory;
054import org.apache.uima.fit.util.JCasUtil;
055
056import com.google.common.base.Joiner;
057
058/**
059 * <br>
060 * Copyright (c) 2011, Regents of the University of Colorado <br>
061 * All rights reserved.
062 * 
063 * @author Steven Bethard
064 */
065public class TempEval2010Writer extends JCasAnnotator_ImplBase {
066
067  public static AnalysisEngineDescription getDescription() throws ResourceInitializationException {
068    return AnalysisEngineFactory.createEngineDescription(TempEval2010Writer.class);
069  }
070
071  @ConfigurationParameter(
072      name = PARAM_OUTPUT_DIRECTORY,
073      mandatory = true,
074      description = "The directory where the TempEval .tab " + "files should be written.")
075  private File outputDirectory;
076
077  @ConfigurationParameter(
078      name = PARAM_TEXT_VIEW,
079      mandatory = true,
080      description = "View containing the document text.")
081  private String textView;
082
083  @ConfigurationParameter(
084      name = PARAM_DOCUMENT_CREATION_TIME_VIEW,
085      mandatory = false,
086      description = "View containing DocumentCreationTime annotations. If "
087          + "provided, the document creation times file will be written.")
088  private String documentCreationTimeView;
089
090  @ConfigurationParameter(
091      name = PARAM_TIME_EXTENT_VIEW,
092      mandatory = false,
093      description = "View containing Time annotations. If provided, the time "
094          + "extents file will be written.")
095  private String timeExtentView;
096
097  @ConfigurationParameter(
098      name = PARAM_TIME_ATTRIBUTE_VIEW,
099      mandatory = false,
100      description = "View containing Time annotations with their attributes. "
101          + "If provided, the time attributes file will be written.")
102  private String timeAttributeView;
103
104  @ConfigurationParameter(
105      name = PARAM_EVENT_EXTENT_VIEW,
106      mandatory = false,
107      description = "View containing Event annotations. If provided, the "
108          + "event extents will be written.")
109  private String eventExtentView;
110
111  @ConfigurationParameter(
112      name = PARAM_EVENT_ATTRIBUTE_VIEW,
113      mandatory = false,
114      description = "View containing Event annotations with their attributes. "
115          + "If provided, the event attributes file will be written.")
116  private String eventAttributeView;
117
118  @ConfigurationParameter(
119      name = PARAM_TEMPORAL_LINK_EVENT_TO_DOCUMENT_CREATION_TIME_VIEW,
120      mandatory = false,
121      description = "View containing TemporalLink annotations between events "
122          + "and the document creation time. If provided, the corresponding temporal links file will "
123          + "be written.")
124  private String temporalLinkEventToDocumentCreationTimeView;
125
126  @ConfigurationParameter(
127      name = PARAM_TEMPORAL_LINK_EVENT_TO_SAME_SENTENCE_TIME_VIEW,
128      mandatory = false,
129      description = "View containing TemporalLink annotations between events "
130          + "and times within the same sentence. If provided, the corresponding temporal links file "
131          + "will be written.")
132  private String temporalLinkEventToSameSentenceTimeView;
133
134  @ConfigurationParameter(
135      name = PARAM_TEMPORAL_LINK_EVENT_TO_SUBORDINATED_EVENT_VIEW,
136      mandatory = false,
137      description = "View containing TemporalLink annotations between events "
138          + "and syntactically dominated events. If provided, the corresponding temporal links file "
139          + "will be written.")
140  private String temporalLinkEventToSubordinatedEventView;
141
142  @ConfigurationParameter(
143      name = PARAM_TEMPORAL_LINK_MAIN_EVENT_TO_NEXT_SENTENCE_MAIN_EVENT_VIEW,
144      mandatory = false,
145      description = "View containing TemporalLink annotations between main "
146          + "events in adjacent sentences. If provided, the corresponding temporal links file will be "
147          + "written.")
148  private String temporalLinkMainEventToNextSentenceMainEventView;
149
150  public static final String PARAM_OUTPUT_DIRECTORY = "outputDirectory";
151
152  public static final String PARAM_TEXT_VIEW = "textView";
153
154  public static final String PARAM_DOCUMENT_CREATION_TIME_VIEW = "documentCreationTimeView";
155
156  public static final String PARAM_TIME_EXTENT_VIEW = "timeExtentView";
157
158  public static final String PARAM_TIME_ATTRIBUTE_VIEW = "timeAttributeView";
159
160  public static final String PARAM_EVENT_EXTENT_VIEW = "eventExtentView";
161
162  public static final String PARAM_EVENT_ATTRIBUTE_VIEW = "eventAttributeView";
163
164  public static final String PARAM_TEMPORAL_LINK_EVENT_TO_DOCUMENT_CREATION_TIME_VIEW = "temporalLinkEventToDocumentCreationTimeView";
165
166  public static final String PARAM_TEMPORAL_LINK_EVENT_TO_SAME_SENTENCE_TIME_VIEW = "temporalLinkEventToSameSentenceTimeView";
167
168  public static final String PARAM_TEMPORAL_LINK_EVENT_TO_SUBORDINATED_EVENT_VIEW = "temporalLinkEventToSubordinatedEventView";
169
170  public static final String PARAM_TEMPORAL_LINK_MAIN_EVENT_TO_NEXT_SENTENCE_MAIN_EVENT_VIEW = "temporalLinkMainEventToNextSentenceMainEventView";
171
172  private List<PrintWriter> writers;
173
174  private PrintWriter baseWriter;
175
176  private PrintWriter dctWriter;
177
178  private PrintWriter timexExtentWriter;
179
180  private PrintWriter timexAttributeWriter;
181
182  private PrintWriter eventExtentWriter;
183
184  private PrintWriter eventAttributeWriter;
185
186  private PrintWriter tlinkDCTEventWriter;
187
188  private PrintWriter tlinkMainEventsWriter;
189
190  private PrintWriter tlinkSubordinatedEventsWriter;
191
192  private PrintWriter tlinkTimexEventWriter;
193
194  @Override
195  public void initialize(UimaContext context) throws ResourceInitializationException {
196    super.initialize(context);
197    if (!this.outputDirectory.exists()) {
198      this.outputDirectory.mkdirs();
199    }
200    this.writers = new ArrayList<PrintWriter>();
201    this.baseWriter = this.createWriter(
202        TempEval2010CollectionReader.BASE_SEGMENTATION_VIEW_NAME,
203        this.textView);
204    this.dctWriter = this.createWriter(
205        TempEval2010CollectionReader.DCT_VIEW_NAME,
206        this.documentCreationTimeView);
207    this.timexExtentWriter = this.createWriter(
208        TempEval2010CollectionReader.TIMEX_EXTENTS_VIEW_NAME,
209        this.timeExtentView);
210    this.timexAttributeWriter = this.createWriter(
211        TempEval2010CollectionReader.TIMEX_ATTRIBUTES_VIEW_NAME,
212        this.timeAttributeView);
213    this.eventExtentWriter = this.createWriter(
214        TempEval2010CollectionReader.EVENT_EXTENTS_VIEW_NAME,
215        this.eventExtentView);
216    this.eventAttributeWriter = this.createWriter(
217        TempEval2010CollectionReader.EVENT_ATTRIBUTES_VIEW_NAME,
218        this.eventAttributeView);
219    this.tlinkDCTEventWriter = this.createWriter(
220        TempEval2010CollectionReader.TLINK_DCT_EVENT_VIEW_NAME,
221        this.temporalLinkEventToDocumentCreationTimeView);
222    this.tlinkTimexEventWriter = this.createWriter(
223        TempEval2010CollectionReader.TLINK_TIMEX_EVENT_VIEW_NAME,
224        this.temporalLinkEventToSameSentenceTimeView);
225    this.tlinkSubordinatedEventsWriter = this.createWriter(
226        TempEval2010CollectionReader.TLINK_SUBORDINATED_EVENTS_VIEW_NAME,
227        this.temporalLinkEventToSubordinatedEventView);
228    this.tlinkMainEventsWriter = this.createWriter(
229        TempEval2010CollectionReader.TLINK_MAIN_EVENTS_VIEW_NAME,
230        this.temporalLinkMainEventToNextSentenceMainEventView);
231  }
232
233  @Override
234  public void process(JCas jCas) throws AnalysisEngineProcessException {
235    // determine the filename
236    URI uri = ViewUriUtil.getURI(jCas);
237    String fileName = uri.getFragment();
238    if (fileName == null) {
239      fileName = new File(uri.getPath()).getName();
240    }
241
242    // get the view with text, sentences and tokens
243    JCas textJCas = JCasUtil.getView(jCas, this.textView, false);
244
245    // write the document creation time
246    if (this.documentCreationTimeView != null) {
247      JCas dctJCas = JCasUtil.getView(jCas, this.documentCreationTimeView, false);
248      for (DocumentCreationTime time : JCasUtil.select(dctJCas, DocumentCreationTime.class)) {
249        this.write(this.dctWriter, fileName, time.getValue().replaceAll("-", ""));
250      }
251    }
252
253    // align tokens to times
254    Map<Token, Time> tokenTimeExtents = new HashMap<Token, Time>();
255    if (this.timeExtentView != null) {
256      JCas timeExtentJCas = JCasUtil.getView(jCas, this.timeExtentView, false);
257      for (Time time : JCasUtil.select(timeExtentJCas, Time.class)) {
258        for (Token token : JCasUtil.selectCovered(textJCas, Token.class, time)) {
259          tokenTimeExtents.put(token, time);
260        }
261      }
262    }
263    Map<Token, Time> tokenTimeAttributes = new HashMap<Token, Time>();
264    if (this.timeAttributeView != null) {
265      JCas timeAttributeJCas = JCasUtil.getView(jCas, this.timeAttributeView, false);
266      for (Time time : JCasUtil.select(timeAttributeJCas, Time.class)) {
267        for (Token token : JCasUtil.selectCovered(textJCas, Token.class, time)) {
268          tokenTimeAttributes.put(token, time);
269        }
270      }
271    }
272
273    // align tokens to events
274    Map<Token, Event> tokenEventExtents = new HashMap<Token, Event>();
275    if (this.eventExtentView != null) {
276      JCas eventExtentJCas = JCasUtil.getView(jCas, this.eventExtentView, false);
277      for (Event event : JCasUtil.select(eventExtentJCas, Event.class)) {
278        for (Token token : JCasUtil.selectCovered(textJCas, Token.class, event)) {
279          tokenEventExtents.put(token, event);
280        }
281      }
282    }
283
284    Map<Token, Event> tokenEventAttributes = new HashMap<Token, Event>();
285    if (this.eventAttributeView != null) {
286      JCas eventAttributeJCas = JCasUtil.getView(jCas, this.eventAttributeView, false);
287      for (Event event : JCasUtil.select(eventAttributeJCas, Event.class)) {
288        for (Token token : JCasUtil.selectCovered(textJCas, Token.class, event)) {
289          tokenEventAttributes.put(token, event);
290        }
291      }
292    }
293
294    // walk through tokens by sentence, writing tokens, times, events, etc.
295    int sentIndex = -1;
296    for (Sentence sentence : JCasUtil.select(textJCas, Sentence.class)) {
297      sentIndex += 1;
298      int tokenIndex = -1;
299      for (Token token : JCasUtil.selectCovered(textJCas, Token.class, sentence)) {
300        tokenIndex += 1;
301
302        // write the token to the segmentation file
303        this.write(this.baseWriter, fileName, sentIndex, tokenIndex, token.getCoveredText());
304
305        // write the time extent and attributes
306        this.writeAnchors(
307            this.timexExtentWriter,
308            this.timexAttributeWriter,
309            tokenTimeExtents,
310            tokenTimeAttributes,
311            "timex3",
312            token,
313            fileName,
314            sentIndex,
315            tokenIndex,
316            new AttributeGetter<Time>() {
317              @Override
318              public List<Attribute> getAttributes(Time time) {
319                Attribute value = new Attribute("value", time.getValue());
320                Attribute type = new Attribute("type", time.getTimeType());
321                return Arrays.asList(value, type);
322              }
323            });
324
325        // write the event extent and attributes
326        this.writeAnchors(
327            this.eventExtentWriter,
328            this.eventAttributeWriter,
329            tokenEventExtents,
330            tokenEventAttributes,
331            "event",
332            token,
333            fileName,
334            sentIndex,
335            tokenIndex,
336            new AttributeGetter<Event>() {
337              @Override
338              public List<Attribute> getAttributes(Event event) {
339                Attribute polarity = new Attribute("polarity", event.getPolarity());
340                Attribute modality = new Attribute("modality", event.getModality());
341                Attribute pos = new Attribute("pos", event.getPos());
342                Attribute tense = new Attribute("tense", event.getTense());
343                Attribute aspect = new Attribute("aspect", event.getAspect());
344                Attribute eventClass = new Attribute("class", event.getEventClass());
345                return Arrays.asList(polarity, modality, pos, tense, aspect, eventClass);
346              }
347            });
348      }
349    }
350
351    // write the temporal links
352    this.writeTemporalLinks(
353        this.tlinkDCTEventWriter,
354        jCas,
355        this.temporalLinkEventToDocumentCreationTimeView,
356        fileName);
357    this.writeTemporalLinks(
358        this.tlinkTimexEventWriter,
359        jCas,
360        this.temporalLinkEventToSameSentenceTimeView,
361        fileName);
362    this.writeTemporalLinks(
363        this.tlinkSubordinatedEventsWriter,
364        jCas,
365        this.temporalLinkEventToSubordinatedEventView,
366        fileName);
367    this.writeTemporalLinks(
368        this.tlinkMainEventsWriter,
369        jCas,
370        this.temporalLinkMainEventToNextSentenceMainEventView,
371        fileName);
372  }
373
374  @Override
375  public void batchProcessComplete() throws AnalysisEngineProcessException {
376    super.batchProcessComplete();
377    for (PrintWriter writer : this.writers) {
378      writer.flush();
379    }
380  }
381
382  @Override
383  public void collectionProcessComplete() throws AnalysisEngineProcessException {
384    super.collectionProcessComplete();
385    for (PrintWriter writer : this.writers) {
386      writer.close();
387    }
388  }
389
390  private PrintWriter createWriter(String tabFileName, String viewParam)
391      throws ResourceInitializationException {
392    PrintWriter writer;
393    if (viewParam != null) {
394      try {
395        writer = new PrintWriter(new FileWriter(new File(this.outputDirectory, tabFileName)));
396      } catch (IOException e) {
397        throw new ResourceInitializationException(e);
398      }
399    } else {
400      writer = new PrintWriter(new OutputStream() {
401        @Override
402        public void write(int b) throws IOException {
403          // do nothing
404        }
405      });
406    }
407    this.writers.add(writer);
408    return writer;
409  }
410
411  private void write(PrintWriter writer, Object... columns) {
412    writer.println(Joiner.on('\t').join(columns));
413  }
414
415  private static class Attribute {
416    public String name;
417
418    public Object value;
419
420    public Attribute(String name, Object value) {
421      this.name = name;
422      this.value = value;
423    }
424  }
425
426  private static interface AttributeGetter<T extends Anchor> {
427    public List<Attribute> getAttributes(T anchor);
428  }
429
430  private <T extends Anchor> void writeAnchors(
431      PrintWriter extentWriter,
432      PrintWriter attrWriter,
433      Map<Token, T> tokenAnchorExtents,
434      Map<Token, T> tokenAnchorAttributes,
435      String anchorType,
436      Token token,
437      String fileName,
438      int sentIndex,
439      int tokenIndex,
440      AttributeGetter<T> attributeGetter) {
441    T anchor = tokenAnchorExtents.get(token);
442    if (anchor != null) {
443      String id = anchor.getId();
444      this.write(extentWriter, fileName, sentIndex, tokenIndex, anchorType, id, "1");
445    }
446    anchor = tokenAnchorAttributes.get(token);
447    if (anchor != null) {
448      String id = anchor.getId();
449      boolean isFirstToken = token.getBegin() == anchor.getBegin();
450      if (isFirstToken) {
451        for (Attribute attr : attributeGetter.getAttributes(anchor)) {
452          if (attr.value != null) {
453            this.write(
454                attrWriter,
455                fileName,
456                sentIndex,
457                tokenIndex,
458                anchorType,
459                id,
460                "1",
461                attr.name,
462                attr.value);
463          }
464        }
465      }
466    }
467  }
468
469  private void writeTemporalLinks(PrintWriter writer, JCas jCas, String viewName, String fileName) {
470    if (viewName != null) {
471      JCas view = JCasUtil.getView(jCas, viewName, false);
472      for (TemporalLink tlink : JCasUtil.select(view, TemporalLink.class)) {
473        String relation = tlink.getRelationType();
474        if (relation == null) {
475          relation = "NONE";
476        }
477        this.write(writer, fileName, tlink.getSource().getId(), tlink.getTarget().getId(), relation);
478      }
479    }
480  }
481}