package edu.stanford.nlp.ie.machinereading.domains.roth;

import edu.stanford.nlp.ie.machinereading.GenericDataSetReader;
import edu.stanford.nlp.ie.machinereading.structure.AnnotationUtils;
import edu.stanford.nlp.ie.machinereading.structure.EntityMention;
import edu.stanford.nlp.ie.machinereading.structure.ExtractionObject;
import edu.stanford.nlp.ie.machinereading.structure.MachineReadingAnnotations;
import edu.stanford.nlp.ie.machinereading.structure.RelationMention;
import edu.stanford.nlp.ie.machinereading.structure.Span;
import edu.stanford.nlp.io.IOUtils;
import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.pipeline.Annotation;
import edu.stanford.nlp.pipeline.StanfordCoreNLP;
import edu.stanford.nlp.trees.Tree;
import edu.stanford.nlp.util.StringUtils;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.logging.Level;
import java.util.logging.Logger;
import org.apache.tika.parser.ner.NERecogniser;
import org.opensaml.saml.saml2.core.IDPEntry;

/* loaded from: input_file:BOOT-INF/lib/stanford-corenlp-4.0.0.jar:edu/stanford/nlp/ie/machinereading/domains/roth/RothCONLL04Reader.class */
/**
 * Reads a corpus file in the Roth CoNLL04 layout into an {@link Annotation}.
 *
 * <p>As consumed by this reader, each sentence is a run of lines that ends after two
 * single-token lines have been seen. Lines with nine whitespace-separated fields describe
 * tokens/entities and lines with three fields describe relations between previously read
 * entities. Lines with any other field count are ignored.
 */
public class RothCONLL04Reader extends GenericDataSetReader {
    /** Set once the "unknown NER tag" warning has been logged, so it is logged only once. */
    private boolean warnedNER;

    /** Creates a reader whose logger only reports {@link Level#SEVERE} messages by default. */
    public RothCONLL04Reader() {
        super(null, true, true, true);
        this.logger = Logger.getLogger(RothCONLL04Reader.class.getName());
        this.logger.setLevel(Level.SEVERE);
    }

    /**
     * Reads every sentence of the given file into a single document annotation.
     *
     * @param path location of the corpus file; also stored as the doc id of each sentence
     * @return an annotation holding one sentence annotation per sentence read
     * @throws IOException declared by the overridden method
     */
    @Override
    public Annotation read(String path) throws IOException {
        Annotation doc = new Annotation("");
        this.logger.info("Reading file: " + path);
        Iterator<String> lines = IOUtils.readLines(path).iterator();
        while (lines.hasNext()) {
            AnnotationUtils.addSentence(doc, readSentence(path, lines));
        }
        return doc;
    }

    /**
     * Maps a corpus NER tag (matched case-insensitively) to the label used for entity mentions.
     *
     * @param ner tag as it appears in the corpus file
     * @return {@code "O"}, {@code "PERSON"}, {@code "LOCATION"}, {@code "ORGANIZATION"} or
     *     {@code "OTHER"}
     * @throws RuntimeException if the tag is not one of the five known tags
     */
    private String getNormalizedNERTag(String ner) {
        if (ner.equalsIgnoreCase("O")) {
            return "O";
        }
        if (ner.equalsIgnoreCase("Peop")) {
            return "PERSON";
        }
        // Plain literals: the tag values have nothing to do with Tika or OpenSAML constants.
        if (ner.equalsIgnoreCase("Loc")) {
            return "LOCATION";
        }
        if (ner.equalsIgnoreCase("Org")) {
            return "ORGANIZATION";
        }
        if (ner.equalsIgnoreCase("Other")) {
            return "OTHER";
        }
        if (!this.warnedNER) {
            this.warnedNER = true;
            this.logger.warning("This file contains NER tags not in the original Roth/Yih dataset, e.g.: " + ner);
        }
        throw new RuntimeException("Cannot normalize ner tag " + ner);
    }

    /**
     * Consumes lines from the iterator until two single-token lines have been seen (or the
     * input is exhausted) and builds the sentence annotation from them.
     *
     * @param docId value stored as the sentence's doc id
     * @param lines shared line iterator; advanced past the lines of this sentence
     * @return the sentence annotation with text, tokens, entity and relation mentions set
     */
    private Annotation readSentence(String docId, Iterator<String> lines) {
        Annotation sentence = new Annotation("");
        sentence.set(CoreAnnotations.DocIDAnnotation.class, docId);
        sentence.set(MachineReadingAnnotations.EntityMentionsAnnotation.class, new ArrayList<>());

        StringBuilder text = new StringBuilder();
        List<CoreLabel> tokens = new ArrayList<>();
        // Entity mentions keyed by the third field of their line, which relation lines refer to.
        Map<String, EntityMention> indexToEntity = new HashMap<>();
        int tokenOffset = 0;
        int separatorLines = 0;
        String sentenceId = null;

        while (lines.hasNext() && separatorLines < 2) {
            List<String> pieces = StringUtils.split(lines.next().replace("COMMA", ","));
            switch (pieces.size()) {
                case 1:
                    // Single-token line: counts towards the two separators that end a sentence.
                    separatorLines++;
                    break;
                case 3:
                    addRelation(sentence, pieces, indexToEntity);
                    break;
                case 9:
                    // Field 5 holds the words of this entry, separated by '/'.
                    List<String> words = StringUtils.split(pieces.get(5), "/");
                    String joinedWords = StringUtils.join(words, " ");
                    String entityId = "entity" + pieces.get(0) + '-' + pieces.get(2);
                    String nerTag = getNormalizedNERTag(pieces.get(1));
                    if (sentenceId == null) {
                        sentenceId = pieces.get(0);
                    }
                    if (!nerTag.equals("O")) {
                        Span extent = new Span(tokenOffset, tokenOffset + words.size());
                        EntityMention mention =
                                new EntityMention(entityId, sentence, extent, extent, nerTag, null, null);
                        AnnotationUtils.addEntityMention(sentence, mention);
                        indexToEntity.put(pieces.get(2), mention);
                    }
                    for (String word : words) {
                        CoreLabel token = new CoreLabel();
                        token.setWord(word);
                        token.set(CoreAnnotations.TextAnnotation.class, word);
                        token.set(CoreAnnotations.ValueAnnotation.class, word);
                        tokens.add(token);
                    }
                    text.append(joinedWords);
                    text.append(' ');
                    tokenOffset += words.size();
                    break;
                default:
                    // Lines with any other number of fields are ignored.
                    break;
            }
        }

        sentence.set(CoreAnnotations.TextAnnotation.class, text.toString());
        sentence.set(CoreAnnotations.ValueAnnotation.class, text.toString());
        sentence.set(CoreAnnotations.TokensAnnotation.class, tokens);
        sentence.set(CoreAnnotations.SentenceIDAnnotation.class, sentenceId);
        return sentence;
    }

    /**
     * Adds the relation described by a three-field line to the sentence.
     *
     * @param sentence sentence annotation receiving the relation mention
     * @param pieces the three fields: first argument index, second argument index, relation type
     * @param indexToEntity entity mentions read so far for this sentence, keyed by index
     * @throws RuntimeException if either argument index has no entity mention
     */
    private void addRelation(
            Annotation sentence, List<String> pieces, Map<String, EntityMention> indexToEntity) {
        String type = pieces.get(2);
        EntityMention arg1 = indexToEntity.get(pieces.get(0));
        EntityMention arg2 = indexToEntity.get(pieces.get(1));
        if (arg1 == null || arg2 == null) {
            // Fail with the offending line instead of an anonymous NullPointerException below.
            throw new RuntimeException("Relation refers to an unknown entity index: " + pieces);
        }
        List<ExtractionObject> args = new ArrayList<>();
        args.add(arg1);
        args.add(arg2);
        Span span = new Span(arg1.getExtentTokenStart(), arg2.getExtentTokenEnd());
        RelationMention relation =
                new RelationMention(RelationMention.makeUniqueId(), sentence, span, type, null, args);
        AnnotationUtils.addRelationMention(sentence, relation);
    }

    /**
     * Returns the index of {@code target} in {@code list}, comparing by reference ({@code ==})
     * rather than {@code equals}.
     *
     * @return the first matching index, or {@code -1} if the object is not in the list
     */
    private static <X> int getIndexByObjectEquality(List<X> list, X target) {
        int size = list.size();
        for (int i = 0; i < size; i++) {
            if (list.get(i) == target) {
                return i;
            }
        }
        return -1;
    }

    /**
     * Sets the head token position and head token span of an entity from the parse tree of its
     * sentence. Not called from within this class.
     *
     * @param entity mention whose head is set
     * @param tree parse tree whose leaves are indexed by the mention's extent token positions
     */
    @SuppressWarnings("unused")
    private void setHeadWord(EntityMention entity, Tree tree) {
        List<Tree> leaves = tree.getLeaves();
        Tree argRoot = tree.joinNode(
                leaves.get(entity.getExtentTokenStart()), leaves.get(entity.getExtentTokenEnd()));
        int headIndex = getIndexByObjectEquality(leaves, argRoot.headTerminal(this.headFinder));

        // If the extent's last leaf is punctuation and the head fell outside the extent, retry
        // with that last leaf excluded.
        boolean endsInPunct =
                StringUtils.isPunct(leaves.get(entity.getExtentTokenEnd()).label().value().trim());
        boolean headOutside =
                headIndex >= entity.getExtentTokenEnd() || headIndex < entity.getExtentTokenStart();
        if (endsInPunct && headOutside) {
            argRoot = tree.joinNode(
                    leaves.get(entity.getExtentTokenStart()),
                    leaves.get(entity.getExtentTokenEnd() - 1));
            headIndex = getIndexByObjectEquality(leaves, argRoot.headTerminal(this.headFinder));
            if (headIndex >= entity.getExtentTokenStart()
                    && headIndex <= entity.getExtentTokenEnd() - 1) {
                entity.setHeadTokenPosition(headIndex);
                entity.setHeadTokenSpan(new Span(headIndex, headIndex + 1));
            }
        }

        if (headIndex >= entity.getExtentTokenStart() && headIndex <= entity.getExtentTokenEnd()) {
            entity.setHeadTokenPosition(headIndex);
            entity.setHeadTokenSpan(new Span(headIndex, headIndex + 1));
            return;
        }

        // Fall back to parsing the extent's words on their own (minus trailing punctuation).
        List<String> argWords = new ArrayList<>();
        for (int i = entity.getExtentTokenStart(); i <= entity.getExtentTokenEnd(); i++) {
            argWords.add(leaves.get(i).label().value());
        }
        if (StringUtils.isPunct(argWords.get(argWords.size() - 1))) {
            argWords.remove(argWords.size() - 1);
        }
        Tree argTree = parseStrings(argWords);
        List<Tree> argLeaves = argTree.getLeaves();
        int reparsedHead =
                getIndexByObjectEquality(argLeaves, argTree.headTerminal(this.headFinder))
                        + entity.getExtentTokenStart();
        entity.setHeadTokenPosition(reparsedHead);
        entity.setHeadTokenSpan(new Span(reparsedHead, reparsedHead + 1));
    }

    /**
     * Smoke test: parses a fixed corpus path with a pipeline built from the command-line
     * properties and prints the resulting dataset.
     */
    public static void main(String[] strArr) throws Exception {
        Properties props = StringUtils.argsToProperties(strArr);
        RothCONLL04Reader reader = new RothCONLL04Reader();
        reader.setLoggerLevel(Level.INFO);
        reader.setProcessor(new StanfordCoreNLP(props));
        Annotation dataset = reader.parse("/u/nlp/data/RothCONLL04/conll04.corp");
        System.out.println(AnnotationUtils.datasetToString(dataset));
    }
}
